/**
 * If a config store is enabled, the topics that pass the blacklist/whitelist filters are further
 * intersected with the topics obtained from the config store.
 */
private List<KafkaTopic> getFilteredTopics(SourceState state) {
  List<Pattern> blacklist = DatasetFilterUtils.getPatternList(state, TOPIC_BLACKLIST);
  List<Pattern> whitelist = DatasetFilterUtils.getPatternList(state, TOPIC_WHITELIST);
  List<KafkaTopic> topics = this.kafkaConsumerClient.get().getFilteredTopics(blacklist, whitelist);
  Optional<String> configStoreUri = ConfigStoreUtils.getConfigStoreUri(state.getProperties());
  if (configStoreUri.isPresent()) {
    List<KafkaTopic> topicsFromConfigStore = ConfigStoreUtils
        .getTopicsFromConfigStore(state.getProperties(), configStoreUri.get(), this.kafkaConsumerClient.get());
    // Keep only the whitelisted/non-blacklisted topics whose names also appear (case-insensitively) in the config store.
    return topics.stream()
        .filter((KafkaTopic p) -> topicsFromConfigStore.stream()
            .anyMatch((KafkaTopic q) -> q.getName().equalsIgnoreCase(p.getName())))
        .collect(toList());
  }
  return topics;
}
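As a standalone illustration of the intersection step above, the following minimal sketch (plain Java, hypothetical topic names) applies the same case-insensitive match-by-name filtering to simple strings:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class TopicIntersectionSketch {
  public static void main(String[] args) {
    // Topics that already passed the blacklist/whitelist filters (hypothetical names).
    List<String> filteredTopics = Arrays.asList("PageViewEvent", "clickEvent", "AdImpression");
    // Topics listed in the config store (hypothetical names).
    List<String> configStoreTopics = Arrays.asList("pageviewevent", "AdImpression");

    // Keep only topics whose name also appears, case-insensitively, in the config store.
    List<String> intersection = filteredTopics.stream()
        .filter(t -> configStoreTopics.stream().anyMatch(c -> c.equalsIgnoreCase(t)))
        .collect(Collectors.toList());

    System.out.println(intersection); // [PageViewEvent, AdImpression]
  }
}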
/**
 * Can be overridden to specify a non-pluggable {@link org.apache.gobblin.dataset.DatasetsFinder}.
 * @throws IOException
 */
protected IterableDatasetFinder createDatasetsFinder(SourceState state) throws IOException {
  return DatasetUtils.instantiateIterableDatasetFinder(state.getProperties(),
      HadoopUtils.getSourceFileSystem(state), null);
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  Config rootCfg = ConfigUtils.propertiesToConfig(state.getProperties());
  Config cfg = rootCfg.hasPath(CONFIG_NAMESPACE) ? rootCfg.getConfig(CONFIG_NAMESPACE) : ConfigFactory.empty();
  int numHellos = cfg.hasPath(NUM_HELLOS_KEY) ? cfg.getInt(NUM_HELLOS_KEY) : DEFAULT_NUM_HELLOS;

  Extract extract = new Extract(TableType.APPEND_ONLY,
      HelloWorldSource.class.getPackage().getName(),
      HelloWorldSource.class.getSimpleName());
  List<WorkUnit> wus = new ArrayList<>(numHellos);
  for (int i = 1; i <= numHellos; ++i) {
    WorkUnit wu = new WorkUnit(extract);
    wu.setProp(HELLO_ID_FULL_KEY, i);
    wus.add(wu);
  }
  return wus;
}
/**
 * Create a work unit for each configuration defined, or a single work unit if no configurations are defined.
 * @param state see {@link org.apache.gobblin.configuration.SourceState}
 * @return list of workunits
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<WorkUnit> workUnits = Lists.newArrayList();
  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  Config sourceConfig = ConfigUtils.getConfigOrEmpty(config, DATASET_CLEANER_SOURCE_PREFIX);
  List<String> configurationNames = ConfigUtils.getStringList(config, DATASET_CLEANER_CONFIGURATIONS);

  // use a dummy configuration name if none set
  if (configurationNames.isEmpty()) {
    configurationNames = ImmutableList.of("DummyConfig");
  }

  for (String configurationName : configurationNames) {
    WorkUnit workUnit = WorkUnit.createEmpty();
    // Configuration prefixed by the configuration name has precedence over the source-specific configuration,
    // and the source-specific configuration has precedence over the general configuration.
    Config wuConfig = ConfigUtils.getConfigOrEmpty(sourceConfig, configurationName)
        .withFallback(sourceConfig)
        .withFallback(config);
    workUnit.setProps(ConfigUtils.configToProperties(wuConfig), new Properties());
    TaskUtils.setTaskFactoryClass(workUnit, DatasetCleanerTaskFactory.class);
    workUnits.add(workUnit);
  }
  return workUnits;
}
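The precedence chain above relies on Typesafe Config's withFallback semantics: the config consulted first wins, and each fallback only fills in keys that are still missing. A minimal sketch with hypothetical keys and values:

import java.util.Map;
import com.google.common.collect.ImmutableMap;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

public class FallbackPrecedenceSketch {
  public static void main(String[] args) {
    // General job-level settings (hypothetical keys and values).
    Map<String, Object> general = ImmutableMap.of("retention.days", 30, "dry.run", true);
    // Source-level settings override the general ones.
    Map<String, Object> source = ImmutableMap.of("retention.days", 14);
    // Settings for one named cleaner configuration override both.
    Map<String, Object> named = ImmutableMap.of("retention.days", 7);

    Config resolved = ConfigFactory.parseMap(named)
        .withFallback(ConfigFactory.parseMap(source))
        .withFallback(ConfigFactory.parseMap(general));

    System.out.println(resolved.getInt("retention.days")); // 7 -> the named configuration wins
    System.out.println(resolved.getBoolean("dry.run"));    // true -> filled in from the general configuration
  }
}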
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  Consumer<String, byte[]> consumer = getKafkaConsumer(config);
  LOG.debug("Consumer is {}", consumer);

  String topic = ConfigUtils.getString(config, TOPIC_WHITELIST, StringUtils.EMPTY);

  // TODO: fix this to use the new API when KafkaWrapper is fixed
  List<WorkUnit> workUnits = new ArrayList<>();
  List<PartitionInfo> topicPartitions = consumer.partitionsFor(topic);
  LOG.info("Partition count is {}", topicPartitions.size());

  for (PartitionInfo topicPartition : topicPartitions) {
    Extract extract = this.createExtract(DEFAULT_TABLE_TYPE, DEFAULT_NAMESPACE_NAME, topicPartition.topic());
    LOG.info("Partition info is {}", topicPartition);
    WorkUnit workUnit = WorkUnit.create(extract);
    setTopicNameInState(workUnit, topicPartition.topic());
    workUnit.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, topicPartition.topic());
    setPartitionId(workUnit, topicPartition.partition());
    workUnits.add(workUnit);
  }
  return workUnits;
}
@VisibleForTesting
public void initialize(SourceState state) throws IOException {
  this.updateProvider = UpdateProviderFactory.create(state);
  this.metricContext = Instrumented.getMetricContext(state, HiveSource.class);
  this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, EventConstants.CONVERSION_NAMESPACE).build();
  this.avroSchemaManager = new AvroSchemaManager(getSourceFs(state), state);
  this.workunits = Lists.newArrayList();

  this.watermarker = GobblinConstructorUtils.invokeConstructor(HiveSourceWatermarkerFactory.class,
      state.getProp(HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS))
      .createFromState(state);

  EventSubmitter.submit(Optional.of(this.eventSubmitter), EventConstants.CONVERSION_SETUP_EVENT);

  this.datasetFinder = GobblinConstructorUtils.invokeConstructor(HiveDatasetFinder.class,
      state.getProp(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY, DEFAULT_HIVE_SOURCE_DATASET_FINDER_CLASS),
      getSourceFs(state), state.getProperties(), this.eventSubmitter);

  int maxLookBackDays = state.getPropAsInt(HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS_KEY, DEFAULT_HIVE_SOURCE_MAXIMUM_LOOKBACK_DAYS);
  this.maxLookBackTime = new DateTime().minusDays(maxLookBackDays).getMillis();

  this.ignoreDataPathIdentifierList = COMMA_BASED_SPLITTER.splitToList(
      state.getProp(HIVE_SOURCE_IGNORE_DATA_PATH_IDENTIFIER_KEY, DEFAULT_HIVE_SOURCE_IGNORE_DATA_PATH_IDENTIFIER));

  silenceHiveLoggers();
}
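The lookback window set up above is just a millisecond cutoff computed with Joda-Time; the source uses it when deciding whether older data is still worth creating work units for. A minimal sketch of that arithmetic, with a hypothetical partition timestamp:

import org.joda.time.DateTime;

public class LookbackCutoffSketch {
  public static void main(String[] args) {
    int maxLookBackDays = 3; // plays the role of the configured lookback setting
    long maxLookBackTime = new DateTime().minusDays(maxLookBackDays).getMillis();

    // Hypothetical partition creation time: five days ago, i.e. outside the lookback window.
    long partitionCreateTime = new DateTime().minusDays(5).getMillis();

    boolean withinLookback = partitionCreateTime >= maxLookBackTime;
    System.out.println(withinLookback); // false -> such a partition falls outside the window
  }
}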
int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES, DEFAULT_MAX_CONCURRENT_LISTING_SERVICES);

final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build();

// Assumed assignment: the excerpt is missing the receiver of this call; in CopySource the dataset finder
// is obtained from DatasetUtils.
DatasetsFinder<CopyableDatasetBase> datasetFinder = DatasetUtils
    .instantiateDatasetFinder(state.getProperties(), sourceFs, DEFAULT_DATASET_PROFILE_CLASS_KEY,
        this.eventSubmitter, state);
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  try {
    FileSystem fs = HadoopUtils.getSourceFileSystem(state);
    Config config = ConfigUtils.propertiesToConfig(state.getProperties());

    if (state.contains(COPY_TABLE_KEY)) {
      HiveDataset dataset = getHiveDataset(state.getProp(COPY_TABLE_KEY), fs, state);
      WorkUnit workUnit = HiveMaterializer.tableCopyWorkUnit(dataset,
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_VIEW)) {
      HiveDataset dataset = getHiveDataset(state.getProp(MATERIALIZE_VIEW), fs, state);
      WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(dataset, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), dataset.getTable()), null);
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    } else if (state.contains(MATERIALIZE_QUERY)) {
      String query = state.getProp(MATERIALIZE_QUERY);
      WorkUnit workUnit = HiveMaterializer.queryResultMaterializationWorkUnit(query, getOutputStorageFormat(state),
          new StageableTableMetadata(config.getConfig(HIVE_MATERIALIZER_SOURCE_PREFIX), null));
      HiveTask.disableHiveWatermarker(workUnit);
      return Lists.newArrayList(workUnit);
    }
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
  throw new RuntimeException(String.format("Must specify either %s, %s, or %s.",
      COPY_TABLE_KEY, MATERIALIZE_QUERY, MATERIALIZE_VIEW));
}
Config config = ConfigUtils.propertiesToConfig(state.getProperties());
// The argument to resolveClass() is truncated in this excerpt; the completion below assumes the factory
// class name comes from a job property with a default fallback (these constant names are assumptions).
GobblinKafkaConsumerClientFactory kafkaConsumerClientFactory = kafkaConsumerClientResolver
    .resolveClass(state.getProp(GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS,
        DEFAULT_GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS))
    .newInstance();
@Override
public WorkUnitStream getWorkunitStream(SourceState state) {
  try {
    fs = getSourceFileSystem(state);
    state.setProp(COMPACTION_INIT_TIME, DateTimeUtils.currentTimeMillis());
    suite = CompactionSuiteUtils.getCompactionSuiteFactory(state).createSuite(state);

    initRequestAllocator(state);
    initJobDir(state);
    copyJarDependencies(state);

    DatasetsFinder finder = DatasetUtils.instantiateDatasetFinder(state.getProperties(),
        getSourceFileSystem(state), DefaultFileSystemGlobFinder.class.getName());
    List<Dataset> datasets = finder.findDatasets();
    CompactionWorkUnitIterator workUnitIterator = new CompactionWorkUnitIterator();

    // Spawn a single thread to create work units
    new Thread(new SingleWorkUnitGeneratorService(state, prioritize(datasets, state), workUnitIterator),
        "SingleWorkUnitGeneratorService").start();
    return new BasicWorkUnitStream.Builder(workUnitIterator).build();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
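The hand-off above, where a background generator thread feeds an iterator-backed WorkUnitStream, lets downstream processing start before dataset discovery finishes. The sketch below shows the general pattern with a plain BlockingQueue and a sentinel value; it is a generic illustration of the technique, not Gobblin's actual CompactionWorkUnitIterator.

import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

/** Producer/consumer iterator sketch: a background thread pushes items, a consumer iterates lazily. */
public class StreamingIteratorSketch {
  private static final String POISON_PILL = "__DONE__"; // sentinel marking the end of the stream

  public static void main(String[] args) {
    BlockingQueue<String> queue = new LinkedBlockingQueue<>();

    // Producer: analogous to SingleWorkUnitGeneratorService creating work units one by one.
    new Thread(() -> {
      for (int i = 1; i <= 3; i++) {
        queue.add("workunit-" + i);
      }
      queue.add(POISON_PILL);
    }, "generator").start();

    // Consumer side: an iterator that blocks until the next item is available.
    Iterator<String> it = new Iterator<String>() {
      private String next;

      @Override
      public boolean hasNext() {
        if (next == null) {
          try {
            next = queue.take(); // blocks until the producer supplies the next item
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
          }
        }
        return !POISON_PILL.equals(next);
      }

      @Override
      public String next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        String result = next;
        next = null;
        return result;
      }
    };

    while (it.hasNext()) {
      System.out.println(it.next()); // workunit-1, workunit-2, workunit-3
    }
  }
}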
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  configureIfNeeded(ConfigFactory.parseProperties(state.getProperties()));
  final List<WorkUnitState> previousWorkUnitStates = state.getPreviousWorkUnitStates();
  if (!previousWorkUnitStates.isEmpty())