public SimpleKafkaSpecConsumer(Config config, Optional<Logger> log) { // Consumer String kafkaConsumerClientClass = ConfigUtils.getString(config, CONSUMER_CLIENT_FACTORY_CLASS_KEY, DEFAULT_CONSUMER_CLIENT_FACTORY_CLASS); try { Class<?> clientFactoryClass = (Class<?>) Class.forName(kafkaConsumerClientClass); final GobblinKafkaConsumerClient.GobblinKafkaConsumerClientFactory factory = (GobblinKafkaConsumerClient.GobblinKafkaConsumerClientFactory) ConstructorUtils.invokeConstructor(clientFactoryClass); _kafkaConsumer = factory.create(config); } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InstantiationException | InvocationTargetException e) { if (log.isPresent()) { log.get().error("Failed to instantiate Kafka consumer from class " + kafkaConsumerClientClass, e); } throw new RuntimeException("Failed to instantiate Kafka consumer", e); } List<KafkaTopic> kafkaTopics = _kafkaConsumer.getFilteredTopics(Collections.EMPTY_LIST, Lists.newArrayList(Pattern.compile(config.getString(SimpleKafkaSpecExecutor.SPEC_KAFKA_TOPICS_KEY)))); _partitions = kafkaTopics.get(0).getPartitions(); _lowWatermark = Lists.newArrayList(Collections.nCopies(_partitions.size(), 0L)); _nextWatermark = Lists.newArrayList(Collections.nCopies(_partitions.size(), 0L)); _highWatermark = Lists.newArrayList(Collections.nCopies(_partitions.size(), 0L)); InputStream dummyInputStream = new ByteArrayInputStream(new byte[0]); _decoder = DecoderFactory.get().binaryDecoder(dummyInputStream, null); _reader = new SpecificDatumReader<AvroJobSpec>(AvroJobSpec.SCHEMA$); _versionWriter = new FixedSchemaVersionWriter(); }
// NOTE(review): trailing fragment of an anonymous Function — maps one (topic name ->
// PartitionInfo list) entry to a KafkaTopic, converting each PartitionInfo via the
// PARTITION_INFO_TO_KAFKA_PARTITION function. The enclosing transform expression begins
// outside this chunk; `}).toList();` closes it — presumably a FluentIterable, TODO confirm.
@Override public KafkaTopic apply(Entry<String, List<PartitionInfo>> filteredTopicEntry) { return new KafkaTopic(filteredTopicEntry.getKey(), Lists.transform(filteredTopicEntry.getValue(), PARTITION_INFO_TO_KAFKA_PARTITION)); } }).toList();
// NOTE(review): fragment of an anonymous Function<KafkaTopic, String> — extracts the topic
// name. The surrounding call (closed here by `}), state);`) starts outside this chunk, so
// the receiver of the transformed name list cannot be determined from this view.
@Override public String apply(KafkaTopic topic) { return topic.getName(); } }), state);
// NOTE(review): syntactically incomplete fragment — merges the topic's specific state into
// the shared topicSpecificStateMap (lazily creating an empty State per topic name), then
// constructs a WorkUnitCreator with that (possibly absent) state. The statement receiving
// `new WorkUnitCreator(...)` — likely an executor submission — is cut off outside this
// chunk; confirm against the full file.
if (topic.getTopicSpecificState().isPresent()) { topicSpecificStateMap.computeIfAbsent(topic.getName(), k -> new State()) .addAllIfNotExist(topic.getTopicSpecificState().get()); new WorkUnitCreator(topic, state, Optional.fromNullable(topicSpecificStateMap.get(topic.getName())), workUnits));
// NOTE(review): duplicate of an earlier syntactically incomplete fragment — merges the
// topic's specific state into topicSpecificStateMap (lazily creating an empty State per
// topic name), then constructs a WorkUnitCreator with that state. The statement receiving
// `new WorkUnitCreator(...)` is cut off outside this chunk; confirm against the full file.
if (topic.getTopicSpecificState().isPresent()) { topicSpecificStateMap.computeIfAbsent(topic.getName(), k -> new State()) .addAllIfNotExist(topic.getTopicSpecificState().get()); new WorkUnitCreator(topic, state, Optional.fromNullable(topicSpecificStateMap.get(topic.getName())), workUnits));
private List<WorkUnit> getWorkUnitsForTopic(KafkaTopic topic, SourceState state, Optional<State> topicSpecificState) { Timer.Context context = this.metricContext.timer("isTopicQualifiedTimer").time(); boolean topicQualified = isTopicQualified(topic); context.close(); List<WorkUnit> workUnits = Lists.newArrayList(); for (KafkaPartition partition : topic.getPartitions()) { WorkUnit workUnit = getWorkUnitForTopicPartition(partition, state, topicSpecificState); this.partitionsToBeProcessed.add(partition); if (workUnit != null) { // For disqualified topics, for each of its workunits set the high watermark to be the same // as the low watermark, so that it will be skipped. if (!topicQualified) { skipWorkUnit(workUnit); } workUnits.add(workUnit); } } return workUnits; }
/**
 * Returns every topic that passes this client's metadata filtering, each paired with its
 * partition list.
 *
 * @return one {@link KafkaTopic} per filtered {@link TopicMetadata} entry
 */
@Override
public List<KafkaTopic> getTopics() {
  List<KafkaTopic> result = Lists.newArrayList();
  // Resolve partitions per metadata entry and wrap each as a KafkaTopic.
  for (TopicMetadata metadata : getFilteredMetadataList()) {
    result.add(new KafkaTopic(metadata.topic(), getPartitionsForTopic(metadata)));
  }
  return result;
}
// NOTE(review): fragment of an anonymous Predicate<KafkaTopic> — keeps only topics whose
// name survives the blacklist/whitelist pattern check via DatasetFilterUtils.survived.
// The opening of the enclosing filter call (closed here by `}));`) lies outside this chunk.
@Override public boolean apply(@Nonnull KafkaTopic kafkaTopic) { return DatasetFilterUtils.survived(kafkaTopic.getName(), blacklist, whitelist); } }));
public SimpleKafkaSpecConsumer(Config config, Optional<Logger> log) { // Consumer String kafkaConsumerClientClass = ConfigUtils.getString(config, CONSUMER_CLIENT_FACTORY_CLASS_KEY, DEFAULT_CONSUMER_CLIENT_FACTORY_CLASS); try { Class<?> clientFactoryClass = (Class<?>) Class.forName(kafkaConsumerClientClass); final GobblinKafkaConsumerClient.GobblinKafkaConsumerClientFactory factory = (GobblinKafkaConsumerClient.GobblinKafkaConsumerClientFactory) ConstructorUtils.invokeConstructor(clientFactoryClass); _kafkaConsumer = factory.create(config); } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InstantiationException | InvocationTargetException e) { if (log.isPresent()) { log.get().error("Failed to instantiate Kafka consumer from class " + kafkaConsumerClientClass, e); } throw new RuntimeException("Failed to instantiate Kafka consumer", e); } List<KafkaTopic> kafkaTopics = _kafkaConsumer.getFilteredTopics(Collections.EMPTY_LIST, Lists.newArrayList(Pattern.compile(config.getString(SimpleKafkaSpecExecutor.SPEC_KAFKA_TOPICS_KEY)))); _partitions = kafkaTopics.get(0).getPartitions(); _lowWatermark = Lists.newArrayList(Collections.nCopies(_partitions.size(), 0L)); _nextWatermark = Lists.newArrayList(Collections.nCopies(_partitions.size(), 0L)); _highWatermark = Lists.newArrayList(Collections.nCopies(_partitions.size(), 0L)); InputStream dummyInputStream = new ByteArrayInputStream(new byte[0]); _decoder = DecoderFactory.get().binaryDecoder(dummyInputStream, null); _reader = new SpecificDatumReader<AvroJobSpec>(AvroJobSpec.SCHEMA$); _versionWriter = new FixedSchemaVersionWriter(); }
/**
 * Returns the topics surviving the given blacklist/whitelist patterns, each paired with
 * its partition list.
 *
 * @param blacklist patterns whose matches are excluded
 * @param whitelist patterns whose matches are included
 * @return one {@link KafkaTopic} per surviving {@link TopicMetadata} entry
 */
@Override
public List<KafkaTopic> getFilteredTopics(List<Pattern> blacklist, List<Pattern> whitelist) {
  List<KafkaTopic> survivors = Lists.newArrayList();
  // Resolve partitions per surviving metadata entry and wrap each as a KafkaTopic.
  for (TopicMetadata metadata : getFilteredMetadataList(blacklist, whitelist)) {
    survivors.add(new KafkaTopic(metadata.topic(), getPartitionsForTopic(metadata)));
  }
  return survivors;
}
/**
 * Computes the topics surviving blacklist/whitelist filtering. If a config store is
 * enabled, the result is further intersected (case-insensitively by name) with the topics
 * listed in the config store.
 *
 * @param state source state carrying the blacklist/whitelist and config-store properties
 * @return the filtered (and possibly config-store-intersected) topic list
 */
private List<KafkaTopic> getFilteredTopics(SourceState state) {
  List<Pattern> blacklist = DatasetFilterUtils.getPatternList(state, TOPIC_BLACKLIST);
  List<Pattern> whitelist = DatasetFilterUtils.getPatternList(state, TOPIC_WHITELIST);
  List<KafkaTopic> surviving = this.kafkaConsumerClient.get().getFilteredTopics(blacklist, whitelist);

  Optional<String> configStoreUri = ConfigStoreUtils.getConfigStoreUri(state.getProperties());
  // Guard clause: without a config store there is nothing further to intersect against.
  if (!configStoreUri.isPresent()) {
    return surviving;
  }

  List<KafkaTopic> storeTopics = ConfigStoreUtils.getTopicsFromConfigStore(
      state.getProperties(), configStoreUri.get(), this.kafkaConsumerClient.get());
  // Keep a surviving topic only when some config-store topic shares its name (ignoring case).
  return surviving.stream()
      .filter(candidate -> storeTopics.stream()
          .map(KafkaTopic::getName)
          .anyMatch(name -> name.equalsIgnoreCase(candidate.getName())))
      .collect(toList());
}
private List<WorkUnit> getWorkUnitsForTopic(KafkaTopic topic, SourceState state, Optional<State> topicSpecificState) { Timer.Context context = this.metricContext.timer("isTopicQualifiedTimer").time(); boolean topicQualified = isTopicQualified(topic); context.close(); List<WorkUnit> workUnits = Lists.newArrayList(); for (KafkaPartition partition : topic.getPartitions()) { WorkUnit workUnit = getWorkUnitForTopicPartition(partition, state, topicSpecificState); this.partitionsToBeProcessed.add(partition); if (workUnit != null) { // For disqualified topics, for each of its workunits set the high watermark to be the same // as the low watermark, so that it will be skipped. if (!topicQualified) { skipWorkUnit(workUnit); } workUnits.add(workUnit); } } return workUnits; }
/**
 * Duplicate chunk: returns every topic that passes this client's metadata filtering,
 * each paired with its partition list.
 *
 * @return one {@link KafkaTopic} per filtered {@link TopicMetadata} entry
 */
@Override
public List<KafkaTopic> getTopics() {
  List<KafkaTopic> topics = Lists.newArrayList();
  // Wrap each filtered metadata entry, attaching its resolved partitions.
  for (TopicMetadata entry : getFilteredMetadataList()) {
    topics.add(new KafkaTopic(entry.topic(), getPartitionsForTopic(entry)));
  }
  return topics;
}
// NOTE(review): mid-method fragment — the first branch (cut off above) keeps topics whose
// names appear in `whitelistedTopics`; the `else if` branch collects blacklisted topic names
// from config-store URIs and filters those topics out; a trailing `else` is cut off below.
// Both `contains` checks compare names case-sensitively — TODO confirm this is intended,
// since other config-store matching in this codebase compares names ignoring case.
.filter((KafkaTopic p) -> whitelistedTopics.contains(p.getName())) .collect(Collectors.toList()); } else if (properties.containsKey(GOBBLIN_CONFIG_TAGS_BLACKLIST)) { .forEach(((URI u) -> blacklistedTopics.add(ConfigStoreUtils.getTopicNameFromURI(u)))); return allTopics.stream() .filter((KafkaTopic p) -> !blacklistedTopics.contains(p.getName())) .collect(Collectors.toList()); } else {
/**
 * Duplicate chunk: returns the topics surviving the given blacklist/whitelist patterns,
 * each paired with its partition list.
 *
 * @param blacklist patterns whose matches are excluded
 * @param whitelist patterns whose matches are included
 * @return one {@link KafkaTopic} per surviving {@link TopicMetadata} entry
 */
@Override
public List<KafkaTopic> getFilteredTopics(List<Pattern> blacklist, List<Pattern> whitelist) {
  List<KafkaTopic> topics = Lists.newArrayList();
  // Wrap each surviving metadata entry, attaching its resolved partitions.
  for (TopicMetadata entry : getFilteredMetadataList(blacklist, whitelist)) {
    topics.add(new KafkaTopic(entry.topic(), getPartitionsForTopic(entry)));
  }
  return topics;
}
// NOTE(review): duplicate fragment of an anonymous Function<KafkaTopic, String> — extracts
// the topic name. The surrounding call (closed here by `}), state);`) starts outside this
// chunk, so the receiver of the transformed name list cannot be determined from this view.
@Override public String apply(KafkaTopic topic) { return topic.getName(); } }), state);
// NOTE(review): duplicate fragment of an anonymous Predicate<KafkaTopic> — keeps only
// topics whose name survives the blacklist/whitelist pattern check via
// DatasetFilterUtils.survived. The opening of the enclosing filter call (closed here by
// `}));`) lies outside this chunk.
@Override public boolean apply(@Nonnull KafkaTopic kafkaTopic) { return DatasetFilterUtils.survived(kafkaTopic.getName(), blacklist, whitelist); } }));
/**
 * Duplicate chunk: computes the topics surviving blacklist/whitelist filtering. If a
 * config store is enabled, the result is further intersected (case-insensitively by name)
 * with the topics listed in the config store.
 *
 * @param state source state carrying the blacklist/whitelist and config-store properties
 * @return the filtered (and possibly config-store-intersected) topic list
 */
private List<KafkaTopic> getFilteredTopics(SourceState state) {
  List<Pattern> blacklist = DatasetFilterUtils.getPatternList(state, TOPIC_BLACKLIST);
  List<Pattern> whitelist = DatasetFilterUtils.getPatternList(state, TOPIC_WHITELIST);
  List<KafkaTopic> matched = this.kafkaConsumerClient.get().getFilteredTopics(blacklist, whitelist);

  Optional<String> configStoreUri = ConfigStoreUtils.getConfigStoreUri(state.getProperties());
  // Guard clause: without a config store there is nothing further to intersect against.
  if (!configStoreUri.isPresent()) {
    return matched;
  }

  List<KafkaTopic> fromStore = ConfigStoreUtils.getTopicsFromConfigStore(
      state.getProperties(), configStoreUri.get(), this.kafkaConsumerClient.get());
  // Keep a matched topic only when some config-store topic shares its name (ignoring case).
  return matched.stream()
      .filter(topic -> fromStore.stream()
          .map(KafkaTopic::getName)
          .anyMatch(storeName -> storeName.equalsIgnoreCase(topic.getName())))
      .collect(toList());
}
// NOTE(review): duplicate mid-method fragment — the first branch (cut off above) keeps
// topics whose names appear in `whitelistedTopics`; the `else if` branch collects
// blacklisted topic names from config-store URIs and filters those topics out; a trailing
// `else` is cut off below. Both `contains` checks compare names case-sensitively — TODO
// confirm, since other config-store matching in this codebase compares names ignoring case.
.filter((KafkaTopic p) -> whitelistedTopics.contains(p.getName())) .collect(Collectors.toList()); } else if (properties.containsKey(GOBBLIN_CONFIG_TAGS_BLACKLIST)) { .forEach(((URI u) -> blacklistedTopics.add(ConfigStoreUtils.getTopicNameFromURI(u)))); return allTopics.stream() .filter((KafkaTopic p) -> !blacklistedTopics.contains(p.getName())) .collect(Collectors.toList()); } else {