/**
 * Builds an extractor bound to the single topic partition described by the given state.
 *
 * <p>A Kafka consumer is obtained from {@link KafkaSimpleStreamingSource}, registered with the
 * closer, and assigned exactly one {@link TopicPartition}. A schema registry is set up only when
 * the state carries {@link KafkaSchemaRegistry#KAFKA_SCHEMA_REGISTRY_CLASS}; the fetch timeout is
 * read from the state with a fallback to the configured default.
 *
 * @param state work unit state supplying the consumer properties, topic name and partition id
 */
public KafkaSimpleStreamingExtractor(WorkUnitState state) {
  super(state);

  // Create the consumer and hand it to the closer right away.
  this._consumer =
      KafkaSimpleStreamingSource.getKafkaConsumer(ConfigUtils.propertiesToConfig(state.getProperties()));
  this.closer.register(this._consumer);

  // This extractor reads from exactly one partition of one topic.
  this._partition = new TopicPartition(
      KafkaSimpleStreamingSource.getTopicNameFromState(state),
      KafkaSimpleStreamingSource.getPartitionIdFromState(state));
  this._consumer.assign(Collections.singletonList(this._partition));

  // The schema registry is optional: it is only instantiated when a registry class is configured.
  if (state.contains(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_CLASS)) {
    this._schemaRegistry = Optional.of(KafkaSchemaRegistry.<String, S>get(state.getProperties()));
  } else {
    this._schemaRegistry = Optional.<KafkaSchemaRegistry<String, S>>absent();
  }

  this.fetchTimeOut = state.getPropAsLong(
      AbstractBaseKafkaConsumerClient.CONFIG_KAFKA_FETCH_TIMEOUT_VALUE,
      AbstractBaseKafkaConsumerClient.CONFIG_KAFKA_FETCH_TIMEOUT_VALUE_DEFAULT);
}
@Override public List<WorkUnit> getWorkunits(SourceState state) { Config config = ConfigUtils.propertiesToConfig(state.getProperties()); Consumer<String, byte[]> consumer = getKafkaConsumer(config); LOG.debug("Consumer is {}", consumer); String topic = ConfigUtils.getString(config, TOPIC_WHITELIST, StringUtils.EMPTY); // TODO: fix this to use the new API when KafkaWrapper is fixed List<WorkUnit> workUnits = new ArrayList<WorkUnit>(); List<PartitionInfo> topicPartitions; topicPartitions = consumer.partitionsFor(topic); LOG.info("Partition count is {}", topicPartitions.size()); for (PartitionInfo topicPartition : topicPartitions) { Extract extract = this.createExtract(DEFAULT_TABLE_TYPE, DEFAULT_NAMESPACE_NAME, topicPartition.topic()); LOG.info("Partition info is {}", topicPartition); WorkUnit workUnit = WorkUnit.create(extract); setTopicNameInState(workUnit, topicPartition.topic()); workUnit.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, topicPartition.topic()); setPartitionId(workUnit, topicPartition.partition()); workUnits.add(workUnit); } return workUnits; }
/** * Tests that the source creates workUnits appropriately. Sets up a topic with a single partition and checks that a * single workUnit is returned with the right parameters sets * @throws IOException * @throws InterruptedException */ @Test public void testSource() throws IOException, InterruptedException { String topic = "testSimpleStreamingSource"; _kafkaTestHelper.provisionTopic(topic); List<WorkUnit> lWu = getWorkUnits(topic); // Check we have a single WorkUnit with the right properties setup. Assert.assertEquals(lWu.size(), 1); WorkUnit wU = lWu.get(0); Assert.assertEquals(KafkaSimpleStreamingSource.getTopicNameFromState(wU), topic); Assert.assertEquals(KafkaSimpleStreamingSource.getPartitionIdFromState(wU), 0); }
/**
 * Asks a fresh {@link KafkaSimpleStreamingSource} for the work units of the given topic.
 *
 * <p>The source state points at the test Kafka broker on localhost, uses the topic as both the
 * whitelist entry and the job name, and configures a String key deserializer together with a
 * byte-array value deserializer.
 *
 * @param topic name of the topic to create work units for
 * @return the work units returned by the source
 */
private List<WorkUnit> getWorkUnits(String topic) {
  String brokerAddress = "localhost:" + _kafkaTestHelper.getKafkaServerPort();

  SourceState sourceState = new SourceState();
  sourceState.setProp(ConfigurationKeys.KAFKA_BROKERS, brokerAddress);
  sourceState.setProp(KafkaSimpleStreamingSource.TOPIC_WHITELIST, topic);
  sourceState.setProp(ConfigurationKeys.JOB_NAME_KEY, topic);
  sourceState.setProp(
      KafkaSimpleStreamingSource.TOPIC_KEY_DESERIALIZER,
      "org.apache.kafka.common.serialization.StringDeserializer");
  sourceState.setProp(
      KafkaSimpleStreamingSource.TOPIC_VALUE_DESERIALIZER,
      "org.apache.kafka.common.serialization.ByteArrayDeserializer");

  KafkaSimpleStreamingSource<String, byte[]> source = new KafkaSimpleStreamingSource<String, byte[]>();
  List<WorkUnit> workUnits = source.getWorkunits(sourceState);
  return workUnits;
}