/**
 * Closes this lease by first delegating to the parent {@code close()} and
 * then invoking {@code close(false)}.
 * NOTE(review): the meaning of the {@code false} argument is defined by the
 * {@code close(boolean)} overload, which is not visible here - confirm.
 */
@Override public void close() { super.close(); close(false); }
/** * Kafka will call this method whenever it is about to rebalance the * consumers for the given partitions. We'll simply take this to mean that * we need to quickly commit what we've got and will return the consumer to * the pool. This method will be called during the poll() method call of * this class and will be called by the same thread calling poll according * to the Kafka API docs. After this method executes the session and kafka * offsets are committed and this lease is closed. * * @param partitions partitions being reassigned */ @Override public void onPartitionsRevoked(final Collection<TopicPartition> partitions) { logger.debug("Rebalance Alert: Paritions '{}' revoked for lease '{}' with consumer '{}'", new Object[]{partitions, this, kafkaConsumer}); //force a commit here. Can reuse the session and consumer after this but must commit now to avoid duplicates if kafka reassigns partition commit(); }
resetInternalState(); return false; final Collection<FlowFile> bundledFlowFiles = getBundles(); if (!bundledFlowFiles.isEmpty()) { getProcessSession().transfer(bundledFlowFiles, REL_SUCCESS); getProcessSession().commit(); kafkaConsumer.commitSync(uncommittedOffsetsMap); resetInternalState(); return true; } catch (final KafkaException kex) { poison(); logger.warn("Duplicates are likely as we were able to commit the process" + " session but received an exception from Kafka while committing" throw kex; } catch (final Throwable t) { poison(); throw t;
private void processRecords(final ConsumerRecords<byte[], byte[]> records) { records.partitions().stream().forEach(partition -> { List<ConsumerRecord<byte[], byte[]>> messages = records.records(partition); if (!messages.isEmpty()) { //update maximum offset map for this topic partition long maxOffset = messages.stream() .mapToLong(record -> record.offset()) .max() .getAsLong(); //write records to content repository and session if (demarcatorBytes != null) { writeDemarcatedData(getProcessSession(), messages, partition); } else if (readerFactory != null && writerFactory != null) { writeRecordData(getProcessSession(), messages, partition); } else { messages.stream().forEach(message -> { writeData(getProcessSession(), message, partition); }); } totalMessages += messages.size(); uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L)); } }); }
/**
 * Polls the underlying Kafka Consumer once and hands the returned records
 * to {@code processRecords}, which creates any new flowfiles necessary or
 * appends to existing ones if in demarcation mode.
 * <p>
 * A {@code ProcessException} is rethrown as-is; any other throwable poisons
 * this lease before being rethrown.
 */
void poll() {
    /*
     * Implementation note:
     * While ConsumeKafka is not being scheduled to poll (for example because
     * back-pressure is engaged on the downstream connection), the Kafka
     * consumer keeps sending heartbeats from a background thread, even if
     * that lasts longer than session.timeout.ms (defaults to 10 sec).
     * If the situation lasts longer than max.poll.interval.ms (defaults to
     * 5 min), the Kafka consumer sends a Leave Group request to the Group
     * Coordinator. When the ConsumeKafka processor is scheduled again, the
     * Kafka client checks whether this client instance is still part of the
     * consumer group and, if not, rejoins before polling messages.
     * This behavior has been fixed via Kafka KIP-62 and is available from
     * Kafka client 0.10.1.0.
     */
    try {
        final ConsumerRecords<byte[], byte[]> polled = kafkaConsumer.poll(10);
        final int polledCount = polled.count();
        lastPollEmpty = (polledCount == 0);
        processRecords(polled);
    } catch (final ProcessException processFailure) {
        // Passed through untouched: the lease is not poisoned for this type.
        throw processFailure;
    } catch (final Throwable unexpected) {
        this.poison();
        throw unexpected;
    }
}
while (this.isScheduled() && lease.continuePolling()) { lease.poll(); if (this.isScheduled() && !lease.commit()) { context.yield();
/**
 * Finishes a bundle so that its flowfile is ready to be transferred.
 * <p>
 * When the bundle has a record writer, the record set is finished and the
 * writer closed. A bundle whose writer produced zero records has its
 * flowfile removed from the session. Otherwise the writer's attributes and
 * MIME type are applied to the flowfile. In every surviving case the Kafka
 * attributes are populated.
 *
 * @param bundle the bundle to finish
 * @return {@code false} if the bundle's flowfile was removed because no
 *         records were written, {@code true} otherwise
 * @throws IOException if finishing the record set or closing the writer fails
 */
private boolean processBundle(final BundleTracker bundle) throws IOException {
    final RecordSetWriter recordWriter = bundle.recordWriter;
    if (recordWriter == null) {
        // Nothing record-oriented to finish; only the Kafka attributes are needed.
        populateAttributes(bundle);
        return true;
    }

    final WriteResult result;
    try {
        result = recordWriter.finishRecordSet();
    } finally {
        recordWriter.close();
    }

    if (result.getRecordCount() == 0) {
        getProcessSession().remove(bundle.flowFile);
        return false;
    }

    final Map<String, String> writerAttributes = new HashMap<>(result.getAttributes());
    writerAttributes.put(CoreAttributes.MIME_TYPE.key(), recordWriter.getMimeType());
    bundle.flowFile = getProcessSession().putAllAttributes(bundle.flowFile, writerAttributes);

    populateAttributes(bundle);
    return true;
}
/**
 * Turns a single Kafka record into its own flowfile and transfers it to
 * {@code REL_SUCCESS}.
 * <p>
 * The record value, when present, becomes the flowfile content. The
 * attributes returned by {@code getAttributes(record)} and the Kafka
 * attributes added by {@code populateAttributes} are applied before the
 * transfer.
 *
 * @param session        the session used to create, write and transfer the flowfile
 * @param record         the Kafka record to write
 * @param topicPartition the topic partition the record came from
 */
private void writeData(final ProcessSession session, ConsumerRecord<byte[], byte[]> record, final TopicPartition topicPartition) {
    final FlowFile created = session.create();
    final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding);
    tracker.incrementRecordCount(1);

    // A record without a value leaves the flowfile content empty.
    final byte[] payload = record.value();
    final FlowFile withContent = (payload == null)
            ? created
            : session.write(created, out -> out.write(payload));

    tracker.updateFlowFile(session.putAllAttributes(withContent, getAttributes(record)));
    populateAttributes(tracker);
    session.transfer(tracker.flowFile, REL_SUCCESS);
}
/**
 * Base close behavior for this lease: resets the lease's internal state.
 * <p>
 * This method is intended to be overridden by the pool that created this
 * ConsumerLease object. The override should ensure that the session given
 * to create this lease is rolled back and that the underlying kafka consumer
 * is either returned to the pool for continued use or destroyed if this
 * lease has been poisoned. It can only be called once. Calling it more than
 * once can result in undefined and non threadsafe behavior.
 */
@Override public void close() { resetInternalState(); }
private void processRecords(final ConsumerRecords<byte[], byte[]> records) { records.partitions().stream().forEach(partition -> { List<ConsumerRecord<byte[], byte[]>> messages = records.records(partition); if (!messages.isEmpty()) { //update maximum offset map for this topic partition long maxOffset = messages.stream() .mapToLong(record -> record.offset()) .max() .getAsLong(); uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L)); //write records to content repository and session if (demarcatorBytes == null) { totalFlowFiles += messages.size(); messages.stream().forEach(message -> { writeData(getProcessSession(), message, partition); }); } else { writeData(getProcessSession(), messages, partition); } } }); }
/**
 * Populates the Kafka attributes of every tracked bundle and gathers the
 * resulting flowfiles.
 *
 * @return the flowfiles of all bundles currently held in the bundle map, in
 *         the iteration order of that map's values
 */
private Collection<FlowFile> getBundles() {
    final Collection<BundleTracker> trackedBundles = bundleMap.values();
    final List<FlowFile> bundled = new ArrayList<>(trackedBundles.size());
    for (final BundleTracker bundle : trackedBundles) {
        // Populate first: it replaces the tracker's flowfile with the updated one.
        populateAttributes(bundle);
        bundled.add(bundle.flowFile);
    }
    return bundled;
}
/**
 * Stamps the tracker's flowfile with its Kafka attributes, reports a
 * provenance RECEIVE event for it and stores the updated flowfile back on
 * the tracker.
 * <p>
 * Offset, partition and topic are always set. The key is set only for a
 * single-record flowfile that has a key; the count is set only when the
 * flowfile holds more than one record.
 *
 * @param tracker the bundle whose flowfile should be updated
 */
private void populateAttributes(final BundleTracker tracker) {
    final Map<String, String> attributes = new HashMap<>();
    attributes.put(KafkaProcessorUtils.KAFKA_OFFSET, String.valueOf(tracker.initialOffset));
    attributes.put(KafkaProcessorUtils.KAFKA_PARTITION, String.valueOf(tracker.partition));
    attributes.put(KafkaProcessorUtils.KAFKA_TOPIC, tracker.topic);

    if (tracker.totalRecords == 1) {
        if (tracker.key != null) {
            attributes.put(KafkaProcessorUtils.KAFKA_KEY, tracker.key);
        }
    } else if (tracker.totalRecords > 1) {
        attributes.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(tracker.totalRecords));
    }

    final FlowFile updated = getProcessSession().putAllAttributes(tracker.flowFile, attributes);

    // Elapsed time since the lease started, reported with the receive event.
    final long elapsedNanos = System.nanoTime() - leaseStartNanos;
    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
    final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, tracker.topic);
    getProcessSession().getProvenanceReporter().receive(updated, transitUri, elapsedMillis);

    tracker.updateFlowFile(updated);
}
while (this.isScheduled() && lease.continuePolling()) { lease.poll(); if (this.isScheduled() && !lease.commit()) { context.yield();
private void processRecords(final ConsumerRecords<byte[], byte[]> records) { records.partitions().stream().forEach(partition -> { List<ConsumerRecord<byte[], byte[]>> messages = records.records(partition); if (!messages.isEmpty()) { //update maximum offset map for this topic partition long maxOffset = messages.stream() .mapToLong(record -> record.offset()) .max() .getAsLong(); //write records to content repository and session if (demarcatorBytes != null) { writeDemarcatedData(getProcessSession(), messages, partition); } else if (readerFactory != null && writerFactory != null) { writeRecordData(getProcessSession(), messages, partition); } else { messages.stream().forEach(message -> { writeData(getProcessSession(), message, partition); }); } totalMessages += messages.size(); uncommittedOffsetsMap.put(partition, new OffsetAndMetadata(maxOffset + 1L)); } }); }
/**
 * Polls the underlying Kafka Consumer once and hands the returned records
 * to {@code processRecords}, which creates any new flowfiles necessary or
 * appends to existing ones if in demarcation mode.
 * <p>
 * A {@code ProcessException} is rethrown as-is; any other throwable poisons
 * this lease before being rethrown.
 */
void poll() {
    /*
     * Implementation note:
     * While ConsumeKafka is not being scheduled to poll (for example because
     * back-pressure is engaged on the downstream connection), the Kafka
     * consumer keeps sending heartbeats from a background thread, even if
     * that lasts longer than session.timeout.ms (defaults to 10 sec).
     * If the situation lasts longer than max.poll.interval.ms (defaults to
     * 5 min), the Kafka consumer sends a Leave Group request to the Group
     * Coordinator. When the ConsumeKafka processor is scheduled again, the
     * Kafka client checks whether this client instance is still part of the
     * consumer group and, if not, rejoins before polling messages.
     * This behavior has been fixed via Kafka KIP-62 and is available from
     * Kafka client 0.10.1.0.
     */
    try {
        final ConsumerRecords<byte[], byte[]> polled = kafkaConsumer.poll(10);
        final int polledCount = polled.count();
        lastPollEmpty = (polledCount == 0);
        processRecords(polled);
    } catch (final ProcessException processFailure) {
        // Passed through untouched: the lease is not poisoned for this type.
        throw processFailure;
    } catch (final Throwable unexpected) {
        this.poison();
        throw unexpected;
    }
}
/**
 * Finishes a bundle so that its flowfile is ready to be transferred.
 * <p>
 * When the bundle has a record writer, the record set is finished and the
 * writer closed. A bundle whose writer produced zero records has its
 * flowfile removed from the session. Otherwise the writer's attributes and
 * MIME type are applied to the flowfile. In every surviving case the Kafka
 * attributes are populated.
 *
 * @param bundle the bundle to finish
 * @return {@code false} if the bundle's flowfile was removed because no
 *         records were written, {@code true} otherwise
 * @throws IOException if finishing the record set or closing the writer fails
 */
private boolean processBundle(final BundleTracker bundle) throws IOException {
    final RecordSetWriter recordWriter = bundle.recordWriter;
    if (recordWriter == null) {
        // Nothing record-oriented to finish; only the Kafka attributes are needed.
        populateAttributes(bundle);
        return true;
    }

    final WriteResult result;
    try {
        result = recordWriter.finishRecordSet();
    } finally {
        recordWriter.close();
    }

    if (result.getRecordCount() == 0) {
        getProcessSession().remove(bundle.flowFile);
        return false;
    }

    final Map<String, String> writerAttributes = new HashMap<>(result.getAttributes());
    writerAttributes.put(CoreAttributes.MIME_TYPE.key(), recordWriter.getMimeType());
    bundle.flowFile = getProcessSession().putAllAttributes(bundle.flowFile, writerAttributes);

    populateAttributes(bundle);
    return true;
}
/**
 * Turns a single Kafka record into its own flowfile and transfers it to
 * {@code REL_SUCCESS}.
 * <p>
 * The record value, when present, becomes the flowfile content. The
 * attributes returned by {@code getAttributes(record)} and the Kafka
 * attributes added by {@code populateAttributes} are applied before the
 * transfer.
 *
 * @param session        the session used to create, write and transfer the flowfile
 * @param record         the Kafka record to write
 * @param topicPartition the topic partition the record came from
 */
private void writeData(final ProcessSession session, ConsumerRecord<byte[], byte[]> record, final TopicPartition topicPartition) {
    final FlowFile created = session.create();
    final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding);
    tracker.incrementRecordCount(1);

    // A record without a value leaves the flowfile content empty.
    final byte[] payload = record.value();
    final FlowFile withContent = (payload == null)
            ? created
            : session.write(created, out -> out.write(payload));

    tracker.updateFlowFile(session.putAllAttributes(withContent, getAttributes(record)));
    populateAttributes(tracker);
    session.transfer(tracker.flowFile, REL_SUCCESS);
}
/**
 * Base close behavior for this lease: resets the lease's internal state.
 * <p>
 * This method is intended to be overridden by the pool that created this
 * ConsumerLease object. The override should ensure that the session given
 * to create this lease is rolled back and that the underlying kafka consumer
 * is either returned to the pool for continued use or destroyed if this
 * lease has been poisoned. It can only be called once. Calling it more than
 * once can result in undefined and non threadsafe behavior.
 */
@Override public void close() { resetInternalState(); }
/**
 * Turns a single Kafka record into its own flowfile and transfers it to
 * {@code REL_SUCCESS}.
 * <p>
 * The record value, when present, becomes the flowfile content. The Kafka
 * attributes added by {@code populateAttributes} are applied before the
 * transfer.
 *
 * @param session        the session used to create, write and transfer the flowfile
 * @param record         the Kafka record to write
 * @param topicPartition the topic partition the record came from
 */
private void writeData(final ProcessSession session, ConsumerRecord<byte[], byte[]> record, final TopicPartition topicPartition) {
    final FlowFile created = session.create();
    final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding);
    tracker.incrementRecordCount(1);

    // A record without a value leaves the flowfile content empty.
    final byte[] payload = record.value();
    final FlowFile withContent = (payload == null)
            ? created
            : session.write(created, out -> out.write(payload));

    tracker.updateFlowFile(withContent);
    populateAttributes(tracker);
    session.transfer(tracker.flowFile, REL_SUCCESS);
}
private void populateAttributes(final BundleTracker tracker) { final Map<String, String> kafkaAttrs = new HashMap<>(); kafkaAttrs.put(KafkaProcessorUtils.KAFKA_OFFSET, String.valueOf(tracker.initialOffset)); if (tracker.key != null && tracker.totalRecords == 1) { kafkaAttrs.put(KafkaProcessorUtils.KAFKA_KEY, tracker.key); } kafkaAttrs.put(KafkaProcessorUtils.KAFKA_PARTITION, String.valueOf(tracker.partition)); kafkaAttrs.put(KafkaProcessorUtils.KAFKA_TOPIC, tracker.topic); if (tracker.totalRecords > 1) { // Add a record.count attribute to remain consistent with other record-oriented processors. If not // reading/writing records, then use "kafka.count" attribute. if (tracker.recordWriter == null) { kafkaAttrs.put(KafkaProcessorUtils.KAFKA_COUNT, String.valueOf(tracker.totalRecords)); } else { kafkaAttrs.put("record.count", String.valueOf(tracker.totalRecords)); } } final FlowFile newFlowFile = getProcessSession().putAllAttributes(tracker.flowFile, kafkaAttrs); final long executionDurationMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - leaseStartNanos); final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, tracker.topic); getProcessSession().getProvenanceReporter().receive(newFlowFile, transitUri, executionDurationMillis); tracker.updateFlowFile(newFlowFile); }