/**
 * Record the ssp and the offset. Do not submit it to the consumer yet.
 * Registration is only legal before {@code start()}; the actual Kafka subscription
 * happens later, from the accumulated {@code topicPartitionsToOffset} map.
 * @param systemStreamPartition ssp to register
 * @param offset offset to register with
 */
@Override
public void register(SystemStreamPartition systemStreamPartition, String offset) {
  // registration after start() would never take effect — fail loudly instead of silently ignoring
  if (started.get()) {
    String msg = String.format("%s: Trying to register partition after consumer has been started. ssp=%s", this,
        systemStreamPartition);
    throw new SamzaException(msg);
  }

  // SSPs belonging to a different system are not ours to consume; skip with a warning
  if (!systemStreamPartition.getSystem().equals(systemName)) {
    LOG.warn("{}: ignoring SSP {}, because this consumer's system doesn't match.", this, systemStreamPartition);
    return;
  }
  LOG.info("{}: Registering ssp = {} with offset {}", this, systemStreamPartition, offset);

  super.register(systemStreamPartition, offset);

  TopicPartition tp = toTopicPartition(systemStreamPartition);

  topicPartitionsToSSP.put(tp, systemStreamPartition);

  String existingOffset = topicPartitionsToOffset.get(tp);
  // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
  // compareOffsets(existing, new) > 0 means the existing offset is newer than the incoming one.
  if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
    topicPartitionsToOffset.put(tp, offset);
  }

  metrics.registerTopicAndPartition(toTopicAndPartition(tp));
}
@Override
public void start() {
  // CAS guarantees the startup sequence below runs at most once
  if (!started.compareAndSet(false, true)) {
    LOG.warn("{}: Attempting to start the consumer for the second (or more) time.", this);
    return;
  }

  // NOTE(review): if this check trips, `started` has already been flipped to true above,
  // so a later legitimate start() would be rejected as a duplicate — confirm this is intended.
  if (stopped.get()) {
    LOG.error("{}: Attempting to start a stopped consumer", this);
    return;
  }
  // initialize the subscriptions for all the registered TopicPartitions
  startSubscription();
  // needs to be called after all the registrations are completed
  setFetchThresholds();

  startConsumer();

  LOG.info("{}: Consumer started", this);
}
// Fragment of a test: create a consumer, register two partitions, start it, and
// inject a single message for ssp0 through the message sink.
KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
consumer.register(ssp0, "0");
consumer.register(ssp1, "0");
consumer.start();
consumer.messageSink.addMessage(ssp0, ime0);
// ssp0 holds exactly the one message injected above
Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
// NOTE(review): ssp1 is asserted to hold two messages (ime1, ime11), which must have been
// added by code outside this visible fragment — verify against the full test method.
Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
// byte-size accounting must match the injected envelope sizes
Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
consumer.stop();
/**
 * Add new partition to the list of polled partitions.
 * Must only be called before {@link KafkaConsumerProxy#start} is called.
 * @param ssp the system-stream-partition to start polling
 * @param nextOffset the first offset to fetch for this partition (already vetted by the caller)
 */
public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) {
  LOG.info(String.format("Adding new topicPartition %s with offset %s to queue for consumer %s", ssp, nextOffset,
      this));
  topicPartitionToSSP.put(KafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs

  // this is already vetted offset so there is no need to validate it
  nextOffsets.put(ssp, nextOffset);

  // keep the partition-count metric in sync with the registered set
  kafkaConsumerMetrics.setNumTopicPartitions(metricName, nextOffsets.size());
}
/**
 * Verifies that the configured fetch thresholds are split evenly across all
 * registered partitions once the consumer starts.
 */
@Test
public void testFetchThresholdShouldDivideEvenlyAmongPartitions() {
  final KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);
  final int partitionCount = 50;

  // register partitionCount partitions of the same test stream, all starting at offset "0"
  for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
    consumer.register(new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(partitionId)), "0");
  }

  consumer.start();

  // the message threshold is divided evenly among partitions
  Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_MSGS) / partitionCount, consumer.perPartitionFetchThreshold);
  // the byte threshold is halved first (implementation detail of the consumer), then divided
  Assert.assertEquals(Long.valueOf(FETCH_THRESHOLD_BYTES) / 2 / partitionCount,
      consumer.perPartitionFetchThresholdBytes);

  consumer.stop();
}
/**
 * Verifies that when the same SSP is registered twice with different offsets,
 * the consumer keeps the older (smaller) offset so no messages are skipped.
 */
@Test
public void testConsumerRegisterOlderOffsetOfTheSamzaSSP() {

  KafkaSystemConsumer consumer = createConsumer(FETCH_THRESHOLD_MSGS, FETCH_THRESHOLD_BYTES);

  SystemStreamPartition ssp0 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(0));
  SystemStreamPartition ssp1 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(1));
  SystemStreamPartition ssp2 = new SystemStreamPartition(TEST_SYSTEM, TEST_STREAM, new Partition(2));

  // ssp0: older offset registered first, newer second
  consumer.register(ssp0, "0");
  consumer.register(ssp0, "5");
  // ssp1: older offset registered first, newer second
  consumer.register(ssp1, "2");
  consumer.register(ssp1, "3");
  // ssp2: registered only once
  consumer.register(ssp2, "0");

  // in every case the older offset must win
  assertEquals("0", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp0)));
  assertEquals("2", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp1)));
  assertEquals("0", consumer.topicPartitionsToOffset.get(KafkaSystemConsumer.toTopicPartition(ssp2)));
}
/**
 * Poll for new messages, first verifying that the background KafkaConsumerProxy is
 * still alive. If the proxy has died, the consumer is stopped and a SamzaException
 * carrying the proxy's failure cause is thrown.
 * @param systemStreamPartitions partitions to poll
 * @param timeout max time to wait for messages
 * @return map from SSP to the envelopes fetched for it
 * @throws InterruptedException if the underlying poll is interrupted
 */
@Override
public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll(
    Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException {

  // check if the proxy is running
  if (!proxy.isRunning()) {
    LOG.info("{}: KafkaConsumerProxy is not running. Stopping the consumer.", this);
    stop();
    String message = String.format("%s: KafkaConsumerProxy has stopped.", this);
    // propagate the proxy's original failure as the cause
    throw new SamzaException(message, proxy.getFailureCause());
  }

  return super.poll(systemStreamPartitions, timeout);
}
/**
 * Update per-partition read metrics (counts, bytes, offset, high watermark) for one
 * consumed record. The high watermark is derived from the record's offset plus the
 * most recently observed lag for this partition.
 * @param r the consumed Kafka record
 * @param tp the topic-partition the record came from
 */
private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
  TopicAndPartition tap = KafkaSystemConsumer.toTopicAndPartition(tp);
  SystemStreamPartition ssp = new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));

  // a record for an SSP with no lag entry means it was never registered — a programming error
  Long lag = latestLags.get(ssp);
  if (lag == null) {
    throw new SamzaException("Unknown/unregistered ssp in latestLags. ssp=" + ssp + "; system=" + systemName);
  }
  long currentSSPLag = lag.longValue(); // lag between the current offset and the highwatermark
  if (currentSSPLag < 0) {
    // negative lag means the lag metric was unavailable; skip metric updates for this record
    return;
  }

  long recordOffset = r.offset();
  long highWatermark = recordOffset + currentSSPLag; // derived value for the highwatermark

  int size = getRecordSize(r);
  kafkaConsumerMetrics.incReads(tap);
  kafkaConsumerMetrics.incBytesReads(tap, size);
  kafkaConsumerMetrics.setOffsets(tap, recordOffset);
  kafkaConsumerMetrics.incClientBytesReads(metricName, size);
  kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark);
}
// Fragment of a test body: register both partitions, start, then push one message for ssp0.
consumer.register(ssp0, "0");
consumer.register(ssp1, "0");
consumer.start();
consumer.messageSink.addMessage(ssp0, ime0);
// only the single injected message should be queued for ssp0
Assert.assertEquals(1, consumer.getNumMessagesInQueue(ssp0));
// NOTE(review): two messages (ime1, ime11) are expected for ssp1 — they must be added
// elsewhere in this test, outside the visible fragment; confirm against the full method.
Assert.assertEquals(2, consumer.getNumMessagesInQueue(ssp1));
// size accounting should equal the sum of the queued envelopes' sizes
Assert.assertEquals(ime0Size, consumer.getMessagesSizeInQueue(ssp0));
Assert.assertEquals(ime1Size + ime11Size, consumer.getMessagesSizeInQueue(ssp1));
consumer.stop();
/** * Add new partition to the list of polled partitions. * Bust only be called before {@link KafkaConsumerProxy#start} is called.. */ public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) { LOG.info(String.format("Adding new topicPartition %s with offset %s to queue for consumer %s", ssp, nextOffset, this)); topicPartitionToSSP.put(KafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs // this is already vetted offset so there is no need to validate it nextOffsets.put(ssp, nextOffset); kafkaConsumerMetrics.setNumTopicPartitions(metricName, nextOffsets.size()); }
@Override public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll( Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException { // check if the proxy is running if (!proxy.isRunning()) { stop(); String message = String.format("%s: KafkaConsumerProxy has stopped.", this); throw new SamzaException(message, proxy.getFailureCause()); } return super.poll(systemStreamPartitions, timeout); }
private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) { TopicAndPartition tap = KafkaSystemConsumer.toTopicAndPartition(tp); SystemStreamPartition ssp = new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition())); Long lag = latestLags.get(ssp); if (lag == null) { throw new SamzaException("Unknown/unregistered ssp in latestLags. ssp=" + ssp + "; system=" + systemName); } long currentSSPLag = lag.longValue(); // lag between the current offset and the highwatermark if (currentSSPLag < 0) { return; } long recordOffset = r.offset(); long highWatermark = recordOffset + currentSSPLag; // derived value for the highwatermark int size = getRecordSize(r); kafkaConsumerMetrics.incReads(tap); kafkaConsumerMetrics.incBytesReads(tap, size); kafkaConsumerMetrics.setOffsets(tap, recordOffset); kafkaConsumerMetrics.incClientBytesReads(metricName, size); kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark); }
/** * record the ssp and the offset. Do not submit it to the consumer yet. * @param systemStreamPartition ssp to register * @param offset offset to register with */ @Override public void register(SystemStreamPartition systemStreamPartition, String offset) { if (started.get()) { String msg = String.format("%s: Trying to register partition after consumer has been started. ssp=%s", this, systemStreamPartition); throw new SamzaException(msg); } if (!systemStreamPartition.getSystem().equals(systemName)) { LOG.warn("{}: ignoring SSP {}, because this consumer's system doesn't match.", this, systemStreamPartition); return; } LOG.info("{}: Registering ssp = {} with offset {}", this, systemStreamPartition, offset); super.register(systemStreamPartition, offset); TopicPartition tp = toTopicPartition(systemStreamPartition); topicPartitionsToSSP.put(tp, systemStreamPartition); String existingOffset = topicPartitionsToOffset.get(tp); // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages. if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) { topicPartitionsToOffset.put(tp, offset); } metrics.registerTopicAndPartition(toTopicAndPartition(tp)); }
@Override public void start() { if (!started.compareAndSet(false, true)) { LOG.warn("{}: Attempting to start the consumer for the second (or more) time.", this); return; } if (stopped.get()) { LOG.error("{}: Attempting to start a stopped consumer", this); return; } // initialize the subscriptions for all the registered TopicPartitions startSubscription(); // needs to be called after all the registrations are completed setFetchThresholds(); startConsumer(); LOG.info("{}: Consumer started", this); }
/**
 * Add new partition to the list of polled partitions.
 * Must only be called before {@link KafkaConsumerProxy#start} is called.
 * @param ssp the system-stream-partition to start polling
 * @param nextOffset the first offset to fetch for this partition (already vetted by the caller)
 */
public void addTopicPartition(SystemStreamPartition ssp, long nextOffset) {
  LOG.info(String.format("Adding new topicPartition %s with offset %s to queue for consumer %s", ssp, nextOffset,
      this));
  topicPartitionToSSP.put(KafkaSystemConsumer.toTopicPartition(ssp), ssp); //registered SSPs

  // this is already vetted offset so there is no need to validate it
  nextOffsets.put(ssp, nextOffset);

  // keep the partition-count metric in sync with the registered set
  kafkaConsumerMetrics.setNumTopicPartitions(metricName, nextOffsets.size());
}
@Override public Map<SystemStreamPartition, List<IncomingMessageEnvelope>> poll( Set<SystemStreamPartition> systemStreamPartitions, long timeout) throws InterruptedException { // check if the proxy is running if (!proxy.isRunning()) { stop(); String message = String.format("%s: KafkaConsumerProxy has stopped.", this); throw new SamzaException(message, proxy.getFailureCause()); } return super.poll(systemStreamPartitions, timeout); }
/**
 * Update per-partition read metrics (counts, bytes, offset, high watermark) for one
 * consumed record. The high watermark is derived from the record's offset plus the
 * most recently observed lag for this partition.
 * @param r the consumed Kafka record
 * @param tp the topic-partition the record came from
 */
private void updateMetrics(ConsumerRecord<K, V> r, TopicPartition tp) {
  TopicAndPartition tap = KafkaSystemConsumer.toTopicAndPartition(tp);
  SystemStreamPartition ssp = new SystemStreamPartition(systemName, tp.topic(), new Partition(tp.partition()));

  // a record for an SSP with no lag entry means it was never registered — a programming error
  Long lag = latestLags.get(ssp);
  if (lag == null) {
    throw new SamzaException("Unknown/unregistered ssp in latestLags. ssp=" + ssp + "; system=" + systemName);
  }
  long currentSSPLag = lag.longValue(); // lag between the current offset and the highwatermark
  if (currentSSPLag < 0) {
    // negative lag means the lag metric was unavailable; skip metric updates for this record
    return;
  }

  long recordOffset = r.offset();
  long highWatermark = recordOffset + currentSSPLag; // derived value for the highwatermark

  int size = getRecordSize(r);
  kafkaConsumerMetrics.incReads(tap);
  kafkaConsumerMetrics.incBytesReads(tap, size);
  kafkaConsumerMetrics.setOffsets(tap, recordOffset);
  kafkaConsumerMetrics.incClientBytesReads(metricName, size);
  kafkaConsumerMetrics.setHighWatermarkValue(tap, highWatermark);
}
/**
 * Record the ssp and the offset. Do not submit it to the consumer yet.
 * Registration is only legal before {@code start()}; the actual Kafka subscription
 * happens later, from the accumulated {@code topicPartitionsToOffset} map.
 * @param systemStreamPartition ssp to register
 * @param offset offset to register with
 */
@Override
public void register(SystemStreamPartition systemStreamPartition, String offset) {
  // registration after start() would never take effect — fail loudly instead of silently ignoring
  if (started.get()) {
    String msg = String.format("%s: Trying to register partition after consumer has been started. ssp=%s", this,
        systemStreamPartition);
    throw new SamzaException(msg);
  }

  // SSPs belonging to a different system are not ours to consume; skip with a warning
  if (!systemStreamPartition.getSystem().equals(systemName)) {
    LOG.warn("{}: ignoring SSP {}, because this consumer's system doesn't match.", this, systemStreamPartition);
    return;
  }
  LOG.info("{}: Registering ssp = {} with offset {}", this, systemStreamPartition, offset);

  super.register(systemStreamPartition, offset);

  TopicPartition tp = toTopicPartition(systemStreamPartition);

  topicPartitionsToSSP.put(tp, systemStreamPartition);

  String existingOffset = topicPartitionsToOffset.get(tp);
  // register the older (of the two) offset in the consumer, to guarantee we do not miss any messages.
  // compareOffsets(existing, new) > 0 means the existing offset is newer than the incoming one.
  if (existingOffset == null || compareOffsets(existingOffset, offset) > 0) {
    topicPartitionsToOffset.put(tp, offset);
  }

  metrics.registerTopicAndPartition(toTopicAndPartition(tp));
}
@Override
public void start() {
  // CAS guarantees the startup sequence below runs at most once
  if (!started.compareAndSet(false, true)) {
    LOG.warn("{}: Attempting to start the consumer for the second (or more) time.", this);
    return;
  }

  // NOTE(review): if this check trips, `started` has already been flipped to true above,
  // so a later legitimate start() would be rejected as a duplicate — confirm this is intended.
  if (stopped.get()) {
    LOG.error("{}: Attempting to start a stopped consumer", this);
    return;
  }
  // initialize the subscriptions for all the registered TopicPartitions
  startSubscription();
  // needs to be called after all the registrations are completed
  setFetchThresholds();

  startConsumer();

  LOG.info("{}: Consumer started", this);
}
private void populateCurrentLags(Set<SystemStreamPartition> ssps) { Map<MetricName, ? extends Metric> consumerMetrics = kafkaConsumer.metrics(); // populate the MetricNames first time if (perPartitionMetrics.isEmpty()) { HashMap<String, String> tags = new HashMap<>(); tags.put("client-id", clientId); // this is required by the KafkaConsumer to get the metrics for (SystemStreamPartition ssp : ssps) { TopicPartition tp = KafkaSystemConsumer.toTopicPartition(ssp); perPartitionMetrics.put(ssp, new MetricName(tp + ".records-lag", "consumer-fetch-manager-metrics", "", tags)); } } for (SystemStreamPartition ssp : ssps) { MetricName mn = perPartitionMetrics.get(ssp); Metric currentLagMetric = consumerMetrics.get(mn); // High watermark is fixed to be the offset of last available message, // so the lag is now at least 0, which is the same as Samza's definition. // If the lag is not 0, then isAtHead is not true, and kafkaClient keeps polling. long currentLag = (currentLagMetric != null) ? (long) currentLagMetric.value() : -1L; latestLags.put(ssp, currentLag); // calls the setIsAtHead for the BlockingEnvelopeMap sink.setIsAtHighWatermark(ssp, currentLag == 0); } }