@Override
public long getExpectedRecordCount() {
  return this.lowWatermark.getGap(this.highWatermark);
}
@SuppressWarnings("deprecation") protected static WatermarkInterval getWatermarkIntervalFromMultiWorkUnit(MultiWorkUnit multiWorkUnit) { List<Long> lowWatermarkValues = Lists.newArrayList(); List<Long> expectedHighWatermarkValues = Lists.newArrayList(); for (WorkUnit workUnit : multiWorkUnit.getWorkUnits()) { lowWatermarkValues.add(workUnit.getLowWaterMark()); expectedHighWatermarkValues.add(workUnit.getHighWaterMark()); } return new WatermarkInterval(new MultiLongWatermark(lowWatermarkValues), new MultiLongWatermark(expectedHighWatermarkValues)); }
private boolean currentPartitionFinished() {
  if (this.currentPartitionIdx == INITIAL_PARTITION_IDX) {
    return true;
  } else if (this.nextWatermark.get(this.currentPartitionIdx) >= this.highWatermark.get(this.currentPartitionIdx)) {
    LOG.info("Finished pulling partition " + this.getCurrentPartition());
    return true;
  } else {
    return false;
  }
}
MultiLongWatermark previousLowWatermark = workUnit.getLowWatermark(MultiLongWatermark.class);
MultiLongWatermark previousExpectedHighWatermark = workUnit.getExpectedHighWatermark(MultiLongWatermark.class);
Preconditions.checkArgument(partitions.size() == watermark.size(), String.format(
    "Num of partitions doesn't match number of watermarks: partitions=%s, watermarks=%s", partitions, watermark));
// For each partition, record the previous actual offset and the previous low/expected-high watermarks,
// skipping entries that still hold the default (unset) watermark value. The per-index reads imply an
// enclosing loop over the partition list; 'watermark' and 'partitions' come from the surrounding context.
for (int i = 0; i < partitions.size(); i++) {
  KafkaPartition partition = partitions.get(i);
  if (watermark.get(i) != ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
    this.previousOffsets.put(partition, watermark.get(i));
  }
  if (previousLowWatermark.get(i) != ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
    this.previousLowWatermarks.put(partition, previousLowWatermark.get(i));
  }
  if (previousExpectedHighWatermark.get(i) != ConfigurationKeys.DEFAULT_WATERMARK_VALUE) {
    this.previousExpectedHighWatermarks.put(partition, previousExpectedHighWatermark.get(i));
  }
}
private boolean allPartitionsFinished() {
  return this.currentPartitionIdx != INITIAL_PARTITION_IDX && this.currentPartitionIdx >= this.highWatermark.size();
}
@Override
public Object getSchema() {
  try {
    LOG.info("Getting schema for {}. Gap: {} HighWaterMark: {}", this.topicName,
        this.lowWatermark.getGap(this.highWatermark), this.highWatermark);
    // If the high watermark equals the low watermark, this may be an empty work unit
    if (this.lowWatermark.getGap(this.highWatermark) == 0) {
      LOG.info("Not getting schema for {} as the gap between high and low watermark is 0", this.topicName);
      return null;
    }
    return this.kafkaSchemaRegistry.getLatestSchemaByTopic(this.topicName);
  } catch (SchemaRegistryException e) {
    throw new RuntimeException(e);
  }
}
@SuppressWarnings("deprecation") protected static WatermarkInterval getWatermarkIntervalFromWorkUnit(WorkUnit workUnit) { if (workUnit instanceof MultiWorkUnit) { return getWatermarkIntervalFromMultiWorkUnit((MultiWorkUnit) workUnit); } List<Long> lowWatermarkValues = Lists.newArrayList(workUnit.getLowWaterMark()); List<Long> expectedHighWatermarkValues = Lists.newArrayList(workUnit.getHighWaterMark()); return new WatermarkInterval(new MultiLongWatermark(lowWatermarkValues), new MultiLongWatermark(expectedHighWatermarkValues)); }
/**
 * Given a low watermark (starting point) and a high watermark (target), returns the percentage
 * of events pulled.
 *
 * @return a percentage value between 0 and 100.
 */
@Override
public short calculatePercentCompletion(Watermark lowWatermark, Watermark highWatermark) {
  Preconditions.checkArgument(
      lowWatermark instanceof MultiLongWatermark && highWatermark instanceof MultiLongWatermark,
      String.format("Arguments of %s.%s must be of type %s", MultiLongWatermark.class.getSimpleName(),
          Thread.currentThread().getStackTrace()[1].getMethodName(), MultiLongWatermark.class.getSimpleName()));

  long pulled = ((MultiLongWatermark) lowWatermark).getGap(this);
  long all = ((MultiLongWatermark) lowWatermark).getGap((MultiLongWatermark) highWatermark);
  Preconditions.checkState(all > 0);
  long percent = Math.min(100, LongMath.divide(pulled * 100, all, RoundingMode.HALF_UP));
  return (short) percent;
}
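A minimal worked example of the gap arithmetic above (not from the original code; the offsets and variable names are hypothetical), assuming the list-based MultiLongWatermark constructor shown in getWatermarkIntervalFromMultiWorkUnit:

// Hypothetical two-partition extractor that has pulled half of each partition's range.
MultiLongWatermark low = new MultiLongWatermark(Lists.newArrayList(0L, 100L));      // starting offsets
MultiLongWatermark high = new MultiLongWatermark(Lists.newArrayList(100L, 300L));   // target offsets
MultiLongWatermark current = new MultiLongWatermark(Lists.newArrayList(50L, 200L)); // offsets pulled so far
// pulled = (50-0) + (200-100) = 150, all = (100-0) + (300-100) = 300, so the result is 50 percent.
short percent = current.calculatePercentCompletion(low, high);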
// Initialize the low and expected-high watermarks from the work unit; the next watermark starts at the low watermark.
this.lowWatermark = state.getWorkunit().getLowWatermark(MultiLongWatermark.class);
this.highWatermark = state.getWorkunit().getExpectedHighWatermark(MultiLongWatermark.class);
this.nextWatermark = new MultiLongWatermark(this.lowWatermark);

this.kafkaConsumerClientResolver = new ClassAliasResolver<>(GobblinKafkaConsumerClientFactory.class);
try {
/**
 * Record the avg time per record for the current partition, then increment this.currentPartitionIdx,
 * and switch metric context to the new partition.
 */
private void moveToNextPartition() {
  if (this.currentPartitionIdx == INITIAL_PARTITION_IDX) {
    LOG.info("Pulling topic " + this.topicName);
    this.currentPartitionIdx = 0;
  } else {
    updateStatisticsForCurrentPartition();
    this.currentPartitionIdx++;
    this.currentPartitionRecordCount = 0;
    this.currentPartitionTotalSize = 0;
    this.currentPartitionDecodeRecordTime = 0;
    this.currentPartitionFetchMessageBufferTime = 0;
    this.currentPartitionReadRecordTime = 0;
    this.currentPartitionLastSuccessfulRecord = null;
  }

  this.messageIterator = null;
  if (this.currentPartitionIdx < this.partitions.size()) {
    LOG.info(String.format("Pulling partition %s from offset %d to %d, range=%d", this.getCurrentPartition(),
        this.nextWatermark.get(this.currentPartitionIdx), this.highWatermark.get(this.currentPartitionIdx),
        this.highWatermark.get(this.currentPartitionIdx) - this.nextWatermark.get(this.currentPartitionIdx)));
    switchMetricContextToCurrentPartition();
  }

  if (!allPartitionsFinished()) {
    this.startFetchEpochTime.put(this.getCurrentPartition(), System.currentTimeMillis());
  }
}
private WorkUnitState getMockWorkUnitState(Long lowWaterMark, Long highWaterMark) {
  WorkUnit mockWorkUnit = WorkUnit.createEmpty();
  mockWorkUnit.setWatermarkInterval(new WatermarkInterval(
      new MultiLongWatermark(Lists.newArrayList(lowWaterMark)),
      new MultiLongWatermark(Lists.newArrayList(highWaterMark))));

  WorkUnitState mockWorkUnitState = new WorkUnitState(mockWorkUnit, new State());
  mockWorkUnitState.setProp(KafkaSource.TOPIC_NAME, TEST_TOPIC_NAME);
  mockWorkUnitState.setProp(KafkaSource.PARTITION_ID, "1");
  mockWorkUnitState.setProp(ConfigurationKeys.KAFKA_BROKERS, "localhost:8080");
  mockWorkUnitState.setProp(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, TEST_URL);
  return mockWorkUnitState;
}
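A small sketch of how the mock state above could be consumed, using only the watermark accessors already shown in this section; the values simply restate the helper's arguments:

WorkUnitState state = getMockWorkUnitState(0L, 100L);
MultiLongWatermark low = state.getWorkunit().getLowWatermark(MultiLongWatermark.class);
MultiLongWatermark high = state.getWorkunit().getExpectedHighWatermark(MultiLongWatermark.class);
// With the arguments above, low.get(0) == 0, high.get(0) == 100, and low.getGap(high) == 100.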
@Override
public void close() throws IOException {
  if (currentPartitionIdx != INITIAL_PARTITION_IDX) {
    updateStatisticsForCurrentPartition();
  }

  Map<KafkaPartition, Map<String, String>> tagsForPartitionsMap = Maps.newHashMap();

  // Add error partition count and error message count to workUnitState
  this.workUnitState.setProp(ConfigurationKeys.ERROR_PARTITION_COUNT, this.errorPartitions.size());
  this.workUnitState.setProp(ConfigurationKeys.ERROR_MESSAGE_UNDECODABLE_COUNT, this.undecodableMessageCount);

  for (int i = 0; i < this.partitions.size(); i++) {
    LOG.info(String.format("Actual high watermark for partition %s=%d, expected=%d", this.partitions.get(i),
        this.nextWatermark.get(i), this.highWatermark.get(i)));
    tagsForPartitionsMap.put(this.partitions.get(i), createTagsForPartition(i));
  }
  this.workUnitState.setActualHighWatermark(this.nextWatermark);

  if (isInstrumentationEnabled()) {
    for (Map.Entry<KafkaPartition, Map<String, String>> eventTags : tagsForPartitionsMap.entrySet()) {
      new EventSubmitter.Builder(getMetricContext(), GOBBLIN_KAFKA_NAMESPACE).build()
          .submit(KAFKA_EXTRACTOR_TOPIC_METADATA_EVENT_NAME, eventTags.getValue());
    }
  }

  this.closer.close();
}