/**
 * Get a property as long from a work unit that may or may not be a multiworkunit.
 * This method is needed because the SingleLevelWorkUnitPacker does not squeeze work units
 * into a multiworkunit, and thus does not append the partitionId to property keys, while
 * the BiLevelWorkUnitPacker does.
 * Return 0 as default if key not found in either form.
 */
public static long getPropAsLongFromSingleOrMultiWorkUnitState(WorkUnitState workUnitState, String key,
    int partitionId) {
  // Plain (un-suffixed) key is present when the work unit was never packed into a multiworkunit.
  if (workUnitState.contains(key)) {
    return Long.parseLong(workUnitState.getProp(key));
  }
  // Otherwise look up the partition-suffixed form, defaulting to "0" when absent.
  String partitionKey = KafkaUtils.getPartitionPropName(key, partitionId);
  return Long.parseLong(workUnitState.getProp(partitionKey, "0"));
} }
/**
 * Add a list of partitions of the same topic to a {@link WorkUnit}.
 */
private static void populateMultiPartitionWorkUnit(List<KafkaPartition> partitions, WorkUnit workUnit) {
  Preconditions.checkArgument(!partitions.isEmpty(), "There should be at least one partition");
  // All partitions share one topic, so tag the work unit with the first partition's topic name.
  GobblinMetrics.addCustomTagToState(workUnit, new Tag<>("kafkaTopic", partitions.get(0).getTopicName()));
  int index = 0;
  for (KafkaPartition partition : partitions) {
    // Suffix each property key with the partition's position so entries don't collide.
    workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PARTITION_ID, index), partition.getId());
    workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_ID, index), partition.getLeader().getId());
    workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_HOSTANDPORT, index),
        partition.getLeader().getHostAndPort());
    index++;
  }
}
/**
 * Set the average time in milliseconds to pull a record of a partition, which will be stored in property
 * "[topicname].[partitionid].avg.record.millis".
 */
public static void setPartitionAvgRecordMillis(State state, KafkaPartition partition, double millis) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_MILLIS;
  state.setProp(propName, millis);
}
/**
 * Determines whether the given {@link State} contains "[topicname].[partitionid].avg.record.size".
 */
public static boolean containsPartitionAvgRecordSize(State state, KafkaPartition partition) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE;
  return state.contains(propName);
}
/**
 * Get the average record size of a partition, which is stored in property
 * "[topicname].[partitionid].avg.record.size".
 *
 * Note: despite what an earlier version of this doc said, there is NO default value — no
 * defaultSize parameter exists and {@link State#getPropAsLong(String)} is called without one.
 * If the property is absent this presumably fails to parse the missing value (TODO confirm
 * against State's implementation); callers should first check
 * {@link #containsPartitionAvgRecordSize(State, KafkaPartition)}.
 */
public static long getPartitionAvgRecordSize(State state, KafkaPartition partition) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE;
  return state.getPropAsLong(propName);
}
// Record the start/stop fetch-epoch-time boundaries for this partition on the work unit state,
// keying each property with the partition index so multiple partitions in one (multi) work unit
// do not collide. Partitions missing from the maps default to epoch time 0.
// NOTE(review): enclosing method is not visible in this chunk — fragment documented in place.
this.workUnitState.setProp(KafkaUtils.getPartitionPropName(KafkaSource.START_FETCH_EPOCH_TIME, partitionId),
    Long.toString(this.startFetchEpochTime.getOrDefault(partition, 0L)));
this.workUnitState.setProp(KafkaUtils.getPartitionPropName(KafkaSource.STOP_FETCH_EPOCH_TIME, partitionId),
    Long.toString(this.stopFetchEpochTime.getOrDefault(partition, 0L)));
/**
 * Set the average record size of a partition, which will be stored in property
 * "[topicname].[partitionid].avg.record.size".
 */
public static void setPartitionAvgRecordSize(State state, KafkaPartition partition, long size) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE;
  state.setProp(propName, size);
}
/**
 * Determines whether the given {@link State} contains "[topicname].[partitionid].avg.record.millis".
 */
public static boolean containsPartitionAvgRecordMillis(State state, KafkaPartition partition) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_MILLIS;
  return state.contains(propName);
}
/**
 * Get a list of {@link KafkaPartition}s from a {@link State} object. The given {@link State} should contain property
 * {@link KafkaSource#TOPIC_NAME}. If there are multiple partitions in the {@link State}, all partitions should have
 * the same topic name.
 *
 * It first checks whether the given {@link State} contains "partition.id.i", "leader.id.i" and
 * "leader.hostandport.i", i = 0,1,2,...
 *
 * Otherwise it will call {@link #getPartition(State)}.
 */
public static List<KafkaPartition> getPartitions(State state) {
  List<KafkaPartition> partitions = Lists.newArrayList();
  String topicName = state.getProp(KafkaSource.TOPIC_NAME);
  // Scan indexed properties (written for multi-partition work units) until the first gap.
  int i = 0;
  while (state.contains(KafkaUtils.getPartitionPropName(KafkaSource.PARTITION_ID, i))) {
    partitions.add(new KafkaPartition.Builder().withTopicName(topicName)
        .withId(state.getPropAsInt(KafkaUtils.getPartitionPropName(KafkaSource.PARTITION_ID, i)))
        .withLeaderId(state.getPropAsInt(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_ID, i)))
        .withLeaderHostAndPort(state.getProp(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_HOSTANDPORT, i)))
        .build());
    i++;
  }
  // No indexed entries at all: fall back to the single-partition property form.
  if (partitions.isEmpty()) {
    partitions.add(getPartition(state));
  }
  return partitions;
}
/** * Get the average time to pull a record of a partition, which is stored in property * "[topicname].[partitionid].avg.record.millis". If state doesn't contain this property, it returns defaultValue. */ public static double getPartitionAvgRecordMillis(State state, KafkaPartition partition) { double avgRecordMillis = state.getPropAsDouble( getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_MILLIS); // cap to prevent a poorly behaved topic from impacting the bin-packing int avgFetchTimeCap = state.getPropAsInt(ConfigurationKeys.KAFKA_SOURCE_AVG_FETCH_TIME_CAP, ConfigurationKeys.DEFAULT_KAFKA_SOURCE_AVG_FETCH_TIME_CAP); if (avgFetchTimeCap > 0 && avgRecordMillis > avgFetchTimeCap) { log.info("Topic {} partition {} has an average fetch time of {}, capping it to {}", partition.getTopicName(), partition.getId(), avgRecordMillis, avgFetchTimeCap); avgRecordMillis = avgFetchTimeCap; } return avgRecordMillis; }
// Copy the previous run's watermark/offset bookkeeping properties from the per-partition work
// unit (plain keys) onto the packed work unit, re-keyed with this partition's index so entries
// from different partitions in the same packed work unit stay distinct.
// NOTE(review): enclosing method (and the loop declaring `wu`/`index`) is not visible in this
// chunk — fragment documented in place.
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_START_FETCH_EPOCH_TIME, index),
    wu.getProp(KafkaSource.PREVIOUS_START_FETCH_EPOCH_TIME));
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_STOP_FETCH_EPOCH_TIME, index),
    wu.getProp(KafkaSource.PREVIOUS_STOP_FETCH_EPOCH_TIME));
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_LOW_WATERMARK, index),
    wu.getProp(KafkaSource.PREVIOUS_LOW_WATERMARK));
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_HIGH_WATERMARK, index),
    wu.getProp(KafkaSource.PREVIOUS_HIGH_WATERMARK));
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_OFFSET_FETCH_EPOCH_TIME, index),
    wu.getProp(KafkaSource.PREVIOUS_OFFSET_FETCH_EPOCH_TIME));
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.OFFSET_FETCH_EPOCH_TIME, index),
    wu.getProp(KafkaSource.OFFSET_FETCH_EPOCH_TIME));
workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PREVIOUS_LATEST_OFFSET, index),
    wu.getProp(KafkaSource.PREVIOUS_LATEST_OFFSET));
// Advance to the next partition slot.
index++;
/**
 * Get a property as long from a work unit that may or may not be a multiworkunit.
 * This method is needed because the SingleLevelWorkUnitPacker does not squeeze work units
 * into a multiworkunit, and thus does not append the partitionId to property keys, while
 * the BiLevelWorkUnitPacker does.
 * Return 0 as default if key not found in either form.
 */
public static long getPropAsLongFromSingleOrMultiWorkUnitState(WorkUnitState workUnitState, String key,
    int partitionId) {
  // Plain (un-suffixed) key is present when the work unit was never packed into a multiworkunit.
  if (workUnitState.contains(key)) {
    return Long.parseLong(workUnitState.getProp(key));
  }
  // Otherwise look up the partition-suffixed form, defaulting to "0" when absent.
  String partitionKey = KafkaUtils.getPartitionPropName(key, partitionId);
  return Long.parseLong(workUnitState.getProp(partitionKey, "0"));
} }
/**
 * Add a list of partitions of the same topic to a {@link WorkUnit}.
 */
private static void populateMultiPartitionWorkUnit(List<KafkaPartition> partitions, WorkUnit workUnit) {
  Preconditions.checkArgument(!partitions.isEmpty(), "There should be at least one partition");
  // All partitions share one topic, so tag the work unit with the first partition's topic name.
  GobblinMetrics.addCustomTagToState(workUnit, new Tag<>("kafkaTopic", partitions.get(0).getTopicName()));
  int index = 0;
  for (KafkaPartition partition : partitions) {
    // Suffix each property key with the partition's position so entries don't collide.
    workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.PARTITION_ID, index), partition.getId());
    workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_ID, index), partition.getLeader().getId());
    workUnit.setProp(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_HOSTANDPORT, index),
        partition.getLeader().getHostAndPort());
    index++;
  }
}
/**
 * Set the average time in milliseconds to pull a record of a partition, which will be stored in property
 * "[topicname].[partitionid].avg.record.millis".
 */
public static void setPartitionAvgRecordMillis(State state, KafkaPartition partition, double millis) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_MILLIS;
  state.setProp(propName, millis);
}
/**
 * Determines whether the given {@link State} contains "[topicname].[partitionid].avg.record.size".
 */
public static boolean containsPartitionAvgRecordSize(State state, KafkaPartition partition) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE;
  return state.contains(propName);
}
/**
 * Determines whether the given {@link State} contains "[topicname].[partitionid].avg.record.millis".
 */
public static boolean containsPartitionAvgRecordMillis(State state, KafkaPartition partition) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_MILLIS;
  return state.contains(propName);
}
/**
 * Get the average record size of a partition, which is stored in property
 * "[topicname].[partitionid].avg.record.size".
 *
 * Note: despite what an earlier version of this doc said, there is NO default value — no
 * defaultSize parameter exists and {@link State#getPropAsLong(String)} is called without one.
 * If the property is absent this presumably fails to parse the missing value (TODO confirm
 * against State's implementation); callers should first check
 * {@link #containsPartitionAvgRecordSize(State, KafkaPartition)}.
 */
public static long getPartitionAvgRecordSize(State state, KafkaPartition partition) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE;
  return state.getPropAsLong(propName);
}
/**
 * Set the average record size of a partition, which will be stored in property
 * "[topicname].[partitionid].avg.record.size".
 */
public static void setPartitionAvgRecordSize(State state, KafkaPartition partition, long size) {
  String propName =
      getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_SIZE;
  state.setProp(propName, size);
}
/**
 * Get a list of {@link KafkaPartition}s from a {@link State} object. The given {@link State} should contain property
 * {@link KafkaSource#TOPIC_NAME}. If there are multiple partitions in the {@link State}, all partitions should have
 * the same topic name.
 *
 * It first checks whether the given {@link State} contains "partition.id.i", "leader.id.i" and
 * "leader.hostandport.i", i = 0,1,2,...
 *
 * Otherwise it will call {@link #getPartition(State)}.
 */
public static List<KafkaPartition> getPartitions(State state) {
  List<KafkaPartition> partitions = Lists.newArrayList();
  String topicName = state.getProp(KafkaSource.TOPIC_NAME);
  // Scan indexed properties (written for multi-partition work units) until the first gap.
  int i = 0;
  while (state.contains(KafkaUtils.getPartitionPropName(KafkaSource.PARTITION_ID, i))) {
    partitions.add(new KafkaPartition.Builder().withTopicName(topicName)
        .withId(state.getPropAsInt(KafkaUtils.getPartitionPropName(KafkaSource.PARTITION_ID, i)))
        .withLeaderId(state.getPropAsInt(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_ID, i)))
        .withLeaderHostAndPort(state.getProp(KafkaUtils.getPartitionPropName(KafkaSource.LEADER_HOSTANDPORT, i)))
        .build());
    i++;
  }
  // No indexed entries at all: fall back to the single-partition property form.
  if (partitions.isEmpty()) {
    partitions.add(getPartition(state));
  }
  return partitions;
}
/** * Get the average time to pull a record of a partition, which is stored in property * "[topicname].[partitionid].avg.record.millis". If state doesn't contain this property, it returns defaultValue. */ public static double getPartitionAvgRecordMillis(State state, KafkaPartition partition) { double avgRecordMillis = state.getPropAsDouble( getPartitionPropName(partition.getTopicName(), partition.getId()) + "." + KafkaSource.AVG_RECORD_MILLIS); // cap to prevent a poorly behaved topic from impacting the bin-packing int avgFetchTimeCap = state.getPropAsInt(ConfigurationKeys.KAFKA_SOURCE_AVG_FETCH_TIME_CAP, ConfigurationKeys.DEFAULT_KAFKA_SOURCE_AVG_FETCH_TIME_CAP); if (avgFetchTimeCap > 0 && avgRecordMillis > avgFetchTimeCap) { log.info("Topic {} partition {} has an average fetch time of {}, capping it to {}", partition.getTopicName(), partition.getId(), avgRecordMillis, avgFetchTimeCap); avgRecordMillis = avgFetchTimeCap; } return avgRecordMillis; }