/**
 * Fetches {@link SystemStreamMetadata} for the given streams using the default
 * exponential back-off retry policy.
 *
 * @param streamNames names of the streams to fetch metadata for
 * @return map of stream name to its metadata
 */
@Override
public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
  // Build the default retry policy once, then delegate to the retry-aware overload.
  ExponentialSleepStrategy defaultRetryStrategy = new ExponentialSleepStrategy(
      DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
      DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS,
      DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS);
  return getSystemStreamMetadata(streamNames, defaultRetryStrategy);
}
/**
 * Creates the Kafka topic backing the given stream spec.
 *
 * @param streamSpec spec describing the topic to create
 * @return true if the topic was created by this call
 */
@Override
public boolean createStream(StreamSpec streamSpec) {
  LOG.info("Creating Kafka topic: {} on system: {}", streamSpec.getPhysicalName(), streamSpec.getSystemName());
  // Convert to a KafkaStreamSpec so topic-level properties (e.g. replication factor) apply.
  KafkaStreamSpec kafkaSpec = toKafkaSpec(streamSpec);
  return KafkaSystemAdminUtilsScala.createStream(kafkaSpec, getZkConnection());
}
/**
 * Delete records up to (and including) the provided ssp offsets for
 * all system stream partitions specified in the map.
 * This only works with Kafka cluster 0.11 or later. Otherwise it's a no-op.
 *
 * @param offsets specifies up to what offsets the messages should be deleted
 */
@Override
public void deleteMessages(Map<SystemStreamPartition, String> offsets) {
  if (!deleteCommittedMessages) {
    // Deletion is disabled by configuration; nothing to do.
    return;
  }
  // Lazily create the delete-capable admin client on first use.
  if (adminClientForDelete == null) {
    adminClientForDelete = kafka.admin.AdminClient.create(createAdminClientProperties());
  }
  KafkaSystemAdminUtilsScala.deleteMessages(adminClientForDelete, offsets);
  deleteMessageCalled = true;
}
/**
 * Clears (deletes) the Kafka topic backing the given stream spec and verifies
 * that no partition metadata remains for it.
 *
 * @param streamSpec spec describing the topic to clear
 * @return true if the topic has no remaining partition metadata after clearing
 */
@Override
public boolean clearStream(StreamSpec streamSpec) {
  // Bug fix: the log message previously said "Creating Kafka topic" — a copy-paste
  // from createStream() — even though this method deletes the topic.
  LOG.info("Deleting Kafka topic: {} on system: {}", streamSpec.getPhysicalName(), streamSpec.getSystemName());
  KafkaSystemAdminUtilsScala.clearStream(streamSpec, getZkConnection());
  Map<String, List<PartitionInfo>> topicsMetadata = getTopicMetadata(ImmutableSet.of(streamSpec.getPhysicalName()));
  // Robustness: a missing entry (null) means the topic is fully gone, which also
  // counts as successfully cleared; the original would have thrown an NPE here.
  List<PartitionInfo> partitions = topicsMetadata.get(streamSpec.getPhysicalName());
  return partitions == null || partitions.isEmpty();
}
/**
 * Helper that verifies changelog stream creation for the given topic: spies on the
 * system admin so the KafkaStreamSpec produced by toKafkaSpec() can be inspected,
 * then creates and validates the changelog stream.
 *
 * @param topic changelog topic name to create and validate
 */
public void testCreateChangelogStreamHelp(final String topic) {
  final int PARTITIONS = 12;
  final int REP_FACTOR = 2;
  // Configure a fake store whose changelog maps onto the given topic.
  Map<String, String> map = new HashMap<>();
  map.put(JobConfig.JOB_DEFAULT_SYSTEM(), SYSTEM());
  map.put(String.format("stores.%s.changelog", "fakeStore"), topic);
  map.put(String.format("stores.%s.changelog.replication.factor", "fakeStore"), String.valueOf(REP_FACTOR));
  map.put(String.format("stores.%s.changelog.kafka.segment.bytes", "fakeStore"), "139");
  KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
  StreamSpec spec = StreamSpec.createChangeLogStreamSpec(topic, SYSTEM(), PARTITIONS);
  // Intercept toKafkaSpec() to assert that the internal spec carries the configured
  // replication factor, partition count, and topic properties before it is used.
  Mockito.doAnswer(invocationOnMock -> {
    StreamSpec internalSpec = (StreamSpec) invocationOnMock.callRealMethod();
    assertTrue(internalSpec instanceof KafkaStreamSpec); // KafkaStreamSpec is used to carry replication factor
    assertTrue(internalSpec.isChangeLogStream());
    assertEquals(SYSTEM(), internalSpec.getSystemName());
    assertEquals(topic, internalSpec.getPhysicalName());
    assertEquals(REP_FACTOR, ((KafkaStreamSpec) internalSpec).getReplicationFactor());
    assertEquals(PARTITIONS, internalSpec.getPartitionCount());
    assertEquals("139", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("segment.bytes"));
    // Changelog topics are asserted to be log-compacted.
    assertEquals("compact", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("cleanup.policy"));
    return internalSpec;
  }).when(admin).toKafkaSpec(Mockito.any());
  admin.createStream(spec);
  admin.validateStream(spec);
}
/**
 * Fetches oldest/newest/upcoming offset metadata for each requested SSP.
 *
 * @param ssps system stream partitions to look up
 * @return map from each SSP to its partition metadata
 */
@Override
public Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> getSSPMetadata(
    Set<SystemStreamPartition> ssps) {
  LOG.info("Fetching SSP metadata for: {}", ssps);
  // Translate each SSP into the Kafka TopicPartition it maps onto.
  List<TopicPartition> requestedPartitions = new ArrayList<>();
  for (SystemStreamPartition ssp : ssps) {
    requestedPartitions.add(new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()));
  }
  OffsetsMaps fetchedOffsets = fetchTopicPartitionsMetadata(requestedPartitions);
  // Assemble per-SSP metadata from the three offset maps.
  Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> result = new HashMap<>();
  for (SystemStreamPartition ssp : ssps) {
    result.put(ssp, new SystemStreamMetadata.SystemStreamPartitionMetadata(
        fetchedOffsets.getOldestOffsets().get(ssp),
        fetchedOffsets.getNewestOffsets().get(ssp),
        fetchedOffsets.getUpcomingOffsets().get(ssp)));
  }
  return result;
}
final String REPL_FACTOR = "replication.factor"; KafkaStreamSpec kSpec = toKafkaSpec(streamSpec); String topicName = kSpec.getPhysicalName();
oldestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset)); }); upcomingOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset)); "Empty Kafka topic partition {} with upcoming offset {}. Skipping newest offset and setting oldest offset to 0 to consume from beginning", topicPartition, offset); oldestOffsets.put(toSystemStreamPartition(topicPartition), "0"); } else { newestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset - 1));
/**
 * Verifies that a coordinator stream whose topic name contains dots and
 * underscores is created and validated correctly, and that the derived
 * KafkaStreamSpec carries the configured replication factor and properties.
 */
@Test
public void testCreateCoordinatorStreamWithSpecialCharsInTopicName() {
  final String STREAM = "test.coordinator_test.Stream";
  Map<String, String> map = new HashMap<>();
  map.put("job.coordinator.segment.bytes", "123");
  map.put("job.coordinator.cleanup.policy", "compact");
  int coordReplicatonFactor = 2;
  map.put(org.apache.samza.config.KafkaConfig.JOB_COORDINATOR_REPLICATION_FACTOR(), String.valueOf(coordReplicatonFactor));
  KafkaSystemAdmin admin = Mockito.spy(createSystemAdmin(SYSTEM(), map));
  StreamSpec spec = StreamSpec.createCoordinatorStreamSpec(STREAM, SYSTEM());
  // Intercept toKafkaSpec() to inspect the internal spec before it is used.
  Mockito.doAnswer(invocationOnMock -> {
    StreamSpec internalSpec = (StreamSpec) invocationOnMock.callRealMethod();
    assertTrue(internalSpec instanceof KafkaStreamSpec); // KafkaStreamSpec is used to carry replication factor
    assertTrue(internalSpec.isCoordinatorStream());
    assertEquals(SYSTEM(), internalSpec.getSystemName());
    assertEquals(STREAM, internalSpec.getPhysicalName());
    // Coordinator streams are asserted to have exactly one partition.
    assertEquals(1, internalSpec.getPartitionCount());
    Assert.assertEquals(coordReplicatonFactor, ((KafkaStreamSpec) internalSpec).getReplicationFactor());
    Assert.assertEquals("123", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("segment.bytes"));
    // cleanup policy is overridden in the KafkaAdmin
    Assert.assertEquals("compact", ((KafkaStreamSpec) internalSpec).getProperties().getProperty("cleanup.policy"));
    return internalSpec;
  }).when(admin).toKafkaSpec(Mockito.any());
  admin.createStream(spec);
  admin.validateStream(spec);
}
/**
 * Looks up the oldest, newest, and upcoming offsets for every supplied SSP and
 * packages them as per-partition metadata.
 *
 * @param ssps system stream partitions to fetch metadata for
 * @return map of SSP to SystemStreamPartitionMetadata
 */
@Override
public Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> getSSPMetadata(
    Set<SystemStreamPartition> ssps) {
  LOG.info("Fetching SSP metadata for: {}", ssps);
  List<TopicPartition> topicPartitions = ssps.stream()
      .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
      .collect(Collectors.toList());
  OffsetsMaps offsets = fetchTopicPartitionsMetadata(topicPartitions);
  // Rebuild one metadata entry per SSP from the fetched offset maps.
  Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> metadataBySsp = new HashMap<>();
  ssps.forEach(ssp -> metadataBySsp.put(ssp,
      new SystemStreamMetadata.SystemStreamPartitionMetadata(
          offsets.getOldestOffsets().get(ssp),
          offsets.getNewestOffsets().get(ssp),
          offsets.getUpcomingOffsets().get(ssp))));
  return metadataBySsp;
}
/**
 * Deletes the Kafka topic for the given stream spec, then checks that the topic
 * reports no partitions.
 *
 * @param streamSpec spec describing the topic to clear
 * @return true if no partition metadata remains for the topic
 */
@Override
public boolean clearStream(StreamSpec streamSpec) {
  // Bug fix: this previously logged "Creating Kafka topic", copied from
  // createStream(), although the method deletes the topic.
  LOG.info("Deleting Kafka topic: {} on system: {}", streamSpec.getPhysicalName(), streamSpec.getSystemName());
  KafkaSystemAdminUtilsScala.clearStream(streamSpec, getZkConnection());
  Map<String, List<PartitionInfo>> topicsMetadata = getTopicMetadata(ImmutableSet.of(streamSpec.getPhysicalName()));
  // Guard against a null entry: an absent topic after deletion is still "cleared"
  // (the original would have thrown a NullPointerException).
  List<PartitionInfo> partitions = topicsMetadata.get(streamSpec.getPhysicalName());
  return partitions == null || partitions.isEmpty();
}
KafkaStreamSpec kafkaSpec = kafkaAdmin.toKafkaSpec(spec); kafkaSpec = admin.toKafkaSpec(spec); Assert.assertEquals(coordReplicatonFactor, kafkaSpec.getReplicationFactor()); Assert.assertEquals("123", kafkaSpec.getProperties().getProperty("segment.bytes")); admin = Mockito.spy(createSystemAdmin(SYSTEM(), map)); spec = StreamSpec.createChangeLogStreamSpec(topicName, SYSTEM(), changeLogPartitionFactor); kafkaSpec = admin.toKafkaSpec(spec); Assert.assertEquals(changeLogReplicationFactor, kafkaSpec.getReplicationFactor()); admin = Mockito.spy(createSystemAdmin(SYSTEM(), map)); spec = StreamSpec.createChangeLogStreamSpec("anotherTopic", SYSTEM(), changeLogPartitionFactor); kafkaSpec = admin.toKafkaSpec(spec); Assert.fail("toKafkaSpec should've failed for missing topic"); } catch (StreamValidationException e) { kafkaSpec = admin.toKafkaSpec(spec); Assert.assertEquals("v1", kafkaSpec.getProperties().getProperty("p1")); Assert.assertEquals("v2", kafkaSpec.getProperties().getProperty("p2"));
oldestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset)); }); upcomingOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset)); "Empty Kafka topic partition {} with upcoming offset {}. Skipping newest offset and setting oldest offset to 0 to consume from beginning", topicPartition, offset); oldestOffsets.put(toSystemStreamPartition(topicPartition), "0"); } else { newestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset - 1));
/**
 * Fetches metadata for the named streams, retrying failures with the default
 * exponential back-off settings.
 *
 * @param streamNames stream names to fetch metadata for
 * @return map of stream name to its SystemStreamMetadata
 */
@Override
public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
  // Delegate to the retry-aware overload with a default back-off strategy.
  ExponentialSleepStrategy retryBackoff = new ExponentialSleepStrategy(
      DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
      DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS,
      DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS);
  return getSystemStreamMetadata(streamNames, retryBackoff);
}
/**
 * Creates the Kafka topic that backs the given stream spec.
 *
 * @param streamSpec spec describing the topic to create
 * @return true if this call created the topic
 */
@Override
public boolean createStream(StreamSpec streamSpec) {
  String topic = streamSpec.getPhysicalName();
  LOG.info("Creating Kafka topic: {} on system: {}", topic, streamSpec.getSystemName());
  return KafkaSystemAdminUtilsScala.createStream(toKafkaSpec(streamSpec), getZkConnection());
}
/**
 * Fetches per-partition metadata (oldest, newest, and upcoming offsets) for the
 * given set of system stream partitions.
 *
 * @param ssps system stream partitions to fetch metadata for
 * @return map from each SSP to its SystemStreamPartitionMetadata
 */
@Override
public Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> getSSPMetadata(
    Set<SystemStreamPartition> ssps) {
  LOG.info("Fetching SSP metadata for: {}", ssps);
  // Map each SSP onto the Kafka TopicPartition it corresponds to.
  List<TopicPartition> topicPartitions = ssps.stream()
      .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
      .collect(Collectors.toList());
  OffsetsMaps topicPartitionsMetadata = fetchTopicPartitionsMetadata(topicPartitions);
  Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> sspToSSPMetadata = new HashMap<>();
  for (SystemStreamPartition ssp : ssps) {
    // Map.get may return null for a missing SSP; the metadata then carries nulls.
    String oldestOffset = topicPartitionsMetadata.getOldestOffsets().get(ssp);
    String newestOffset = topicPartitionsMetadata.getNewestOffsets().get(ssp);
    String upcomingOffset = topicPartitionsMetadata.getUpcomingOffsets().get(ssp);
    sspToSSPMetadata.put(ssp,
        new SystemStreamMetadata.SystemStreamPartitionMetadata(oldestOffset, newestOffset, upcomingOffset));
  }
  return sspToSSPMetadata;
}
/**
 * Delete records up to (and including) the provided ssp offsets for
 * all system stream partitions specified in the map.
 * This only works with Kafka cluster 0.11 or later. Otherwise it's a no-op.
 *
 * @param offsets specifies up to what offsets the messages should be deleted
 */
@Override
public void deleteMessages(Map<SystemStreamPartition, String> offsets) {
  if (!deleteCommittedMessages) {
    // Message deletion is not enabled; skip silently.
    return;
  }
  // Create the admin client on demand the first time a delete is requested.
  if (adminClient == null) {
    adminClient = AdminClient.create(createAdminClientProperties());
  }
  KafkaSystemAdminUtilsScala.deleteMessages(adminClient, offsets);
  deleteMessageCalled = true;
}
oldestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset)); }); upcomingOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset)); "Empty Kafka topic partition {} with upcoming offset {}. Skipping newest offset and setting oldest offset to 0 to consume from beginning", topicPartition, offset); oldestOffsets.put(toSystemStreamPartition(topicPartition), "0"); } else { newestOffsets.put(toSystemStreamPartition(topicPartition), String.valueOf(offset - 1));
/**
 * Fetches SystemStreamMetadata for the given streams, retrying with the default
 * exponential back-off strategy.
 *
 * @param streamNames stream names to fetch metadata for
 * @return map of stream name to its SystemStreamMetadata
 */
@Override
public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
  // Delegates to the retry-aware overload using the default back-off settings.
  return getSystemStreamMetadata(streamNames,
      new ExponentialSleepStrategy(DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
          DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS, DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS));
}
/**
 * Fetch SystemStreamMetadata for each topic with the consumer.
 *
 * @param topics set of topics to get metadata info for
 * @return map of topic to SystemStreamMetadata
 * @throws SamzaException if partition info is not (yet) available for a topic
 */
private Map<String, SystemStreamMetadata> fetchSystemStreamMetadata(Set<String> topics) {
  Map<SystemStreamPartition, String> collectedOldest = new HashMap<>();
  Map<SystemStreamPartition, String> collectedNewest = new HashMap<>();
  Map<SystemStreamPartition, String> collectedUpcoming = new HashMap<>();
  LOG.info("Fetching SystemStreamMetadata for topics {} on system {}", topics, systemName);
  for (String topic : topics) {
    List<PartitionInfo> partitionInfos = metadataConsumer.partitionsFor(topic);
    if (partitionInfos == null) {
      // Metadata may simply not have propagated yet; surface this to the caller.
      throw new SamzaException(
          String.format("Partition info not(yet?) available for system %s topic %s", systemName, topic));
    }
    List<TopicPartition> topicPartitions = new ArrayList<>(partitionInfos.size());
    for (PartitionInfo partitionInfo : partitionInfos) {
      topicPartitions.add(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()));
    }
    OffsetsMaps offsetsForTopic = fetchTopicPartitionsMetadata(topicPartitions);
    collectedOldest.putAll(offsetsForTopic.getOldestOffsets());
    collectedNewest.putAll(offsetsForTopic.getNewestOffsets());
    collectedUpcoming.putAll(offsetsForTopic.getUpcomingOffsets());
  }
  // Assemble the Java offset maps via the Scala helper, then convert back to Java.
  scala.collection.immutable.Map<String, SystemStreamMetadata> result =
      KafkaSystemAdminUtilsScala.assembleMetadata(
          ScalaJavaUtil.toScalaMap(collectedOldest),
          ScalaJavaUtil.toScalaMap(collectedNewest),
          ScalaJavaUtil.toScalaMap(collectedUpcoming));
  LOG.debug("assembled SystemStreamMetadata is: {}", result);
  return JavaConverters.mapAsJavaMapConverter(result).asJava();
}