/** * Constructs an {@link InputDescriptor} instance. * * @param streamId id of the stream * @param valueSerde serde the values in the messages in the stream * @param systemDescriptor system descriptor this stream descriptor was obtained from */ <T> KinesisInputDescriptor(String streamId, Serde<T> valueSerde, SystemDescriptor systemDescriptor) { super(streamId, KVSerde.of(new NoOpSerde<>(), valueSerde), systemDescriptor, null); }
/** * Constructs an {@link OutputDescriptor} instance. * * @param streamId id of the stream * @param namespace namespace for the Event Hubs entity to produce to, not null * @param entityPath entity path for the Event Hubs entity to produce to, not null * @param valueSerde serde the values in the messages in the stream * @param systemDescriptor system descriptor this stream descriptor was obtained from */ EventHubsOutputDescriptor(String streamId, String namespace, String entityPath, Serde valueSerde, SystemDescriptor systemDescriptor) { super(streamId, KVSerde.of(new NoOpSerde<>(), valueSerde), systemDescriptor); this.namespace = StringUtils.stripToNull(namespace); this.entityPath = StringUtils.stripToNull(entityPath); if (this.namespace == null || this.entityPath == null) { throw new ConfigException(String.format("Missing namespace and entity path Event Hubs output descriptor in " // + "system: {%s}, stream: {%s}", getSystemName(), streamId)); } }
/** * Construct a table manager instance * @param config job configuration * @param serdes Serde instances for tables */ public TableManager(Config config, Map<String, Serde<Object>> serdes) { new JavaTableConfig(config).getTableIds().forEach(tableId -> { // Construct the table provider String tableProviderFactory = config.get(String.format(JavaTableConfig.TABLE_PROVIDER_FACTORY, tableId)); // Construct the KVSerde JavaTableConfig tableConfig = new JavaTableConfig(config); KVSerde serde = KVSerde.of( serdes.get(tableConfig.getKeySerde(tableId)), serdes.get(tableConfig.getValueSerde(tableId))); TableSpec tableSpec = new TableSpec(tableId, serde, tableProviderFactory, config.subset(String.format(JavaTableConfig.TABLE_ID_PREFIX, tableId) + ".")); addTable(tableSpec); logger.info("Added table " + tableSpec.getId()); }); }
/** * Construct a table manager instance * @param config job configuration * @param serdes Serde instances for tables */ public TableManager(Config config, Map<String, Serde<Object>> serdes) { new JavaTableConfig(config).getTableIds().forEach(tableId -> { // Construct the table provider String tableProviderFactory = config.get(String.format(JavaTableConfig.TABLE_PROVIDER_FACTORY, tableId)); // Construct the KVSerde JavaTableConfig tableConfig = new JavaTableConfig(config); KVSerde serde = KVSerde.of( serdes.get(tableConfig.getKeySerde(tableId)), serdes.get(tableConfig.getValueSerde(tableId))); TableSpec tableSpec = new TableSpec(tableId, serde, tableProviderFactory, config.subset(String.format(JavaTableConfig.TABLE_ID_PREFIX, tableId) + ".")); addTable(tableSpec); logger.info("Added table " + tableSpec.getId()); }); }
/** * Construct a table manager instance * @param config job configuration * @param serdes Serde instances for tables */ public TableManager(Config config, Map<String, Serde<Object>> serdes) { new JavaTableConfig(config).getTableIds().forEach(tableId -> { // Construct the table provider String tableProviderFactory = config.get(String.format(JavaTableConfig.TABLE_PROVIDER_FACTORY, tableId)); // Construct the KVSerde JavaTableConfig tableConfig = new JavaTableConfig(config); KVSerde serde = KVSerde.of( serdes.get(tableConfig.getKeySerde(tableId)), serdes.get(tableConfig.getValueSerde(tableId))); TableSpec tableSpec = new TableSpec(tableId, serde, tableProviderFactory, config.subset(String.format(JavaTableConfig.TABLE_ID_PREFIX, tableId) + ".")); addTable(tableSpec); logger.info("Added table " + tableSpec.getId()); }); }
/** * Construct a table manager instance * @param config job configuration * @param serdes Serde instances for tables */ public TableManager(Config config, Map<String, Serde<Object>> serdes) { new JavaTableConfig(config).getTableIds().forEach(tableId -> { // Construct the table provider String tableProviderFactory = config.get(String.format(JavaTableConfig.TABLE_PROVIDER_FACTORY, tableId)); // Construct the KVSerde JavaTableConfig tableConfig = new JavaTableConfig(config); KVSerde serde = KVSerde.of( serdes.get(tableConfig.getKeySerde(tableId)), serdes.get(tableConfig.getValueSerde(tableId))); TableSpec tableSpec = new TableSpec(tableId, serde, tableProviderFactory, config.subset(String.format(JavaTableConfig.TABLE_ID_PREFIX, tableId) + ".")); addTable(tableSpec); logger.info("Added table " + tableSpec.getId()); }); }
/** * Constructs an {@link InputDescriptor} instance. * * @param streamId id of the stream * @param namespace namespace for the Event Hubs entity to consume from, not null * @param entityPath entity path for the Event Hubs entity to consume from, not null * @param valueSerde serde the values in the messages in the stream * @param systemDescriptor system descriptor this stream descriptor was obtained from */ EventHubsInputDescriptor(String streamId, String namespace, String entityPath, Serde valueSerde, SystemDescriptor systemDescriptor) { super(streamId, KVSerde.of(new NoOpSerde<>(), valueSerde), systemDescriptor, null); this.namespace = StringUtils.stripToNull(namespace); this.entityPath = StringUtils.stripToNull(entityPath); if (this.namespace == null || this.entityPath == null) { throw new ConfigException(String.format("Missing namespace and entity path Event Hubs input descriptor in " // + "system: {%s}, stream: {%s}", getSystemName(), streamId)); } }
void translate(final TableScan tableScan, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<KV<Object, Object>>> inputMsgStreams) { StreamApplicationDescriptor streamAppDesc = context.getStreamAppDescriptor(); List<String> tableNameParts = tableScan.getTable().getQualifiedName(); String sourceName = SqlIOConfig.getSourceFromSourceParts(tableNameParts); Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName)); SqlIOConfig sqlIOConfig = systemStreamConfig.get(sourceName); final String systemName = sqlIOConfig.getSystemName(); final String streamName = sqlIOConfig.getStreamName(); final String source = sqlIOConfig.getSource(); KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new); GenericInputDescriptor<KV<Object, Object>> isd = sd.getInputDescriptor(streamName, noOpKVSerde); MessageStream<KV<Object, Object>> inputStream = inputMsgStreams.computeIfAbsent(source, v -> streamAppDesc.getInputStream(isd)); MessageStream<SamzaSqlRelMessage> samzaSqlRelMessageStream = inputStream.map(new ScanMapFunction(sourceName, queryId)); context.registerMessageStream(tableScan.getId(), samzaSqlRelMessageStream); } }
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) { SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream); MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId()); MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId)); Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor(); if (!tableDescriptor.isPresent()) { KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); String systemName = sinkConfig.getSystemName(); DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new); GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde); OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd)); outputStream.sendTo(stm); } else { Table outputTable = appDesc.getTable(tableDescriptor.get()); if (outputTable == null) { String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource(); throw new SamzaException(msg); } outputStream.sendTo(outputTable); } } }
@Test public void testISDConfigsWithDefaults() { KafkaSystemDescriptor sd = new KafkaSystemDescriptor("kafka") .withConsumerZkConnect(ImmutableList.of("localhost:123")) .withProducerBootstrapServers(ImmutableList.of("localhost:567", "localhost:890")); KafkaInputDescriptor<KV<String, Integer>> isd = sd.getInputDescriptor("input-stream", KVSerde.of(new StringSerde(), new IntegerSerde())); Map<String, String> generatedConfigs = isd.toConfig(); assertEquals("kafka", generatedConfigs.get("streams.input-stream.samza.system")); assertEquals(1, generatedConfigs.size()); // verify that there are no other configs } }
@Before public void setup() { Map<String, String> configMap = new HashMap<>(); configMap.put("job.default.system", "kafka"); configMap.put("job.name", "jobName"); configMap.put("job.id", "jobId"); this.config = new MapConfig(configMap); this.context = new MockContext(); when(this.context.getJobContext().getConfig()).thenReturn(this.config); Serde storeKeySerde = new TimeSeriesKeySerde(new IntegerSerde()); Serde storeValSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde()); TaskModel taskModel = mock(TaskModel.class); when(taskModel.getSystemStreamPartitions()).thenReturn(ImmutableSet .of(new SystemStreamPartition("kafka", "integTestExecutionPlannerers", new Partition(0)))); when(taskModel.getTaskName()).thenReturn(new TaskName("task 1")); when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel); when(this.context.getTaskContext().getTaskMetricsRegistry()).thenReturn(new MetricsRegistryMap()); when(this.context.getContainerContext().getContainerMetricsRegistry()).thenReturn(new MetricsRegistryMap()); when(this.context.getTaskContext().getStore("jobName-jobId-window-w1")) .thenReturn(new TestInMemoryStore<>(storeKeySerde, storeValSerde)); }
@Test public void testISDConfigsWithOverrides() { KafkaSystemDescriptor sd = new KafkaSystemDescriptor("kafka"); KafkaInputDescriptor<KV<String, Integer>> isd = sd.getInputDescriptor("input-stream", KVSerde.of(new StringSerde(), new IntegerSerde())) .withConsumerAutoOffsetReset("largest") .withConsumerFetchMessageMaxBytes(1024 * 1024); Map<String, String> generatedConfigs = isd.toConfig();; assertEquals("kafka", generatedConfigs.get("streams.input-stream.samza.system")); assertEquals("largest", generatedConfigs.get("systems.kafka.streams.input-stream.consumer.auto.offset.reset")); assertEquals("1048576", generatedConfigs.get("systems.kafka.streams.input-stream.consumer.fetch.message.max.bytes")); }
@Test(expected = SamzaException.class) public void joinWithSelfThrowsException() throws Exception { Map<String, String> mapConfig = new HashMap<>(); mapConfig.put("job.name", "jobName"); mapConfig.put("job.id", "jobId"); StreamTestUtils.addStreamConfigs(mapConfig, "inStream", "insystem", "instream"); Config config = new MapConfig(mapConfig); StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> { IntegerSerde integerSerde = new IntegerSerde(); KVSerde<Integer, Integer> kvSerde = KVSerde.of(integerSerde, integerSerde); GenericSystemDescriptor sd = new GenericSystemDescriptor("insystem", "mockFactoryClassName"); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("inStream", kvSerde); MessageStream<KV<Integer, Integer>> inStream = appDesc.getInputStream(inputDescriptor); inStream.join(inStream, new TestJoinFunction(), integerSerde, kvSerde, kvSerde, JOIN_TTL, "join"); }, config); createStreamOperatorTask(new SystemClock(), streamAppDesc); // should throw an exception }
@Test public void testPartitionBy() { MapFunction<Object, String> keyFn = m -> m.toString(); MapFunction<Object, Object> valueFn = m -> m; KVSerde<Object, Object> partitionBySerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>()); StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> { MessageStream inputStream = appDesc.getInputStream(testInputDescriptor); inputStream.partitionBy(keyFn, valueFn, partitionBySerde, testRepartitionedStreamName); }, getConfig()); assertEquals(2, streamAppDesc.getInputOperators().size()); Map<String, InputOperatorSpec> inputOpSpecs = streamAppDesc.getInputOperators(); assertTrue(inputOpSpecs.keySet().contains(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName))); InputOperatorSpec inputOpSpec = inputOpSpecs.get(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName)); assertEquals(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName), inputOpSpec.getStreamId()); assertTrue(inputOpSpec.getKeySerde() instanceof NoOpSerde); assertTrue(inputOpSpec.getValueSerde() instanceof NoOpSerde); assertTrue(inputOpSpec.isKeyed()); assertNull(inputOpSpec.getScheduledFn()); assertNull(inputOpSpec.getWatermarkFn()); InputOperatorSpec originInputSpec = inputOpSpecs.get(testInputDescriptor.getStreamId()); assertTrue(originInputSpec.getRegisteredOperatorSpecs().toArray()[0] instanceof PartitionByOperatorSpec); PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) originInputSpec.getRegisteredOperatorSpecs().toArray()[0]; assertEquals(reparOpSpec.getOpId(), String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName)); assertEquals(reparOpSpec.getKeyFunction(), keyFn); assertEquals(reparOpSpec.getValueFunction(), valueFn); assertEquals(reparOpSpec.getOutputStream().getStreamId(), reparOpSpec.getOpId()); assertNull(reparOpSpec.getScheduledFn()); assertNull(reparOpSpec.getWatermarkFn()); }
private StreamApplicationDescriptorImpl getTestJoinStreamGraph(TestJoinFunction joinFn) throws IOException { Map<String, String> mapConfig = new HashMap<>(); mapConfig.put("job.name", "jobName"); mapConfig.put("job.id", "jobId"); StreamTestUtils.addStreamConfigs(mapConfig, "inStream", "insystem", "instream"); StreamTestUtils.addStreamConfigs(mapConfig, "inStream2", "insystem", "instream2"); Config config = new MapConfig(mapConfig); return new StreamApplicationDescriptorImpl(appDesc -> { IntegerSerde integerSerde = new IntegerSerde(); KVSerde<Integer, Integer> kvSerde = KVSerde.of(integerSerde, integerSerde); GenericSystemDescriptor sd = new GenericSystemDescriptor("insystem", "mockFactoryClassName"); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor1 = sd.getInputDescriptor("inStream", kvSerde); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor2 = sd.getInputDescriptor("inStream2", kvSerde); MessageStream<KV<Integer, Integer>> inStream = appDesc.getInputStream(inputDescriptor1); MessageStream<KV<Integer, Integer>> inStream2 = appDesc.getInputStream(inputDescriptor2); inStream .join(inStream2, joinFn, integerSerde, kvSerde, kvSerde, JOIN_TTL, "j1") .sink((message, messageCollector, taskCoordinator) -> { SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream"); messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message)); }); }, config); }
@Test public void testWithSerdes() { List<TableDescriptor> descriptors = Arrays.asList( new MockLocalTableDescriptor("t1", KVSerde.of(new StringSerde(), new IntegerSerde())), new MockLocalTableDescriptor("t2", KVSerde.of(new StringSerde(), new IntegerSerde())) ); Config jobConfig = new MapConfig(TableConfigGenerator.generateSerdeConfig(descriptors)); JavaTableConfig javaTableConfig = new JavaTableConfig(jobConfig); assertNotNull(javaTableConfig.getKeySerde("t1")); assertNotNull(javaTableConfig.getMsgSerde("t1")); assertNotNull(javaTableConfig.getKeySerde("t2")); assertNotNull(javaTableConfig.getMsgSerde("t2")); MapConfig tableConfig = new MapConfig(TableConfigGenerator.generate(jobConfig, descriptors)); javaTableConfig = new JavaTableConfig(tableConfig); assertNotNull(javaTableConfig.getTableProviderFactory("t1")); assertNotNull(javaTableConfig.getTableProviderFactory("t2")); }
private StreamApplicationDescriptorImpl getKeyedSessionWindowStreamGraph(AccumulationMode mode, Duration duration) throws IOException { StreamApplication userApp = appDesc -> { KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde()); GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass"); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde); appDesc.getInputStream(inputDescriptor) .window(Windows.keyedSessionWindow(KV::getKey, duration, new IntegerSerde(), kvSerde) .setAccumulationMode(mode), "w1") .sink((message, messageCollector, taskCoordinator) -> { SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream"); messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message)); }); }; return new StreamApplicationDescriptorImpl(userApp, config); }
private StreamApplicationDescriptorImpl getKeyedTumblingWindowStreamGraph(AccumulationMode mode, Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException { StreamApplication userApp = appDesc -> { KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde()); GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass"); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde); appDesc.getInputStream(inputDescriptor) .window(Windows.keyedTumblingWindow(KV::getKey, duration, new IntegerSerde(), kvSerde) .setEarlyTrigger(earlyTrigger).setAccumulationMode(mode), "w1") .sink((message, messageCollector, taskCoordinator) -> { SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream"); messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message)); }); }; return new StreamApplicationDescriptorImpl(userApp, config); }
private StreamApplicationDescriptorImpl getTumblingWindowStreamGraph(AccumulationMode mode, Duration duration, Trigger<KV<Integer, Integer>> earlyTrigger) throws IOException { StreamApplication userApp = appDesc -> { KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde()); GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass"); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde); appDesc.getInputStream(inputDescriptor) .window(Windows.tumblingWindow(duration, kvSerde).setEarlyTrigger(earlyTrigger) .setAccumulationMode(mode), "w1") .sink((message, messageCollector, taskCoordinator) -> { SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream"); messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message)); }); }; return new StreamApplicationDescriptorImpl(userApp, config); }
private StreamApplicationDescriptorImpl getAggregateTumblingWindowStreamGraph(AccumulationMode mode, Duration timeDuration, Trigger<IntegerEnvelope> earlyTrigger) throws IOException { StreamApplication userApp = appDesc -> { KVSerde<Integer, Integer> kvSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde()); GenericSystemDescriptor sd = new GenericSystemDescriptor("kafka", "mockFactoryClass"); GenericInputDescriptor<KV<Integer, Integer>> inputDescriptor = sd.getInputDescriptor("integers", kvSerde); MessageStream<KV<Integer, Integer>> integers = appDesc.getInputStream(inputDescriptor); integers .map(new KVMapFunction()) .window(Windows.<IntegerEnvelope, Integer>tumblingWindow(timeDuration, () -> 0, (m, c) -> c + 1, new IntegerSerde()) .setEarlyTrigger(earlyTrigger) .setAccumulationMode(mode), "w1") .sink((message, messageCollector, taskCoordinator) -> { SystemStream outputSystemStream = new SystemStream("outputSystem", "outputStream"); messageCollector.send(new OutgoingMessageEnvelope(outputSystemStream, message)); }); }; return new StreamApplicationDescriptorImpl(userApp, config); }