public SamzaSqlApplicationConfig(Config staticConfig, Set<String> inputSystemStreams, Set<String> outputSystemStreams) { ioResolver = createIOResolver(staticConfig); inputSystemStreamConfigBySource = inputSystemStreams.stream() .collect(Collectors.toMap(Function.identity(), src -> ioResolver.fetchSourceInfo(src))); outputSystemStreamConfigsBySource = outputSystemStreams.stream() .collect(Collectors.toMap(Function.identity(), x -> ioResolver.fetchSinkInfo(x))); systemStreamConfigsBySource = new HashMap<>(inputSystemStreamConfigBySource); systemStreamConfigsBySource.putAll(outputSystemStreamConfigsBySource); Set<SqlIOConfig> systemStreamConfigs = new HashSet<>(systemStreamConfigsBySource.values()); relSchemaProvidersBySource = systemStreamConfigs.stream() .collect(Collectors.toMap(SqlIOConfig::getSource, x -> initializePlugin("RelSchemaProvider", x.getRelSchemaProviderName(), staticConfig, CFG_FMT_REL_SCHEMA_PROVIDER_DOMAIN, (o, c) -> ((RelSchemaProviderFactory) o).create(x.getSystemStream(), c)))); samzaRelConvertersBySource = systemStreamConfigs.stream() .collect(Collectors.toMap(SqlIOConfig::getSource, x -> initializePlugin("SamzaRelConverter", x.getSamzaRelConverterName(), staticConfig, CFG_FMT_SAMZA_REL_CONVERTER_DOMAIN, (o, c) -> ((SamzaRelConverterFactory) o).create(x.getSystemStream(), relSchemaProvidersBySource.get(x.getSource()), c)))); udfResolver = createUdfResolver(staticConfig); udfMetadata = udfResolver.getUdfs(); windowDurationMs = staticConfig.getLong(CFG_GROUPBY_WINDOW_DURATION_MS, DEFAULT_GROUPBY_WINDOW_DURATION_MS); // remove the SqlIOConfigs of outputs whose system is "log" out of systemStreamConfigsBySource outputSystemStreamConfigsBySource.forEach((k, v) -> { if (k.split("\\.")[0].equals(SamzaSqlApplicationConfig.SAMZA_SYSTEM_LOG)) { systemStreamConfigsBySource.remove(k); } }); }
/**
 * Routes the relational message stream registered for {@code node} to the sink named
 * {@code sinkStream}.
 *
 * <p>The stream is first mapped through an {@code OutputMapFunction}. If the sink config
 * carries a table descriptor the result is sent to that table; otherwise it is sent to an
 * output stream obtained from the application descriptor. Output streams are cached per
 * source in {@code outputMsgStreams} and system descriptors per system name in
 * {@code systemDescriptors}.
 *
 * @throws SamzaException if the application descriptor returns no table for the sink's descriptor
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream,
    StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> mappedStream =
      relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));

  Optional<TableDescriptor> sinkTableDescriptor = sinkConfig.getTableDescriptor();
  if (sinkTableDescriptor.isPresent()) {
    // Table sink: the table must be resolvable from its descriptor.
    Table sinkTable = appDesc.getTable(sinkTableDescriptor.get());
    if (sinkTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    mappedStream.sendTo(sinkTable);
    return;
  }

  // Stream sink: reuse the system descriptor and the output stream when already created.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  String sinkSystem = sinkConfig.getSystemName();
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(sinkSystem, DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), passThroughSerde);
  OutputStream sinkOutputStream =
      outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(outputDescriptor));
  mappedStream.sendTo(sinkOutputStream);
}
} // closes the enclosing scope opened outside this span
// For the input and then the output stream config: record the stream's system name under the
// CFG_FMT_SAMZA_STREAM_SYSTEM key formatted with the stream id, then copy that stream's own
// config entries into newConfig.
// NOTE(review): presumably later putAll calls overwrite earlier entries with the same key — confirm
// against newConfig's type.
newConfig.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, inputSystemStreamConfig.getStreamId()), inputSystemStreamConfig.getSystemName()); newConfig.putAll(inputSystemStreamConfig.getConfig()); newConfig.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, outputSystemStreamConfig.getStreamId()), outputSystemStreamConfig.getSystemName()); newConfig.putAll(outputSystemStreamConfig.getConfig());
// For the input and then the output stream config: record the stream's system name under the
// CFG_FMT_SAMZA_STREAM_SYSTEM key formatted with the stream name, then copy that stream's own
// config entries into newConfig.
// NOTE(review): presumably later putAll calls overwrite earlier entries with the same key — confirm
// against newConfig's type.
newConfig.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, inputSystemStreamConfig.getStreamName()), inputSystemStreamConfig.getSystemName()); newConfig.putAll(inputSystemStreamConfig.getConfig()); newConfig.put(String.format(CFG_FMT_SAMZA_STREAM_SYSTEM, outputSystemStreamConfig.getStreamName()), outputSystemStreamConfig.getSystemName()); newConfig.putAll(outputSystemStreamConfig.getConfig());
/**
 * Translates a table scan into a Samza input stream and registers the resulting relational
 * message stream in the translator context under the scan's node id.
 *
 * <p>The source name is derived from the scanned table's qualified name and must have a
 * registered rel message converter. System descriptors are cached per system name and input
 * streams per source, so a source scanned more than once reuses the same input stream.
 *
 * @param tableScan         the scan node being translated
 * @param context           translator context that supplies the application descriptor and
 *                          receives the registered stream
 * @param systemDescriptors cache of system descriptors keyed by system name
 * @param inputMsgStreams   cache of input streams keyed by source
 */
void translate(final TableScan tableScan, final TranslatorContext context,
    Map<String, DelegatingSystemDescriptor> systemDescriptors,
    Map<String, MessageStream<KV<Object, Object>>> inputMsgStreams) {
  StreamApplicationDescriptor appDescriptor = context.getStreamAppDescriptor();

  List<String> qualifiedName = tableScan.getTable().getQualifiedName();
  String sourceName = SqlIOConfig.getSourceFromSourceParts(qualifiedName);
  Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));

  SqlIOConfig ioConfig = systemStreamConfig.get(sourceName);
  final String system = ioConfig.getSystemName();
  final String stream = ioConfig.getStreamName();
  final String sourceKey = ioConfig.getSource();

  // Messages are passed through unserialized at this layer, hence the no-op serdes.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(system, DelegatingSystemDescriptor::new);
  GenericInputDescriptor<KV<Object, Object>> inputDescriptor =
      systemDescriptor.getInputDescriptor(stream, passThroughSerde);

  MessageStream<KV<Object, Object>> rawInput =
      inputMsgStreams.computeIfAbsent(sourceKey, v -> appDescriptor.getInputStream(inputDescriptor));
  MessageStream<SamzaSqlRelMessage> relMessages = rawInput.map(new ScanMapFunction(sourceName, queryId));

  context.registerMessageStream(tableScan.getId(), relMessages);
}
} // closes the enclosing scope opened outside this span
/**
 * Looks up the SQL schema of {@code tableName} through the source's configured
 * {@code RelSchemaProvider}.
 *
 * <p>Resets {@code lastErrorMsg} on entry. If a {@link SamzaException} is raised while
 * resolving the source or creating the provider, the error is logged, recorded in
 * {@code lastErrorMsg}, and {@code null} is returned.
 *
 * @param context   execution context used to build the Samza SQL config
 * @param tableName name of the source whose schema is requested
 * @return the schema, or {@code null} if the lookup failed with a {@code SamzaException}
 */
@Override
public SqlSchema getTableSchema(ExecutionContext context, String tableName) {
  // currently Shell works only for systems that has Avro schemas
  lastErrorMsg = "";
  int execId = execIdSeq.incrementAndGet();
  Map<String, String> staticConfigs = fetchSamzaSqlConfig(execId, context);
  Config samzaSqlConfig = new MapConfig(staticConfigs);

  try {
    SqlIOResolver resolver = SamzaSqlApplicationConfig.createIOResolver(samzaSqlConfig);
    SqlIOConfig sourceInfo = resolver.fetchSourceInfo(tableName);
    RelSchemaProvider provider = SamzaSqlApplicationConfig.initializePlugin(
        "RelSchemaProvider",
        sourceInfo.getRelSchemaProviderName(),
        samzaSqlConfig,
        SamzaSqlApplicationConfig.CFG_FMT_REL_SCHEMA_PROVIDER_DOMAIN,
        (factory, pluginConfig) ->
            ((RelSchemaProviderFactory) factory).create(sourceInfo.getSystemStream(), pluginConfig));
    return provider.getSqlSchema();
  } catch (SamzaException ex) {
    String msg = "getTableSchema failed with exception ";
    lastErrorMsg = msg + ex.toString();
    LOG.error(msg, ex);
    return null;
  }
}
private JoinInputNode.InputType getInputType(RelNode relNode, TranslatorContext context) { // NOTE: Any intermediate form of a join is always a stream. Eg: For the second level join of // stream-table-table join, the left side of the join is join output, which we always // assume to be a stream. The intermediate stream won't be an instance of EnumerableTableScan. // The join key(s) for the table could be an udf in which case the relNode would be LogicalProject. if (relNode instanceof EnumerableTableScan || relNode instanceof LogicalProject) { SqlIOConfig sourceTableConfig = resolveSourceConfigForTable(relNode, context); if (sourceTableConfig == null || !sourceTableConfig.getTableDescriptor().isPresent()) { return JoinInputNode.InputType.STREAM; } else if (sourceTableConfig.getTableDescriptor().get() instanceof RemoteTableDescriptor || sourceTableConfig.getTableDescriptor().get() instanceof CachingTableDescriptor) { return JoinInputNode.InputType.REMOTE_TABLE; } else { return JoinInputNode.InputType.LOCAL_TABLE; } } else { return JoinInputNode.InputType.STREAM; } }
// Read the source's name parts and fetch the schema provider registered under its source string.
List<String> sourceParts = ssc.getSourceParts(); RelSchemaProvider relSchemaProvider = relSchemaProviders.get(ssc.getSource());
// Build the IO config from the system/stream names, the system's configs and the table descriptor.
// NOTE(review): this call passes four arguments, while the SqlIOConfig constructor visible in this
// file also takes a sourceParts list — confirm a matching overload exists.
return new SqlIOConfig(systemName, streamName, fetchSystemConfigs(systemName), tableDescriptor);
// NOTE(review): excerpt of a SqlIOConfig constructor with statements elided between the shown
// pieces (unbalanced parentheses and braces); it reads the table key converter name only when
// isRemoteTable() is true. Not compilable as it stands — restore the missing statements before use.
public SqlIOConfig(String systemName, String streamName, List<String> sourceParts, Config systemConfig, TableDescriptor tableDescriptor) { HashMap<String, String> streamConfigs = new HashMap<>(systemConfig); this.source = getSourceFromSourceParts(sourceParts); this.sourceParts = sourceParts; this.systemStream = new SystemStream(systemName, streamName); String.format("%s is not set or empty for system %s", CFG_SAMZA_REL_CONVERTER, systemName)); if (isRemoteTable()) { samzaRelTableKeyConverterName = streamConfigs.get(CFG_SAMZA_REL_TABLE_KEY_CONVERTER); Validate.notEmpty(samzaRelTableKeyConverterName, streamConfigs.remove(CFG_REL_SCHEMA_PROVIDER); if (!isRemoteTable()) {
/**
 * Returns the source name derived from the qualified name of the table that
 * {@code relNode} refers to.
 */
String getSourceName() {
  List<String> qualifiedName = relNode.getTable().getQualifiedName();
  return SqlIOConfig.getSourceFromSourceParts(qualifiedName);
}
// NOTE(review): excerpt of an output translation routine with statements elided (osd is used but
// not declared here, and the if/else braces are unbalanced). The visible pieces map the stream
// through OutputMapFunction and then target either a cached output stream or a table.
// Not compilable as it stands.
String targetName = SqlIOConfig.getSourceFromSourceParts(tableNameParts); final String systemName = sinkConfig.getSystemName(); final String streamName = sinkConfig.getStreamName(); final String source = sinkConfig.getSource(); MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(targetName, queryId)); Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor(); if (!tableDescriptor.isPresent()) { OutputStream stm = outputMsgStreams.computeIfAbsent(source, v -> streamAppDesc.getOutputStream(osd)); Table outputTable = streamAppDesc.getTable(tableDescriptor.get()); if (outputTable == null) { String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource(); throw new SamzaException(msg);
private boolean isTable(RelNode relNode) { // NOTE: Any intermediate form of a join is always a stream. Eg: For the second level join of // stream-table-table join, the left side of the join is join output, which we always // assume to be a stream. The intermediate stream won't be an instance of EnumerableTableScan. if (relNode instanceof EnumerableTableScan) { return resolveSourceConfig(relNode).getTableDescriptor().isPresent(); } else { return false; } }
// Read the source's name parts and fetch the schema provider registered under its source string.
List<String> sourceParts = ssc.getSourceParts(); RelSchemaProvider relSchemaProvider = relSchemaProviders.get(ssc.getSource());
// Build the IO config from the system/stream names, the system's configs and the table descriptor.
// NOTE(review): this call passes four arguments, while the SqlIOConfig constructor visible in this
// file also takes a sourceParts list — confirm a matching overload exists.
return new SqlIOConfig(systemName, streamName, fetchSystemConfigs(systemName), tableDescriptor);
public SqlIOConfig(String systemName, String streamName, List<String> sourceParts, Config systemConfig, TableDescriptor tableDescriptor) { HashMap<String, String> streamConfigs = new HashMap<>(systemConfig); this.systemName = systemName; this.streamName = streamName; this.source = getSourceFromSourceParts(sourceParts); this.sourceParts = sourceParts; this.systemStream = new SystemStream(systemName, streamName); this.tableDescriptor = Optional.ofNullable(tableDescriptor); samzaRelConverterName = streamConfigs.get(CFG_SAMZA_REL_CONVERTER); Validate.notEmpty(samzaRelConverterName, String.format("%s is not set or empty for system %s", CFG_SAMZA_REL_CONVERTER, systemName)); relSchemaProviderName = streamConfigs.get(CFG_REL_SCHEMA_PROVIDER); // Removing the Samza SQL specific configs to get the remaining Samza configs. streamConfigs.remove(CFG_SAMZA_REL_CONVERTER); streamConfigs.remove(CFG_REL_SCHEMA_PROVIDER); // Currently, only local table is supported. And it is assumed that all tables are local tables. if (tableDescriptor != null) { streamConfigs.put(String.format(StreamConfig.BOOTSTRAP_FOR_STREAM_ID(), streamName), "true"); streamConfigs.put(String.format(StreamConfig.CONSUMER_OFFSET_DEFAULT_FOR_STREAM_ID(), streamName), "oldest"); } config = new MapConfig(streamConfigs); }
/**
 * Routes the relational message stream registered for {@code node} to the sink configured
 * under {@code SAMZA_SYSTEM_LOG}.
 *
 * <p>If the sink config carries a table descriptor the mapped stream is sent to that table.
 * Otherwise it is sent to the output stream held in the static
 * {@code OutputMapFunction.logOutputStream}, which is created on first use and reused
 * afterwards.
 *
 * @throws SamzaException if the application descriptor returns no table for the sink's descriptor
 */
private void sendToOutputStream(StreamApplicationDescriptor appDesc, TranslatorContext context,
    RelNode node, int queryId) {
  SqlIOConfig sinkConfig =
      sqlConfig.getOutputSystemStreamConfigsBySource().get(SamzaSqlApplicationConfig.SAMZA_SYSTEM_LOG);
  MessageStream<SamzaSqlRelMessage> relStream = context.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> mappedStream =
      relStream.map(new OutputMapFunction(SamzaSqlApplicationConfig.SAMZA_SYSTEM_LOG, queryId));

  Optional<TableDescriptor> sinkTableDescriptor = sinkConfig.getTableDescriptor();
  if (sinkTableDescriptor.isPresent()) {
    // Table sink: the table must be resolvable from its descriptor.
    Table sinkTable = appDesc.getTable(sinkTableDescriptor.get());
    if (sinkTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    mappedStream.sendTo(sinkTable);
    return;
  }

  // Stream sink: build the descriptor, then lazily create the shared log output stream.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  String sinkSystem = sinkConfig.getSystemName();
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(sinkSystem, DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(sinkConfig.getStreamName(), passThroughSerde);
  if (OutputMapFunction.logOutputStream == null) {
    OutputMapFunction.logOutputStream = appDesc.getOutputStream(outputDescriptor);
  }
  mappedStream.sendTo(OutputMapFunction.logOutputStream);
}
} // closes the enclosing scope opened outside this span
// Three value-mapper lambdas (their enclosing collector calls are outside this span). Each creates
// one plugin for an IO config x by casting the loaded object to its factory type and calling create
// with x's system stream: RelSchemaProvider, SamzaRelConverter (which also receives the schema
// provider registered for x's source), and SamzaRelTableKeyConverter.
x -> initializePlugin("RelSchemaProvider", x.getRelSchemaProviderName(), staticConfig, CFG_FMT_REL_SCHEMA_PROVIDER_DOMAIN, (o, c) -> ((RelSchemaProviderFactory) o).create(x.getSystemStream(), c)))); x -> initializePlugin("SamzaRelConverter", x.getSamzaRelConverterName(), staticConfig, CFG_FMT_SAMZA_REL_CONVERTER_DOMAIN, (o, c) -> ((SamzaRelConverterFactory) o).create(x.getSystemStream(), relSchemaProvidersBySource.get(x.getSource()), c)))); x -> initializePlugin("SamzaRelTableKeyConverter", x.getSamzaRelTableKeyConverterName(), staticConfig, CFG_FMT_SAMZA_REL_TABLE_KEY_CONVERTER_DOMAIN, (o, c) -> ((SamzaRelTableKeyConverterFactory) o).create(x.getSystemStream(), c))));
// Derive the source name from the scanned table's qualified name and read the system name, stream id
// and source from sqlIOConfig (declared outside this span). isRemoteTable is true only when a table
// descriptor is present and is a RemoteTableDescriptor or a CachingTableDescriptor.
StreamApplicationDescriptor streamAppDesc = context.getStreamAppDescriptor(); List<String> tableNameParts = tableScan.getTable().getQualifiedName(); String sourceName = SqlIOConfig.getSourceFromSourceParts(tableNameParts); final String systemName = sqlIOConfig.getSystemName(); final String streamId = sqlIOConfig.getStreamId(); final String source = sqlIOConfig.getSource(); final boolean isRemoteTable = sqlIOConfig.getTableDescriptor().isPresent() && (sqlIOConfig.getTableDescriptor().get() instanceof RemoteTableDescriptor || sqlIOConfig.getTableDescriptor().get() instanceof CachingTableDescriptor);
private Table loadLocalTable(boolean isTablePosOnRight, List<Integer> tableKeyIds, LogicalJoin join, TranslatorContext context) { RelNode relNode = isTablePosOnRight ? join.getRight() : join.getLeft(); MessageStream<SamzaSqlRelMessage> relOutputStream = context.getMessageStream(relNode.getId()); SqlIOConfig sourceConfig = resolveSourceConfig(relNode); if (!sourceConfig.getTableDescriptor().isPresent()) { String errMsg = "Failed to resolve table source in join operation: node=" + relNode; log.error(errMsg); throw new SamzaException(errMsg); } // Create a table backed by RocksDb store with the fields in the join condition as composite key and relational // message as the value. Send the messages from the input stream denoted as 'table' to the created table store. Table<KV<SamzaSqlCompositeKey, SamzaSqlRelMessage>> table = context.getStreamAppDescriptor().getTable(sourceConfig.getTableDescriptor().get()); relOutputStream .map(m -> new KV(createSamzaSqlCompositeKey(m, tableKeyIds), m)) .sendTo(table); return table; } }