/**
 * Prepares this stage for record processing at batch runtime.
 * No stage-specific setup is needed here beyond the base class initialization.
 */
@Override public void initialize(BatchRuntimeContext context) throws Exception { super.initialize(context); }
/**
 * Registers the datasets this stage writes to so they are created at deploy time.
 *
 * <p>Dataset names that contain macros cannot be resolved at configure time, so a
 * dataset is only created here when its name is fully known. The original code
 * created the Table unconditionally, which breaks when {@code tableName} is
 * macro-enabled; it is now guarded the same way as {@code runtimeDatasetName}.
 * When no macro is present, {@code containsMacro} returns false and behavior is
 * unchanged.
 *
 * @param pipelineConfigurer configurer used to register datasets for this stage
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
  super.configurePipeline(pipelineConfigurer);
  if (!config.containsMacro("tableName")) {
    pipelineConfigurer.createDataset(config.tableName, Table.class);
  }
  if (!config.containsMacro("runtimeDatasetName")) {
    pipelineConfigurer.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(),
                                     DatasetProperties.EMPTY);
  }
}
/**
 * Creates the backing Table dataset and, when an output schema string is
 * configured, parses it and publishes it as this stage's output schema.
 *
 * @param pipelineConfigurer configurer used to register the dataset and schema
 * @throws IllegalArgumentException if the configured schema is not valid JSON
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
  super.configurePipeline(pipelineConfigurer);
  pipelineConfigurer.createDataset(config.tableName, Table.class);
  if (config.schema == null) {
    // No schema configured; nothing to publish.
    return;
  }
  try {
    Schema parsedSchema = Schema.parseJson(config.schema);
    pipelineConfigurer.getStageConfigurer().setOutputSchema(parsedSchema);
  } catch (IOException e) {
    // Surface the malformed schema to the user, preserving the parse failure cause.
    throw new IllegalArgumentException("Could not parse schema " + config.schema, e);
  }
}
/**
 * Validates the Excel reader configuration, rejects the case where both the
 * column list and the field mapping are empty, registers datasets, and
 * publishes the derived output schema.
 *
 * @param pipelineConfigurer configurer used to register datasets and schema
 * @throws IllegalArgumentException if neither columns nor a field mapping are given
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
  super.configurePipeline(pipelineConfigurer);
  excelInputreaderConfig.validate();

  // At least one of the two inputs must be supplied to derive a schema.
  boolean columnsMissing = Strings.isNullOrEmpty(excelInputreaderConfig.columnList);
  boolean mappingMissing = Strings.isNullOrEmpty(excelInputreaderConfig.outputSchema);
  if (columnsMissing && mappingMissing) {
    throw new IllegalArgumentException(
        "'Field Name Schema Type Mapping' input cannot be empty when the empty "
            + "input value of 'Columns To Be Extracted' is provided.");
  }

  createDatasets(pipelineConfigurer, null);
  // init() and getOutputSchema() populate the outputSchema field used below.
  init();
  getOutputSchema();
  pipelineConfigurer.getStageConfigurer().setOutputSchema(outputSchema);
}
@Override public void initialize(BatchRuntimeContext context) throws Exception { super.initialize(context); if (config.schema != null) { // should never happen, just done to test App correctness in unit tests Schema outputSchema = Schema.parseJson(config.schema); if (!outputSchema.equals(context.getOutputSchema())) { throw new IllegalStateException("Output schema does not match what was set at configure time."); } } }
@Override public void configurePipeline(PipelineConfigurer pipelineConfigurer) { super.configurePipeline(pipelineConfigurer); streamBatchConfig.validate(); if (!streamBatchConfig.containsMacro(Properties.Stream.NAME)) { pipelineConfigurer.addStream(new Stream(streamBatchConfig.name)); } // if no format is specified then default schema is used, if otherwise its based on format spec. if (streamBatchConfig.format == null) { pipelineConfigurer.getStageConfigurer().setOutputSchema(DEFAULT_SCHEMA); } else if (streamBatchConfig.getFormatSpec() != null && streamBatchConfig.getFormatSpec().getSchema() != null) { List<Schema.Field> fields = Lists.newArrayList(); fields.add(Schema.Field.of("ts", Schema.of(Schema.Type.LONG))); fields.add(Schema.Field.of("headers", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING)))); fields.addAll(streamBatchConfig.getFormatSpec().getSchema().getFields()); pipelineConfigurer.getStageConfigurer().setOutputSchema(Schema.recordOf("event", fields)); } }
/**
 * Transforms one key/value pair from the source into emitted records.
 *
 * <p>The transform chain is built lazily on the first call — presumably so the
 * plugin is instantiated where this function actually runs rather than at
 * construction time (TODO confirm: typical Spark-executor lazy-init pattern,
 * given the Tuple2 input). When data tracing is enabled, the source is wrapped
 * in a LimitingTransform capped at {@code numOfRecordsPreview} records.
 */
@Override public Iterable<RecordInfo<Object>> call(Tuple2<Object, Object> input) throws Exception {
  // One-time setup: create and initialize the plugin, wire metrics/tracing/stats.
  if (transform == null) {
    BatchSource<Object, Object, Object> batchSource = pluginFunctionContext.createPlugin();
    batchSource.initialize(pluginFunctionContext.createBatchRuntimeContext());
    transform = new TrackedTransform<>(pluginFunctionContext.getDataTracer().isEnabled()
        ? new LimitingTransform<>(batchSource, numOfRecordsPreview)
        : batchSource,
        pluginFunctionContext.createStageMetrics(),
        pluginFunctionContext.getDataTracer(),
        pluginFunctionContext.getStageStatisticsCollector());
    emitter = new CombinedEmitter<>(pluginFunctionContext.getStageName());
  }
  // The emitter is reused across calls, so clear any records from the previous input.
  emitter.reset();
  KeyValue<Object, Object> inputKV = new KeyValue<>(input._1(), input._2());
  transform.transform(inputKV, emitter);
  return emitter.getEmitted();
}
}