// Field-level lineage operations, built as FieldTransformOperation(name, description, inputs, outputs):
//  - joinOperation: named "Join"; reads joinInputs and is given a fresh ArrayList copy of
//    joinOutputs as its outputs. It is appended to `operations` immediately.
//  - identity / transform: both use the same operationName, the same single input
//    (stagedInputField) and the same single output (outputFieldInfo.name); they differ only
//    in the description constant (IDENTITY_OPERATION_DESCRIPTION vs RENAME_OPERATION_DESCRIPTION).
// NOTE(review): neither `identity` nor `transform` is added to `operations` within the visible
// code; presumably only one of them is recorded, depending on whether the field was renamed —
// confirm against the surrounding code.
FieldOperation joinOperation = new FieldTransformOperation("Join", JOIN_OPERATION_DESCRIPTION, joinInputs, new ArrayList<>(joinOutputs)); operations.add(joinOperation); FieldOperation identity = new FieldTransformOperation(operationName, IDENTITY_OPERATION_DESCRIPTION, Collections.singletonList(stagedInputField), outputFieldInfo.name); FieldOperation transform = new FieldTransformOperation(operationName, RENAME_OPERATION_DESCRIPTION, Collections.singletonList(stagedInputField), outputFieldInfo.name);
@Override public void prepareRun(StageSubmitterContext context) throws Exception { super.prepareRun(context); List<String> inputFields = new ArrayList<>(); List<String> outputFields = new ArrayList<>(); Schema inputSchema = context.getInputSchema(); if (SchemaValidator.canRecordLineage(inputSchema, "input")) { //noinspection ConstantConditions inputFields = inputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } Schema outputSchema = context.getOutputSchema(); if (SchemaValidator.canRecordLineage(outputSchema, "output")) { //noinspection ConstantConditions outputFields = outputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } FieldOperation dataPrepOperation = new FieldTransformOperation("Python", config.script, inputFields, outputFields); context.record(Collections.singletonList(dataPrepOperation)); }
@Override public void prepareRun(StageSubmitterContext context) throws Exception { super.prepareRun(context); List<String> inputFields = new ArrayList<>(); List<String> outputFields = new ArrayList<>(); Schema inputSchema = context.getInputSchema(); if (SchemaValidator.canRecordLineage(inputSchema, "input")) { //noinspection ConstantConditions inputFields = inputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } Schema outputSchema = context.getOutputSchema(); if (SchemaValidator.canRecordLineage(outputSchema, "output")) { //noinspection ConstantConditions outputFields = outputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } FieldOperation dataPrepOperation = new FieldTransformOperation("JavaScript", config.script, inputFields, outputFields); context.record(Collections.singletonList(dataPrepOperation)); }
@Override public void prepareRun(BatchAggregatorContext context) throws Exception { super.prepareRun(context); LinkedList<FieldOperation> fllOperations = new LinkedList<>(); // in configurePipeline all the necessary checks have been performed already to set output schema if (SchemaValidator.canRecordLineage(context.getOutputSchema(), "output")) { Schema inputSchema = context.getInputSchema(); // for every function record the field level operation details for (GroupByConfig.FunctionInfo functionInfo : conf.getAggregates()) { Schema.Field outputSchemaField = getOutputSchemaField(functionInfo, inputSchema); String operationName = String.format("Group %s", functionInfo.getField()); String description = String.format("Aggregate function applied: '%s'.", functionInfo.getFunction()); FieldOperation operation = new FieldTransformOperation(operationName, description, Collections.singletonList(functionInfo.getField()), outputSchemaField.getName()); fllOperations.add(operation); } } context.record(fllOperations); }