// Describe the join as a single transform operation named "Join" that maps joinInputs to a
// copy of joinOutputs, then append it to `operations`.
FieldOperation joinOperation = new FieldTransformOperation("Join", JOIN_OPERATION_DESCRIPTION, joinInputs,
                                                           new ArrayList<>(joinOutputs));
operations.add(joinOperation);
// Transform from the single staged input field to the output field's name, carrying the
// identity description.
FieldOperation identity = new FieldTransformOperation(operationName, IDENTITY_OPERATION_DESCRIPTION,
                                                      Collections.singletonList(stagedInputField),
                                                      outputFieldInfo.name);
// Same operation name, input and output as `identity`; only the description differs (rename).
// NOTE(review): presumably `identity` and `transform` are built in alternative branches of code
// that is not visible here - confirm against the enclosing method.
FieldOperation transform = new FieldTransformOperation(operationName, RENAME_OPERATION_DESCRIPTION,
                                                       Collections.singletonList(stagedInputField),
                                                       outputFieldInfo.name);
case TRANSFORM: FieldTransformOperation transform = (FieldTransformOperation) fieldOperation; List<InputField> inputFields = createInputFields(transform.getInputFields(), stageName, processedOperations); newOperation = new TransformOperation(newOperationName, transform.getDescription(), inputFields, transform.getOutputFields()); currentOperationOutputs.addAll(transform.getOutputFields()); break; case WRITE:
case TRANSFORM: FieldTransformOperation transform = (FieldTransformOperation) pipelineOperation; validateInputs(pipelineOperation.getName(), transform.getInputFields(), validInputsSoFar); updateInvalidOutputs(transform.getInputFields(), unusedOutputs, redundantOutputs); validInputsSoFar.addAll(transform.getOutputFields()); for (String field : transform.getOutputFields()) { List<String> origins = unusedOutputs.computeIfAbsent(field, k -> new ArrayList<>()); origins.add(pipelineOperation.getName());
@Override public void prepareRun(StageSubmitterContext context) throws Exception { super.prepareRun(context); List<String> inputFields = new ArrayList<>(); List<String> outputFields = new ArrayList<>(); Schema inputSchema = context.getInputSchema(); if (SchemaValidator.canRecordLineage(inputSchema, "input")) { //noinspection ConstantConditions inputFields = inputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } Schema outputSchema = context.getOutputSchema(); if (SchemaValidator.canRecordLineage(outputSchema, "output")) { //noinspection ConstantConditions outputFields = outputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } FieldOperation dataPrepOperation = new FieldTransformOperation("Python", config.script, inputFields, outputFields); context.record(Collections.singletonList(dataPrepOperation)); }
@Override public void prepareRun(StageSubmitterContext context) throws Exception { super.prepareRun(context); List<String> inputFields = new ArrayList<>(); List<String> outputFields = new ArrayList<>(); Schema inputSchema = context.getInputSchema(); if (SchemaValidator.canRecordLineage(inputSchema, "input")) { //noinspection ConstantConditions inputFields = inputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } Schema outputSchema = context.getOutputSchema(); if (SchemaValidator.canRecordLineage(outputSchema, "output")) { //noinspection ConstantConditions outputFields = outputSchema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()); } FieldOperation dataPrepOperation = new FieldTransformOperation("JavaScript", config.script, inputFields, outputFields); context.record(Collections.singletonList(dataPrepOperation)); }
@Override public void prepareRun(BatchAggregatorContext context) throws Exception { super.prepareRun(context); LinkedList<FieldOperation> fllOperations = new LinkedList<>(); // in configurePipeline all the necessary checks have been performed already to set output schema if (SchemaValidator.canRecordLineage(context.getOutputSchema(), "output")) { Schema inputSchema = context.getInputSchema(); // for every function record the field level operation details for (GroupByConfig.FunctionInfo functionInfo : conf.getAggregates()) { Schema.Field outputSchemaField = getOutputSchemaField(functionInfo, inputSchema); String operationName = String.format("Group %s", functionInfo.getField()); String description = String.format("Aggregate function applied: '%s'.", functionInfo.getFunction()); FieldOperation operation = new FieldTransformOperation(operationName, description, Collections.singletonList(functionInfo.getField()), outputSchemaField.getName()); fllOperations.add(operation); } } context.record(fllOperations); }