/**
 * Returns an Output defined by a dataset.
 *
 * @param datasetName the name of the output dataset
 */
public static Output ofDataset(String datasetName) {
  return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS);
}
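The single-argument overload simply delegates to ofDataset(datasetName, arguments) with empty runtime arguments. A minimal usage sketch of how the factory methods seen in the snippets below are typically chained before registration; the dataset name "events", the namespace "demo", the alias, and the output path are placeholders, not names from this codebase:

// Hypothetical usage; assumes a MapReduceContext or BatchSinkContext named "context"
// as in the surrounding snippets. All string values are placeholders.
Map<String, String> outputArgs = new HashMap<>();
FileSetArguments.setOutputPath(outputArgs, "out");
Output output = Output.ofDataset("events", outputArgs)
  .fromNamespace("demo")
  .alias("eventsOut");
context.addOutput(output);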
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  OutputFormatProvider outputFormatProvider =
    new BasicOutputFormatProvider(TextOutputFormat.class.getCanonicalName(),
                                  ImmutableMap.of(TextOutputFormat.OUTDIR, config.dirName));
  if (config.name != null) {
    Output output = Output.of(config.name, outputFormatProvider);
    output.alias(config.alias);
    context.addOutput(output);
  } else {
    context.addOutput(Output.of(config.alias, outputFormatProvider));
  }
}
private DatasetOutput(String name, Map<String, String> arguments, String namespace) {
  this(name, arguments);
  super.fromNamespace(namespace);
}
@Override
public void initialize() {
  MapReduceContext context = getContext();
  context.addInput(Input.ofDataset(context.getRuntimeArguments().get(INPUT_DATASET_NAME))
                     .fromNamespace(context.getRuntimeArguments().get(INPUT_DATASET_NS)));
  context.addOutput(Output.ofDataset(context.getRuntimeArguments().get(OUTPUT_DATASET_NAME))
                      .fromNamespace(context.getRuntimeArguments().get(OUTPUT_DATASET_NS)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}
@Override
public void addOutput(Output output) {
  if (output.getNamespace() != null
      && output.getNamespace().equals(NamespaceId.SYSTEM.getNamespace())
      && !getProgram().getNamespaceId().equals(NamespaceId.SYSTEM.getNamespace())) {
    // message text assumed; the original format string is cut off in this snippet,
    // but it reports output.getName() and getProgram().getNamespaceId()
    throw new IllegalArgumentException(
      String.format("Output '%s' in the system namespace cannot be added by a program in namespace '%s'",
                    output.getName(), getProgram().getNamespaceId()));
  }
  String alias = output.getAlias();
  if (this.outputs.containsKey(alias)) {
    throw new IllegalArgumentException("Output already configured: " + alias);
  }
  // remainder omitted in this excerpt: the method goes on to reject unsupported Output
  // subclasses, reporting output.getName() and output.getClass().getCanonicalName()
}
@Override
public final void prepareRun(BatchSinkContext context) throws InstantiationException {
  config.validate();
  // set format specific properties.
  OutputFormatProvider outputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID);

  // record field level lineage information
  // needs to happen before context.addOutput(), otherwise an external dataset without schema will be created.
  Schema schema = config.getSchema();
  if (schema == null) {
    schema = context.getInputSchema();
  }
  LineageRecorder lineageRecorder = new LineageRecorder(context, config.getReferenceName());
  lineageRecorder.createExternalDataset(schema);
  if (schema != null && schema.getFields() != null && !schema.getFields().isEmpty()) {
    recordLineage(lineageRecorder,
                  schema.getFields().stream().map(Schema.Field::getName).collect(Collectors.toList()));
  }

  Map<String, String> outputProperties = new HashMap<>(outputFormatProvider.getOutputFormatConfiguration());
  outputProperties.putAll(getFileSystemProperties(context));
  outputProperties.put(FileOutputFormat.OUTDIR, getOutputDir(context.getLogicalStartTime()));
  context.addOutput(Output.of(config.getReferenceName(),
                              new SinkOutputFormatProvider(outputFormatProvider.getOutputFormatClassName(),
                                                           outputProperties)));
}
public ProvidedOutput(Output originalOutput, OutputFormatProvider outputFormatProvider) {
  this.output = originalOutput;
  this.outputFormatProvider = outputFormatProvider;
  this.outputFormatClassName = outputFormatProvider.getOutputFormatClassName();
  this.outputFormatConfiguration = outputFormatProvider.getOutputFormatConfiguration();
  if (outputFormatClassName == null) {
    throw new IllegalArgumentException(
      String.format("Output '%s' provided null as the output format", output.getAlias()));
  }
  if (outputFormatConfiguration == null) {
    throw new IllegalArgumentException(
      String.format("Output '%s' provided null as the output format configuration", output.getAlias()));
  }
}
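A brief sketch of how such a ProvidedOutput could be constructed, reusing the BasicOutputFormatProvider shown earlier; the output name "textOut" and the directory "/tmp/out" are placeholders:

// Hypothetical wiring; the constructor validates that the provider exposes a
// non-null class name and configuration, otherwise it throws as shown above.
OutputFormatProvider provider =
  new BasicOutputFormatProvider(TextOutputFormat.class.getCanonicalName(),
                                ImmutableMap.of(TextOutputFormat.OUTDIR, "/tmp/out"));
ProvidedOutput provided = new ProvidedOutput(Output.of("textOut", provider), provider);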
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "inputFile");
  // test using a stream with the same name, but aliasing it differently (so mapper gets the alias'd name)
  context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class);

  Map<String, String> output1Args = new HashMap<>();
  FileSetArguments.setOutputPath(output1Args, "small_purchases");
  context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output1Args).alias("small_purchases"));

  Map<String, String> output2Args = new HashMap<>();
  FileSetArguments.setOutputPath(output2Args, "large_purchases");
  context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output2Args).alias("large_purchases"));

  Job job = context.getHadoopJob();
  job.setMapperClass(FileMapper.class);
  job.setNumReduceTasks(0);
}
context.addOutput(Output.of("test", new OutputFormatProvider() { @Override public String getOutputFormatClassName() {
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  context.addOutput(Output.ofDataset(config.tableName));
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(), DatasetProperties.EMPTY);
  }
}
@Override
public void initialize() throws Exception {
  // this write should be invalidated if any of the following fails
  KeyValueTable kvTable = getContext().getDataset("recorder");
  kvTable.write("initialized", "true");
  if (getContext().getRuntimeArguments().containsKey("failInput")) {
    getContext().addInput(Input.of("x", new FailingInputFormatProvider()));
  }
  if (getContext().getRuntimeArguments().containsKey("failOutput")) {
    getContext().addOutput(Output.of("x", new FailingOutputFormatProvider()));
  }
}
private Map<String, DatasetOutputCommitter> getDatasetOutputCommitters(List<ProvidedOutput> providedOutputs) {
  Map<String, DatasetOutputCommitter> datasetOutputCommitterOutputs = new HashMap<>();
  for (ProvidedOutput providedOutput : providedOutputs) {
    if (providedOutput.getOutputFormatProvider() instanceof DatasetOutputCommitter) {
      datasetOutputCommitterOutputs.put(providedOutput.getOutput().getAlias(),
                                        (DatasetOutputCommitter) providedOutput.getOutputFormatProvider());
    }
  }
  return datasetOutputCommitterOutputs;
}
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  if (!context.datasetExists(properties.get(Properties.BatchReadableWritable.NAME))) {
    context.createDataset(properties.get(Properties.BatchReadableWritable.NAME),
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.addOutput(Output.ofDataset(properties.get(Properties.BatchReadableWritable.NAME)));
}
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  if (!context.datasetExists(config.tableName)) {
    context.createDataset(config.tableName, "table", DatasetProperties.EMPTY);
  }
  context.addOutput(Output.ofDataset(config.tableName));
}