/**
 * Makes the alias of an {@link Output} unique by appending a freshly generated random
 * {@link UUID} to it, separated by a dash.
 *
 * @param output the output whose current alias is used as the prefix
 * @return the result of calling {@link Output#alias(String)} on {@code output} with the suffixed alias
 */
private Output suffixOutput(Output output) {
  String currentAlias = output.getAlias();
  UUID uniqueSuffix = UUID.randomUUID();
  String uniqueAlias = String.format("%s-%s", currentAlias, uniqueSuffix);
  return output.alias(uniqueAlias);
}
/**
 * Registers a single text output with the given sink context.
 *
 * <p>The output is backed by {@link TextOutputFormat}, with its output directory property
 * ({@code TextOutputFormat.OUTDIR}) set to {@code config.dirName}. When {@code config.name} is set,
 * the output is created under that name and {@code config.alias} is applied as its alias; otherwise
 * {@code config.alias} itself is used as the output name.
 *
 * @param context the sink context the output is added to
 * @throws Exception declared by the overridden method
 */
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  String formatClassName = TextOutputFormat.class.getCanonicalName();
  OutputFormatProvider textFormatProvider =
    new BasicOutputFormatProvider(formatClassName, ImmutableMap.of(TextOutputFormat.OUTDIR, config.dirName));

  Output sinkOutput;
  if (config.name == null) {
    // No explicit name configured: the alias doubles as the output name.
    sinkOutput = Output.of(config.alias, textFormatProvider);
  } else {
    sinkOutput = Output.of(config.name, textFormatProvider);
    sinkOutput.alias(config.alias);
  }
  context.addOutput(sinkOutput);
}
/**
 * Runs the parent initialization, then adds the {@code SEPARATED_PURCHASES} dataset as an output
 * under the alias {@code "small_purchases"}.
 *
 * @throws Exception declared by the overridden method
 */
@Override
public void initialize() throws Exception {
  super.initialize();
  Output smallPurchases = Output.ofDataset(SEPARATED_PURCHASES).alias("small_purchases");
  getContext().addOutput(smallPurchases);
}
} // closes the enclosing scope, which is opened outside this view
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "inputFile"); // test using a stream with the same name, but aliasing it differently (so mapper gets the alias'd name) context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class); Map<String, String> output1Args = new HashMap<>(); FileSetArguments.setOutputPath(output1Args, "small_purchases"); context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output1Args).alias("small_purchases")); Map<String, String> output2Args = new HashMap<>(); FileSetArguments.setOutputPath(output2Args, "large_purchases"); context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output2Args).alias("large_purchases")); Job job = context.getHadoopJob(); job.setMapperClass(FileMapper.class); job.setNumReduceTasks(0); } }