/**
 * Re-aliases the given input so that its alias carries a freshly generated UUID suffix.
 *
 * <p>The new alias has the form {@code <current alias>-<random UUID>}.
 *
 * @param input the input whose current alias is used as the prefix
 * @return whatever {@code input.alias(...)} returns for the suffixed alias
 */
private Input suffixInput(Input input) {
  final String baseAlias = input.getAlias();
  final UUID uniqueSuffix = UUID.randomUUID();
  return input.alias(String.format("%s-%s", baseAlias, uniqueSuffix));
}
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); Map<String, String> inputArgs = new HashMap<>(); FileSetArguments.setInputPath(inputArgs, "inputFile"); context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class); // A second input, aliasing so mapper gets the alias'd name context.addInput(Input.ofDataset(PURCHASES2, inputArgs).alias("secondPurchases"), FileMapper2.class); // since we set a Mapper class on the job itself, omitting the mapper in the addInput call will default to that context.addInput(Input.ofDataset(CUSTOMERS, inputArgs)); Map<String, String> outputArgs = new HashMap<>(); FileSetArguments.setOutputPath(outputArgs, "output"); context.addOutput(Output.ofDataset(OUTPUT_DATASET, outputArgs)); Job job = context.getHadoopJob(); job.setMapperClass(FileMapper.class); job.setReducerClass(FileReducer.class); } }
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }