/**
 * Returns an {@link Output} backed by the named dataset, supplying no runtime
 * arguments (delegates to the two-argument overload with
 * {@code RuntimeArguments.NO_ARGUMENTS}).
 *
 * @param datasetName the name of the output dataset
 * @return an Output referring to the given dataset
 */
public static Output ofDataset(String datasetName) { return ofDataset(datasetName, RuntimeArguments.NO_ARGUMENTS); }
@Override
public void prepareRun(BatchSinkContext context) throws Exception {
  // Lazily create the runtime KeyValueTable the first time this sink runs.
  if (!context.datasetExists(config.runtimeDatasetName)) {
    context.createDataset(config.runtimeDatasetName, KeyValueTable.class.getName(),
                          DatasetProperties.EMPTY);
  }
  // Register the configured table as this sink's batch output.
  context.addOutput(Output.ofDataset(config.tableName));
}
@Override
public void prepareRun(BatchSinkContext context) throws DatasetManagementException {
  Map<String, String> properties = getProperties();
  // Look the dataset name up once instead of repeating the map access.
  String datasetName = properties.get(Properties.BatchReadableWritable.NAME);
  // Create the target dataset on demand before registering it as the output.
  if (!context.datasetExists(datasetName)) {
    context.createDataset(datasetName,
                          properties.get(Properties.BatchReadableWritable.TYPE),
                          DatasetProperties.builder().addAll(properties).build());
  }
  context.addOutput(Output.ofDataset(datasetName));
}
}
/**
 * Configures a map-only job that reads the partitioned dataset and writes to the
 * output dataset, passing the required {@code ROW_TO_WRITE} runtime argument down
 * to the mappers via the Hadoop configuration.
 *
 * @throws IllegalArgumentException if the {@code ROW_TO_WRITE} runtime argument is absent
 */
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ReaderMapper.class);
  job.setNumReduceTasks(0);
  String row = context.getRuntimeArguments().get(ROW_TO_WRITE);
  // Fail fast with a clear message instead of letting Configuration.set(...) fail
  // obscurely when the required runtime argument is missing.
  if (row == null) {
    throw new IllegalArgumentException("Missing required runtime argument: " + ROW_TO_WRITE);
  }
  job.getConfiguration().set(ROW_TO_WRITE, row);
  context.addInput(Input.ofDataset(PARTITIONED));
  context.addOutput(Output.ofDataset(OUTPUT));
}
}
/**
 * Configures a map-only job that reads the time-partitioned dataset and writes to
 * the output dataset, passing the required {@code ROW_TO_WRITE} runtime argument
 * down to the mappers via the Hadoop configuration.
 *
 * @throws IllegalArgumentException if the {@code ROW_TO_WRITE} runtime argument is absent
 */
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ReaderMapper.class);
  job.setNumReduceTasks(0);
  String row = context.getRuntimeArguments().get(ROW_TO_WRITE);
  // Fail fast with a clear message instead of letting Configuration.set(...) fail
  // obscurely when the required runtime argument is missing.
  if (row == null) {
    throw new IllegalArgumentException("Missing required runtime argument: " + ROW_TO_WRITE);
  }
  job.getConfiguration().set(ROW_TO_WRITE, row);
  context.addInput(Input.ofDataset(TIME_PARTITIONED));
  context.addOutput(Output.ofDataset(OUTPUT));
}
}
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); // the inputs will be set in child classes context.addOutput(Output.ofDataset("output")); Job job = context.getHadoopJob(); job.setReducerClass(SomeReducer.class); } }
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  // Pass-through job: no-op mapper and reducer moving records between datasets.
  context.addInput(Input.ofDataset(DATASET_NAME));
  context.addOutput(Output.ofDataset(DATASET_NAME2));
  Job job = context.getHadoopJob();
  job.setMapperClass(NoOpMapper.class);
  job.setReducerClass(NoOpReducer.class);
}
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  // Read objects from the "keys" store, aggregate counts into "count".
  context.addInput(Input.ofDataset("keys"));
  context.addOutput(Output.ofDataset("count"));
  Job job = context.getHadoopJob();
  job.setMapperClass(ObjectStoreMapper.class);
  job.setReducerClass(KeyValueStoreReducer.class);
}
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  // Map-only job: copies records from the input dataset into the partitioned one.
  context.addInput(Input.ofDataset(INPUT));
  context.addOutput(Output.ofDataset(PARTITIONED));
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
}
}
@Override
public void initialize() {
  MapReduceContext context = getContext();
  // Input and output dataset names are supplied via runtime arguments.
  Map<String, String> args = context.getRuntimeArguments();
  context.addInput(Input.ofDataset(args.get(INPUT_KEY)));
  context.addOutput(Output.ofDataset(args.get(OUTPUT_KEY)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  // Map-only job: copies records from the input dataset into the time-partitioned one.
  context.addInput(Input.ofDataset(INPUT));
  context.addOutput(Output.ofDataset(TIME_PARTITIONED));
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
}
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  // Full map-reduce pipeline from the "foo" dataset into "bar".
  context.addInput(Input.ofDataset("foo"));
  context.addOutput(Output.ofDataset("bar"));
  Job job = context.getHadoopJob();
  job.setMapperClass(DummyMapper.class);
  job.setReducerClass(DummyReducer.class);
}
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  // Map-only copy from "input" to "output".
  context.addInput(Input.ofDataset("input"));
  context.addOutput(Output.ofDataset("output"));
  Job job = context.getHadoopJob();
  job.setMapperClass(SimpleMapper.class);
  job.setNumReduceTasks(0);
}
}
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); Job job = context.getHadoopJob(); job.setMapperClass(Tokenizer.class); job.setReducerClass(Counter.class); job.setNumReduceTasks(1); context.addInput(Input.ofDataset("lines")); context.addOutput(Output.ofDataset("counts")); // truncate the output dataset context.getAdmin().truncateDataset("counts"); }
@Override
public void initialize() {
  MapReduceContext context = getContext();
  // Both the dataset names and their namespaces come from runtime arguments.
  Map<String, String> args = context.getRuntimeArguments();
  context.addInput(
      Input.ofDataset(args.get(INPUT_DATASET_NAME)).fromNamespace(args.get(INPUT_DATASET_NS)));
  context.addOutput(
      Output.ofDataset(args.get(OUTPUT_DATASET_NAME)).fromNamespace(args.get(OUTPUT_DATASET_NS)));
  Job hadoopJob = context.getHadoopJob();
  hadoopJob.setMapperClass(IdentityMapper.class);
  hadoopJob.setNumReduceTasks(0);
}
/**
 * Configures a map-only job with one fixed input and a space-separated list of
 * output datasets (from the {@code "outputs"} runtime argument), each using the
 * same dynamic-partitioner output arguments.
 *
 * @throws IllegalArgumentException if the {@code "outputs"} runtime argument is absent
 */
@Override
protected void initialize() throws Exception {
  getContext().addInput(Input.ofDataset("input"));
  Map<String, String> outputArgs = new HashMap<>();
  PartitionedFileSetArguments.setDynamicPartitioner(outputArgs, KeyPartitioner.class);
  String outputsArg = getContext().getRuntimeArguments().get("outputs");
  // Fail fast with a clear message instead of an opaque NPE on split() below.
  if (outputsArg == null) {
    throw new IllegalArgumentException("Missing required runtime argument: outputs");
  }
  for (String outputDataset : outputsArg.split(" ")) {
    getContext().addOutput(Output.ofDataset(outputDataset, outputArgs));
  }
  Job job = getContext().getHadoopJob();
  job.setMapperClass(DynamicMapper.class);
  job.setNumReduceTasks(0);
}
@Override
public void initialize() throws Exception {
  // Copies records from DATASET_NAME2 back into DATASET_NAME; the Hadoop job
  // itself needs no extra configuration here.
  MapReduceContext ctx = getContext();
  ctx.addInput(Input.ofDataset(DATASET_NAME2));
  ctx.addOutput(Output.ofDataset(DATASET_NAME));
}
}
@Override
public void initialize() throws Exception {
  super.initialize();
  // Register an additional aliased output on top of whatever the parent configured.
  Output smallPurchases = Output.ofDataset(SEPARATED_PURCHASES).alias("small_purchases");
  getContext().addOutput(smallPurchases);
}
}
@Override public void initialize() throws Exception { MapReduceContext context = getContext(); Job job = context.getHadoopJob(); job.setReducerClass(FileReducer.class); // user can opt to define the mapper class through our APIs, instead of directly on the job context.addInput(Input.ofDataset(context.getSpecification().getProperty("input")), FileMapper.class); context.addOutput(Output.ofDataset(context.getSpecification().getProperty("output"))); } }
@Override
protected void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(ClusterNameMapper.class);
  job.setReducerClass(ClusterNameReducer.class);
  context.addInput(Input.ofDataset(INPUT_FILE_SET));
  context.addOutput(Output.ofDataset(OUTPUT_FILE_SET));
  // Record the cluster name as seen from the MR client, prefixed with the
  // workflow name when this job runs inside a workflow.
  WorkflowInfo workflowInfo = context.getWorkflowInfo();
  String prefix = workflowInfo == null ? "" : workflowInfo.getName() + ".";
  clusterNameTable.write(prefix + "mr.client.cluster.name", context.getClusterName());
}