@Test @SuppressWarnings("deprecation") public void testJobAppend() throws Exception { populateInputDataset(); populateOutputDataset(); // existing output will be overwritten Job job = new Job(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(GenericStatsReducer.class); DatasetKeyOutputFormat.configure(job).appendTo(outputDataset).withType(GenericData.Record.class); Assert.assertTrue(job.waitForCompletion(true)); checkOutput(true); }
@Test @SuppressWarnings("deprecation") public void testSignalReadyOutputView() throws Exception { Assume.assumeTrue(!Hadoop.isHadoop1()); populateInputDataset(); populateOutputDataset(); // existing output will be overwritten Job job = new Job(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(GenericStatsReducer.class); View<Record> outputView = outputDataset.with("name", "apple", "banana", "carrot"); DatasetKeyOutputFormat.configure(job).appendTo(outputView).withType(GenericData.Record.class); Assert.assertTrue(job.waitForCompletion(true)); Assert.assertFalse("Output dataset should not be signaled ready", ((Signalable)outputDataset).isReady()); Assert.assertTrue("Output view should be signaled ready", ((Signalable)outputView).isReady()); }
@Test public void testSetupJobIsIdempotent() { DatasetKeyOutputFormat.MergeOutputCommitter<Object> outputCommitter = new DatasetKeyOutputFormat.MergeOutputCommitter<Object>(); Configuration conf = DefaultConfiguration.get(); DatasetKeyOutputFormat.configure(conf).appendTo(outputDataset); JobID jobId = new JobID("jt", 42); JobContext context = Hadoop.JobContext.ctor.newInstance(conf, jobId); // setup the job outputCommitter.setupJob(context); // call setup again to simulate an ApplicationMaster restart outputCommitter.setupJob(context); }
public DatasetTarget(View<E> view) { this.view = view; Configuration temp = emptyConf(); // use appendTo since handleExisting checks for existing data DatasetKeyOutputFormat.configure(temp).appendTo(view); this.formatBundle = outputBundle(temp); this.uri = view.getDataset().getUri(); }
public DatasetTarget(URI uri) { this.uri = uri; Configuration temp = emptyConf(); // use appendTo since handleExisting checks for existing data DatasetKeyOutputFormat.configure(temp).appendTo(uri); this.formatBundle = outputBundle(temp); }