@Test @SuppressWarnings("deprecation") public void testJobOutputDatasetSignaledReady() throws Exception { Assume.assumeTrue(!Hadoop.isHadoop1()); populateInputDataset(); populateOutputDataset(); // existing output will be overwritten Job job = new Job(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(GenericStatsReducer.class); DatasetKeyOutputFormat.configure(job).overwrite(outputDataset).withType(GenericData.Record.class); Assert.assertTrue(job.waitForCompletion(true)); Assert.assertTrue("Output dataset should be signaled ready", ((Signalable)outputDataset).isReady()); }
// Register the Avro output key schema parsed from testGenericEntity, then
// configure DatasetKeyOutputFormat to write the job output to outputDataset.
AvroJob.setOutputKeySchema(job, new Schema.Parser().parse(testGenericEntity)); DatasetKeyOutputFormat.configure(job).writeTo(outputDataset);
@Test @SuppressWarnings("deprecation") public void testJobAppend() throws Exception { populateInputDataset(); populateOutputDataset(); // existing output will be overwritten Job job = new Job(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(GenericStatsReducer.class); DatasetKeyOutputFormat.configure(job).appendTo(outputDataset).withType(GenericData.Record.class); Assert.assertTrue(job.waitForCompletion(true)); checkOutput(true); }
// Register the Avro output key schema parsed from testGenericEntity, then
// configure DatasetKeyOutputFormat to write the job output to outputDataset.
AvroJob.setOutputKeySchema(job, new Schema.Parser().parse(testGenericEntity)); DatasetKeyOutputFormat.configure(job).writeTo(outputDataset);
/**
 * Configures {@code DatasetKeyOutputFormat} to replace the contents of the
 * dataset or view identified by the given URI: any existing data is
 * removed before the job output is written.
 * <p>
 * The job will fail unless the underlying dataset implementation supports
 * {@code View#deleteAll} for the view the URI identifies.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder overwrite(URI uri) {
  // The overwrite mode is set before the target itself is configured.
  setOverwrite();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to replace the contents of the
 * dataset or view identified by the given URI string: any existing data is
 * removed before the job output is written.
 * <p>
 * The job will fail unless the underlying dataset implementation supports
 * {@code View#deleteAll} for the view the URI string identifies.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI string
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder overwrite(String uri) {
  // The overwrite mode is set before the target itself is configured.
  setOverwrite();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to replace the contents of the
 * dataset or view identified by the given URI string: any existing data is
 * removed before the job output is written.
 * <p>
 * The job will fail unless the underlying dataset implementation supports
 * {@code View#deleteAll} for the view the URI string identifies.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI string
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder overwrite(String uri) {
  // The overwrite mode is set before the target itself is configured.
  setOverwrite();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to replace the contents of the
 * dataset or view identified by the given URI: any existing data is
 * removed before the job output is written.
 * <p>
 * The job will fail unless the underlying dataset implementation supports
 * {@code View#deleteAll} for the view the URI identifies.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder overwrite(URI uri) {
  // The overwrite mode is set before the target itself is configured.
  setOverwrite();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
@Test @SuppressWarnings("deprecation") public void testSignalReadyOutputView() throws Exception { Assume.assumeTrue(!Hadoop.isHadoop1()); populateInputDataset(); populateOutputDataset(); // existing output will be overwritten Job job = new Job(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(GenericStatsReducer.class); View<Record> outputView = outputDataset.with("name", "apple", "banana", "carrot"); DatasetKeyOutputFormat.configure(job).appendTo(outputView).withType(GenericData.Record.class); Assert.assertTrue(job.waitForCompletion(true)); Assert.assertFalse("Output dataset should not be signaled ready", ((Signalable)outputDataset).isReady()); Assert.assertTrue("Output view should be signaled ready", ((Signalable)outputView).isReady()); }
/**
 * Configures {@code DatasetKeyOutputFormat} to append the job output to
 * the dataset or view identified by the given URI string, leaving any
 * existing data intact.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI string
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder appendTo(String uri) {
  // The append mode is set before the target itself is configured.
  setAppend();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to append the job output to
 * the dataset or view identified by the given URI, leaving any existing
 * data intact.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder appendTo(URI uri) {
  // The append mode is set before the target itself is configured.
  setAppend();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to replace the contents of the
 * given {@link Dataset} or {@link View} instance: any existing data is
 * removed before the job output is written.
 * <p>
 * The job will fail unless the underlying dataset implementation supports
 * {@code View#deleteAll} for the {@code view}.
 *
 * @param view a dataset or view
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder overwrite(View<?> view) {
  // The overwrite mode is set before the target itself is configured.
  setOverwrite();
  final ConfigBuilder builder = writeTo(view);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to replace the contents of the
 * given {@link Dataset} or {@link View} instance: any existing data is
 * removed before the job output is written.
 * <p>
 * The job will fail unless the underlying dataset implementation supports
 * {@code View#deleteAll} for the {@code view}.
 *
 * @param view a dataset or view
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder overwrite(View<?> view) {
  // The overwrite mode is set before the target itself is configured.
  setOverwrite();
  final ConfigBuilder builder = writeTo(view);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to append the job output to
 * the dataset or view identified by the given URI, leaving any existing
 * data intact.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder appendTo(URI uri) {
  // The append mode is set before the target itself is configured.
  setAppend();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to append the job output to
 * the dataset or view identified by the given URI string, leaving any
 * existing data intact.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI string
 * @return this for method chaining
 *
 * @since 0.16.0
 */
public ConfigBuilder appendTo(String uri) {
  // The append mode is set before the target itself is configured.
  setAppend();
  final ConfigBuilder builder = writeTo(uri);
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to write to the given
 * {@link Dataset} or {@link View} instance.
 * <p>
 * When the view is a {@code FileSystemDataset}, the location from its
 * descriptor is also stored in the configuration under
 * {@code KITE_PARTITION_DIR}. The view's type is applied via
 * {@code withType} and its URI becomes the write target.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    // String.valueOf keeps a missing location from throwing here; it is
    // stored as the literal text "null" instead.
    final String partitionDir = String.valueOf(
        ((FileSystemDataset) view).getDescriptor().getLocation());
    conf.set(KITE_PARTITION_DIR, partitionDir);
  }

  withType(view.getType());

  final ConfigBuilder builder = writeTo(view.getUri());
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to write to the given
 * {@link Dataset} or {@link View} instance.
 * <p>
 * When the view is a {@code FileSystemDataset}, the location from its
 * descriptor is also stored in the configuration under
 * {@code KITE_PARTITION_DIR}. The view's type is applied via
 * {@code withType} and its URI becomes the write target.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    // String.valueOf keeps a missing location from throwing here; it is
    // stored as the literal text "null" instead.
    final String partitionDir = String.valueOf(
        ((FileSystemDataset) view).getDescriptor().getLocation());
    conf.set(KITE_PARTITION_DIR, partitionDir);
  }

  withType(view.getType());

  final ConfigBuilder builder = writeTo(view.getUri());
  return builder;
}
/**
 * Configures {@code DatasetKeyOutputFormat} to write to the dataset or
 * view identified by the given URI string.
 * <p>
 * The URI must begin with "dataset:" or "view:"; beyond that, formats are
 * defined by {@link Dataset} implementations. For more information, see
 * {@link Datasets}.
 *
 * @param uri a dataset or view URI string
 * @return this for method chaining
 */
public ConfigBuilder writeTo(String uri) {
  // Parse the string once, then defer to the URI-based overload.
  final URI target = URI.create(uri);
  return writeTo(target);
}
@Test @SuppressWarnings("deprecation") public void testJobOverwrite() throws Exception { populateInputDataset(); populateOutputDataset(); // existing output will be overwritten Job job = new Job(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(GenericStatsReducer.class); DatasetKeyOutputFormat.configure(job).overwrite(outputDataset).withType(GenericData.Record.class); Assert.assertTrue(job.waitForCompletion(true)); checkOutput(false); }