/**
 * Sets the DynamicPartitioner, identified by its class name, to be used during the output of a
 * PartitionedFileSet. This overload always passes
 * {@link DynamicPartitioner.PartitionWriteOption#CREATE} as the write option.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param dynamicPartitionerClassName the name of the DynamicPartitioner class to set
 */
public static void setDynamicPartitioner(Map<String, String> arguments, String dynamicPartitionerClassName) {
  // the write option applied when the caller does not choose one
  DynamicPartitioner.PartitionWriteOption writeOption = DynamicPartitioner.PartitionWriteOption.CREATE;
  setDynamicPartitioner(arguments, dynamicPartitionerClassName, writeOption);
}
private void copyDynamicPartitionerArguments(Map<String, String> fromMap, Map<String, String> toMap) { String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(fromMap); DynamicPartitioner.PartitionWriteOption partitionWriteOption = PartitionedFileSetArguments.getDynamicPartitionerWriteOption(fromMap); PartitionedFileSetArguments.setDynamicPartitioner(toMap, dynamicPartitionerClassName, partitionWriteOption); PartitionedFileSetArguments.setDynamicPartitionerConcurrency( toMap, PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(fromMap)); // propagate output metadata into OutputFormatConfiguration so DynamicPartitionerOutputCommitter can assign // the metadata when it creates the partitions Map<String, String> metadata = PartitionedFileSetArguments.getOutputPartitionMetadata(fromMap); PartitionedFileSetArguments.setOutputPartitionMetadata(toMap, metadata); }
private void copyDynamicPartitionerArguments(Map<String, String> fromMap, Map<String, String> toMap) { String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(fromMap); DynamicPartitioner.PartitionWriteOption partitionWriteOption = PartitionedFileSetArguments.getDynamicPartitionerWriteOption(fromMap); PartitionedFileSetArguments.setDynamicPartitioner(toMap, dynamicPartitionerClassName, partitionWriteOption); PartitionedFileSetArguments.setDynamicPartitionerConcurrency( toMap, PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(fromMap)); // propagate output metadata into OutputFormatConfiguration so DynamicPartitionerOutputCommitter can assign // the metadata when it creates the partitions Map<String, String> metadata = PartitionedFileSetArguments.getOutputPartitionMetadata(fromMap); PartitionedFileSetArguments.setOutputPartitionMetadata(toMap, metadata); }
/**
 * Sets a DynamicPartitioner class to be used during the output of a PartitionedFileSet.
 * This overload delegates to the variant that takes the class name, so by default
 * {@link DynamicPartitioner.PartitionWriteOption#CREATE} will be used.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param dynamicPartitionerClass the class to set; its fully qualified name is what gets passed on
 * @param <K> type of key
 * @param <V> type of value
 */
public static <K, V> void setDynamicPartitioner(Map<String, String> arguments,
                                                Class<? extends DynamicPartitioner<K, V>> dynamicPartitionerClass) {
  setDynamicPartitioner(arguments, dynamicPartitionerClass.getName());
}
@Test public void testDynamicPartitionerWriterConcurrency() { Map<String, String> arguments = new HashMap<>(); // should not be able to get or set the concurrency setting, without a dynamic partitioner set on the arguments try { PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments); Assert.fail(); } catch (IllegalArgumentException expected) { } try { PartitionedFileSetArguments.setDynamicPartitionerConcurrency(arguments, false); Assert.fail(); } catch (IllegalArgumentException expected) { } // set a DynamicPartitioner PartitionedFileSetArguments.setDynamicPartitioner(arguments, TestDynamicPartitioner.class.getName()); // default value should be true Assert.assertTrue(PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments)); // try set+get PartitionedFileSetArguments.setDynamicPartitionerConcurrency(arguments, false); Assert.assertFalse(PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments)); PartitionedFileSetArguments.setDynamicPartitionerConcurrency(arguments, true); Assert.assertTrue(PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments)); }
/**
 * Sets a DynamicPartitioner class, along with an explicit write option, to be used during the output
 * of a PartitionedFileSet. The class is passed on by its name to the class-name based variant.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param dynamicPartitionerClass the class to set
 * @param partitionWriteOption options for the output partitions
 * @param <K> type of key
 * @param <V> type of value
 */
public static <K, V> void setDynamicPartitioner(Map<String, String> arguments,
                                                Class<? extends DynamicPartitioner<K, V>> dynamicPartitionerClass,
                                                DynamicPartitioner.PartitionWriteOption partitionWriteOption) {
  String partitionerClassName = dynamicPartitionerClass.getName();
  setDynamicPartitioner(arguments, partitionerClassName, partitionWriteOption);
}
@Test public void testGetDynamicPartitionerClass() throws Exception { Map<String, String> arguments = new HashMap<>(); // two ways to set the DynamicPartitioner class - either the class object or the String (name) PartitionedFileSetArguments.setDynamicPartitioner(arguments, TestDynamicPartitioner.class); Assert.assertEquals(TestDynamicPartitioner.class.getName(), PartitionedFileSetArguments.getDynamicPartitioner(arguments)); arguments.clear(); PartitionedFileSetArguments.setDynamicPartitioner(arguments, TestDynamicPartitioner.class.getName()); Assert.assertEquals(TestDynamicPartitioner.class.getName(), PartitionedFileSetArguments.getDynamicPartitioner(arguments)); }
/**
 * Configures the job: reads from the "input" dataset, writes to every dataset named in the
 * space-separated "outputs" runtime argument using {@code KeyPartitioner} as the dynamic partitioner,
 * and runs map-only with {@code DynamicMapper}.
 */
@Override
protected void initialize() throws Exception {
  getContext().addInput(Input.ofDataset("input"));

  // all outputs share the same arguments, which only carry the dynamic partitioner
  Map<String, String> outputArgs = new HashMap<>();
  PartitionedFileSetArguments.setDynamicPartitioner(outputArgs, KeyPartitioner.class);

  // the output dataset names are given as a single, space-separated runtime argument
  String outputsArgument = getContext().getRuntimeArguments().get("outputs");
  for (String datasetName : outputsArgument.split(" ")) {
    getContext().addOutput(Output.ofDataset(datasetName, outputArgs));
  }

  // map-only job
  Job hadoopJob = getContext().getHadoopJob();
  hadoopJob.setMapperClass(DynamicMapper.class);
  hadoopJob.setNumReduceTasks(0);
}
/**
 * Configures a map-only job using {@code TokenMapper}. The "input.text" runtime argument (required) is
 * written to the input table under the key "key" and the table is then used as the job input. The
 * outputs are the comma-separated datasets of the "output.datasets" runtime argument, or {@code PFS}
 * if that argument is absent. An output that has a "&lt;name&gt;.output.partition" runtime argument
 * writes to the partition whose "number" field is that value; any other output uses
 * {@code KeyPartitioner} as its dynamic partitioner.
 */
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();

  // map-only job
  Job job = context.getHadoopJob();
  job.setMapperClass(TokenMapper.class);
  job.setNumReduceTasks(0);

  Map<String, String> runtimeArgs = context.getRuntimeArguments();
  String inputText = runtimeArgs.get("input.text");
  Preconditions.checkNotNull(inputText);

  // seed the input table with the given text, then read it back as the job input
  KeyValueTable kvTable = context.getDataset(INPUT);
  kvTable.write("key", inputText);
  context.addInput(Input.ofDataset(INPUT, kvTable.getSplits(1, null, null)));

  String outputDatasets = runtimeArgs.get("output.datasets");
  if (outputDatasets == null) {
    outputDatasets = PFS;
  }

  for (String outputName : outputDatasets.split(",")) {
    String outputPartition = runtimeArgs.get(outputName + ".output.partition");
    Map<String, String> outputArguments = new HashMap<>();
    if (outputPartition == null) {
      // no explicit partition given: let the partitioner pick the partition per record
      PartitionedFileSetArguments.setDynamicPartitioner(outputArguments, KeyPartitioner.class);
    } else {
      PartitionKey outputPartitionKey =
        PartitionKey.builder().addField("number", Integer.parseInt(outputPartition)).build();
      PartitionedFileSetArguments.setOutputPartitionKey(outputArguments, outputPartitionKey);
    }
    context.addOutput(Output.ofDataset(outputName, outputArguments));
  }
}