private void copyDynamicPartitionerArguments(Map<String, String> fromMap, Map<String, String> toMap) { String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(fromMap); DynamicPartitioner.PartitionWriteOption partitionWriteOption = PartitionedFileSetArguments.getDynamicPartitionerWriteOption(fromMap); PartitionedFileSetArguments.setDynamicPartitioner(toMap, dynamicPartitionerClassName, partitionWriteOption); PartitionedFileSetArguments.setDynamicPartitionerConcurrency( toMap, PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(fromMap)); // propagate output metadata into OutputFormatConfiguration so DynamicPartitionerOutputCommitter can assign // the metadata when it creates the partitions Map<String, String> metadata = PartitionedFileSetArguments.getOutputPartitionMetadata(fromMap); PartitionedFileSetArguments.setOutputPartitionMetadata(toMap, metadata); }
/**
 * Computes and returns the input partition keys given by the partition filter - if present. Otherwise, get the list
 * of partition keys explicitly specified in the runtime arguments.
 * Stores the result in a cache and returns it.
 */
private Collection<PartitionKey> getInputKeys() {
  // serve from the cache when it has already been populated
  if (inputKeysCache != null) {
    return inputKeysCache.get();
  }
  // a filter was given: use the keys it selects; otherwise fall back to explicit keys
  Collection<PartitionKey> keys = computeInputKeys();
  if (keys == null) {
    keys = PartitionedFileSetArguments.getInputPartitionKeys(runtimeArguments);
  }
  inputKeysCache = new AtomicReference<>(keys);
  return keys;
}
/**
 * Sets a partition as input for a PartitionedFileSet. If both a PartitionFilter and Partition(s) are specified, the
 * PartitionFilter takes precedence and the specified Partition(s) will be ignored.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param partition the partition to add as input
 */
public static void addInputPartition(Map<String, String> arguments, Partition partition) {
  // delegate to the bulk variant with a single-element list
  addInputPartitions(arguments, Collections.singletonList(partition));
}
/**
 * Builds the runtime arguments for writing a new snapshot partition.
 *
 * @param snapshotTime the timestamp to use as the value of the snapshot partition field
 * @param otherProperties additional arguments to include; the map is copied, not modified
 * @return a new argument map containing {@code otherProperties} plus the output partition key
 */
public Map<String, String> getOutputArguments(long snapshotTime, Map<String, String> otherProperties) {
  // copy the caller's properties so they are never mutated
  Map<String, String> arguments = new HashMap<>(otherProperties);
  PartitionKey key = PartitionKey.builder().addLongField(SNAPSHOT_FIELD, snapshotTime).build();
  PartitionedFileSetArguments.setOutputPartitionKey(arguments, key);
  return arguments;
}
PartitionedFileSetArguments.setInputPartitionFilter( inputArgs, PartitionFilter.builder().addRangeCondition("x", "na", "nx").build()); Map<String, String> outputArgs = new HashMap<>(); PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build(); PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey); Map<String, String> args = new HashMap<>(); args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", inputArgs));
protected static Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments, Partitioning partitioning) { if (FileSetArguments.getOutputPath(arguments) == null) { PartitionKey key = PartitionedFileSetArguments.getOutputPartitionKey(arguments, partitioning); // we need to copy the map, to avoid modifying the passed-in map arguments = Maps.newHashMap(arguments); if (key != null) { FileSetArguments.setOutputPath(arguments, PartitionedFileSetDataset.getOutputPath(key, partitioning)); } else if (PartitionedFileSetArguments.getDynamicPartitioner(arguments) != null) { // when using DynamicPartitioner, use the baseLocation of the fileSet as the output location FileSetArguments.setBaseOutputPath(arguments); } } return arguments; }
/**
 * Configures the map-only job: seeds the input KeyValueTable with the text given in the
 * runtime arguments, and wires one or more partitioned outputs — either with an explicit
 * output partition key or with a DynamicPartitioner.
 */
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(TokenMapper.class);
  // map-only job: no reduce phase
  job.setNumReduceTasks(0);
  // "input.text" is required; it is written into the input table for the mapper to read
  String inputText = getContext().getRuntimeArguments().get("input.text");
  Preconditions.checkNotNull(inputText);
  KeyValueTable kvTable = getContext().getDataset(INPUT);
  kvTable.write("key", inputText);
  context.addInput(Input.ofDataset(INPUT, kvTable.getSplits(1, null, null)));
  // "output.datasets" may list several outputs, comma-separated; defaults to PFS
  String outputDatasets = getContext().getRuntimeArguments().get("output.datasets");
  outputDatasets = outputDatasets != null ? outputDatasets : PFS;
  for (String outputName : outputDatasets.split(",")) {
    // per-output argument "<name>.output.partition" selects an explicit partition number
    String outputPartition = getContext().getRuntimeArguments().get(outputName + ".output.partition");
    PartitionKey outputPartitionKey = outputPartition == null ? null :
      PartitionKey.builder().addField("number", Integer.parseInt(outputPartition)).build();
    Map<String, String> outputArguments = new HashMap<>();
    if (outputPartitionKey != null) {
      // explicit partition requested for this output
      PartitionedFileSetArguments.setOutputPartitionKey(outputArguments, outputPartitionKey);
    } else {
      // otherwise let the KeyPartitioner assign partitions dynamically
      PartitionedFileSetArguments.setDynamicPartitioner(outputArguments, KeyPartitioner.class);
    }
    context.addOutput(Output.ofDataset(outputName, outputArguments));
  }
}
@Test
public void testSetGetOutputPartitionKey() throws Exception {
  Map<String, String> args = new HashMap<>();
  // a key covering int, long and string fields of the partitioning
  PartitionKey expected = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();
  // the key written with the setter must round-trip through the getter
  PartitionedFileSetArguments.setOutputPartitionKey(args, expected);
  PartitionKey actual = PartitionedFileSetArguments.getOutputPartitionKey(args, PARTITIONING);
  Assert.assertEquals(expected, actual);
}
@Test public void testDynamicPartitionerWriterConcurrency() { Map<String, String> arguments = new HashMap<>(); // should not be able to get or set the concurrency setting, without a dynamic partitioner set on the arguments try { PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments); Assert.fail(); } catch (IllegalArgumentException expected) { } try { PartitionedFileSetArguments.setDynamicPartitionerConcurrency(arguments, false); Assert.fail(); } catch (IllegalArgumentException expected) { } // set a DynamicPartitioner PartitionedFileSetArguments.setDynamicPartitioner(arguments, TestDynamicPartitioner.class.getName()); // default value should be true Assert.assertTrue(PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments)); // try set+get PartitionedFileSetArguments.setDynamicPartitionerConcurrency(arguments, false); Assert.assertFalse(PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments)); PartitionedFileSetArguments.setDynamicPartitionerConcurrency(arguments, true); Assert.assertTrue(PartitionedFileSetArguments.isDynamicPartitionerConcurrencyAllowed(arguments)); }
@Test public void testGetDynamicPartitionerClass() throws Exception { Map<String, String> arguments = new HashMap<>(); // two ways to set the DynamicPartitioner class - either the class object or the String (name) PartitionedFileSetArguments.setDynamicPartitioner(arguments, TestDynamicPartitioner.class); Assert.assertEquals(TestDynamicPartitioner.class.getName(), PartitionedFileSetArguments.getDynamicPartitioner(arguments)); arguments.clear(); PartitionedFileSetArguments.setDynamicPartitioner(arguments, TestDynamicPartitioner.class.getName()); Assert.assertEquals(TestDynamicPartitioner.class.getName(), PartitionedFileSetArguments.getDynamicPartitioner(arguments)); }
/**
 * Sets a DynamicPartitioner class to be used during the output of a PartitionedFileSet.
 * The partition write option defaults to {@link DynamicPartitioner.PartitionWriteOption#CREATE}.
 *
 * @param arguments the runtime arguments for a partitioned dataset
 * @param dynamicPartitionerClassName the name of the class to set
 */
public static void setDynamicPartitioner(Map<String, String> arguments, String dynamicPartitionerClassName) {
  setDynamicPartitioner(arguments, dynamicPartitionerClassName, DynamicPartitioner.PartitionWriteOption.CREATE);
}
@Test
public void testGetInputPartitionKeys() throws Exception {
  Map<String, String> args = new HashMap<>();
  // with no partitions added, the list of input keys is empty
  Assert.assertEquals(0, PartitionedFileSetArguments.getInputPartitionKeys(args).size());
  List<? extends Partition> partitions = Lists.newArrayList(
    new BasicPartition(null, "path/doesn't/matter/1", generateUniqueKey()),
    new BasicPartition(null, "path/doesn't/matter/2", generateUniqueKey()),
    new BasicPartition(null, "path/doesn't/matter/3", generateUniqueKey()));
  // the expected keys, in the same order the partitions were created
  List<PartitionKey> expectedKeys = Lists.newArrayList();
  for (Partition partition : partitions) {
    expectedKeys.add(partition.getPartitionKey());
  }
  // adding partitions one at a time preserves order
  for (Partition partition : partitions) {
    PartitionedFileSetArguments.addInputPartition(args, partition);
  }
  Assert.assertEquals(expectedKeys, PartitionedFileSetArguments.getInputPartitionKeys(args));
  // adding them all at once via an iterator yields the same keys
  args.clear();
  PartitionedFileSetArguments.addInputPartitions(args, partitions.iterator());
  Assert.assertEquals(expectedKeys, PartitionedFileSetArguments.getInputPartitionKeys(args));
}
/**
 * On successful completion, registers the output partition (with its metadata) if an explicit
 * output path and output partition key were set in the runtime arguments.
 */
@Override
public void onSuccess() throws DataSetException {
  String outputPath = FileSetArguments.getOutputPath(runtimeArguments);
  // If there is no output path, it is either using DynamicPartitioner or the job would have failed.
  // Either way, we can't do much here.
  if (outputPath == null) {
    return;
  }
  // its possible that there is no output key, if using the DynamicPartitioner, in which case
  // DynamicPartitioningOutputFormat is responsible for registering the partitions and the metadata
  PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
  if (outputKey != null) {
    Map<String, String> metadata = PartitionedFileSetArguments.getOutputPartitionMetadata(runtimeArguments);
    addPartition(outputKey, outputPath, metadata, true, false);
  }
  // currently, FileSetDataset#onSuccess is a no-op, but call it, in case it does something in the future
  ((FileSetDataset) files).onSuccess();
}
@Test
public void testSetGetInputPartitionFilter() throws Exception {
  // a filter with range and value conditions across several field types
  assertFilterRoundTrip(PartitionFilter.builder()
                          .addRangeCondition("i", 30, 40)
                          .addValueCondition("l", 17L)
                          .addValueCondition("s", "x")
                          .build());
  // a filter that leaves out one of the fields
  assertFilterRoundTrip(PartitionFilter.builder()
                          .addRangeCondition("i", 30, 40)
                          .addValueCondition("s", "x")
                          .build());
  // the always-matching filter
  assertFilterRoundTrip(PartitionFilter.ALWAYS_MATCH);
}

/** Sets the given filter as the input partition filter and verifies it reads back equal. */
private void assertFilterRoundTrip(PartitionFilter filter) {
  Map<String, String> arguments = new HashMap<>();
  PartitionedFileSetArguments.setInputPartitionFilter(arguments, filter);
  Assert.assertEquals(filter, PartitionedFileSetArguments.getInputPartitionFilter(arguments));
}
/**
 * Returns the output format class to use: the dynamic-partitioning format when no explicit
 * output partition key is configured, otherwise the underlying file set's own output format.
 */
@Override
public String getOutputFormatClassName() {
  checkNotExternal();
  PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning());
  if (outputKey != null) {
    // an explicit output partition: delegate to the underlying file set's output format
    return files.getOutputFormatClassName();
  }
  // no output key means DynamicPartitioner is in use; the class is named as a string,
  // presumably to avoid a compile-time dependency on the runtime module — confirm before changing
  return "co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat";
}
/** * If a partition filter was specified, return the partition keys of all partitions * matching the filter. Otherwise return null. */ @Nullable protected Collection<PartitionKey> computeInputKeys() { PartitionFilter filter; try { filter = PartitionedFileSetArguments.getInputPartitionFilter(runtimeArguments); } catch (Exception e) { throw new DataSetException("Partition filter must be correctly specified in arguments."); } if (filter == null) { return null; } return getPartitionPaths(filter); // never returns null }
/**
 * Fails fast when no DynamicPartitioner has been configured in the given arguments.
 *
 * @param args the runtime arguments to check
 * @throws IllegalArgumentException if no DynamicPartitioner class is set in {@code args}
 */
private static void ensureDynamicPartitionerConfigured(Map<String, String> args) {
  if (getDynamicPartitioner(args) == null) {
    throw new IllegalArgumentException("Cannot get or set a setting of DynamicPartitioner, without first setting " +
                                         "a DynamicPartitioner.");
  }
}
}
/**
 * Builds the runtime arguments for reading the latest snapshot partition.
 *
 * @param otherProperties additional arguments to include; the map is copied, not modified
 * @return a new argument map containing {@code otherProperties} plus the latest partition as input
 * @throws IllegalArgumentException if no snapshot partition exists yet
 */
public Map<String, String> getInputArguments(Map<String, String> otherProperties)
  throws IOException, InterruptedException {
  // hold the lock while determining the latest partition
  Location lock = lock();
  try {
    PartitionDetail latest = getLatestPartition();
    if (latest == null) {
      throw new IllegalArgumentException("Snapshot fileset does not have a latest snapshot, so cannot be read.");
    }
    Map<String, String> arguments = new HashMap<>(otherProperties);
    PartitionedFileSetArguments.addInputPartition(arguments, latest);
    return arguments;
  } finally {
    // always release the lock, even when no snapshot exists
    lock.delete();
  }
}
protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) { Long time = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments); if (time != null) { // set the output path according to partition time if (FileSetArguments.getOutputPath(arguments) == null) { String outputPathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments); String path; if (Strings.isNullOrEmpty(outputPathFormat)) { path = String.format("%tF/%tH-%tM.%d", time, time, time, time); } else { SimpleDateFormat format = new SimpleDateFormat(outputPathFormat); String timeZoneID = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments); if (!Strings.isNullOrEmpty(timeZoneID)) { format.setTimeZone(TimeZone.getTimeZone(timeZoneID)); } path = format.format(new Date(time)); } arguments = Maps.newHashMap(arguments); FileSetArguments.setOutputPath(arguments, path); } // add the corresponding partition key to the arguments PartitionKey outputKey = TimePartitionedFileSetDataset.partitionKeyForTime(time); PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputKey); } // delegate to super class for anything it needs to do return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING); } }
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX); runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs)); Assert.assertTrue( PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY); runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs)); Assert.assertTrue( PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterXY); runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs)); runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "a"); PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterX); runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs)); runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "b"); PartitionedFileSetArguments.setInputPartitionFilter(inputArgs, filterMT); runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, inputArgs)); runtimeArguments.put(AppWithPartitionedFileSet.ROW_TO_WRITE, "n");