public Map<String, String> getOutputArguments(long snapshotTime, Map<String, String> otherProperties) {
  // start from the caller-supplied properties, then pin the output to the snapshot partition
  Map<String, String> args = new HashMap<>();
  args.putAll(otherProperties);
  PartitionKey outputKey = PartitionKey.builder().addLongField(SNAPSHOT_FIELD, snapshotTime).build();
  PartitionedFileSetArguments.setOutputPartitionKey(args, outputKey);
  return args;
}
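A minimal usage sketch for the helper above, assuming a program initialize() with access to a context that supports addOutput, and an illustrative dataset name "snapshots" (not from the original):

// Hypothetical caller: route this run's output into the partition for the
// chosen snapshot time. "snapshots" is an assumed dataset name.
Map<String, String> outputArgs = getOutputArguments(System.currentTimeMillis(),
                                                    Collections.<String, String>emptyMap());
context.addOutput(Output.ofDataset("snapshots", outputArgs));
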
protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) {
  Long time = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments);
  if (time != null) {
    // set the output path according to partition time
    if (FileSetArguments.getOutputPath(arguments) == null) {
      String outputPathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments);
      String path;
      if (Strings.isNullOrEmpty(outputPathFormat)) {
        path = String.format("%tF/%tH-%tM.%d", time, time, time, time);
      } else {
        SimpleDateFormat format = new SimpleDateFormat(outputPathFormat);
        String timeZoneID = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments);
        if (!Strings.isNullOrEmpty(timeZoneID)) {
          format.setTimeZone(TimeZone.getTimeZone(timeZoneID));
        }
        path = format.format(new Date(time));
      }
      arguments = Maps.newHashMap(arguments);
      FileSetArguments.setOutputPath(arguments, path);
    }
    // add the corresponding partition key to the arguments
    PartitionKey outputKey = TimePartitionedFileSetDataset.partitionKeyForTime(time);
    PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputKey);
  }
  // delegate to super class for anything it needs to do
  return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING);
}

private QueryHandle doPartitionOperation(DatasetId datasetId, DatasetSpecification spec, PartitionKey key,
                                         String endpoint, String operationName,
                                         Map<String, String> additionalArguments) throws ExploreException {
  Map<String, String> args = new HashMap<>(additionalArguments);
  PartitionedFileSetArguments.setOutputPartitionKey(args, key);
  String tableName = ExploreProperties.getExploreTableName(spec.getProperties());
  String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
  if (tableName != null) {
    args.put(ExploreProperties.PROPERTY_EXPLORE_TABLE_NAME, tableName);
  }
  if (databaseName != null) {
    args.put(ExploreProperties.PROPERTY_EXPLORE_DATABASE_NAME, databaseName);
  }
  HttpResponse response = doPost(String.format("namespaces/%s/data/explore/datasets/%s/%s",
                                               datasetId.getNamespace(), datasetId.getEntityName(), endpoint),
                                 GSON.toJson(args), null);
  if (response.getResponseCode() == HttpURLConnection.HTTP_OK) {
    return QueryHandle.fromId(parseResponseAsMap(response, "handle"));
  }
  throw new ExploreException(String.format("Cannot %s partition with key %s in dataset %s. Reason: %s",
                                           operationName, key, datasetId.toString(), response));
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  batchPartitionCommitter = PartitionBatchInput.setInput(context, "lines",
                                                         new KVTableStatePersistor("consumingState", "state.key"));
  // write all output of this run into the partition keyed by the logical start time
  Map<String, String> outputArgs = new HashMap<>();
  PartitionKey partitionKey = PartitionKey.builder().addLongField("time", context.getLogicalStartTime()).build();
  PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, partitionKey);
  context.addOutput(Output.ofDataset("outputLines", outputArgs));
  context.addOutput(Output.ofDataset("counts"));

  Job job = context.getHadoopJob();
  job.setMapperClass(Tokenizer.class);
  job.setReducerClass(Counter.class);
  job.setNumReduceTasks(1);
}

@Test
public void testSetGetOutputPartitionKey() throws Exception {
  Map<String, String> arguments = new HashMap<>();
  PartitionKey key = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();
  PartitionedFileSetArguments.setOutputPartitionKey(arguments, key);
  Assert.assertEquals(key, PartitionedFileSetArguments.getOutputPartitionKey(arguments, PARTITIONING));
}
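The round trip above relies on a PARTITIONING constant defined elsewhere in the test class; a plausible definition whose field types match the key, assuming CDAP's Partitioning builder:

// Assumed definition: getOutputPartitionKey needs the partitioning to know
// each field's type when parsing the key back out of the arguments map.
private static final Partitioning PARTITIONING = Partitioning.builder()
  .addIntField("i")
  .addLongField("l")
  .addStringField("s")
  .build();
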
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(TokenMapper.class);
  job.setNumReduceTasks(0);

  String inputText = getContext().getRuntimeArguments().get("input.text");
  Preconditions.checkNotNull(inputText);
  KeyValueTable kvTable = getContext().getDataset(INPUT);
  kvTable.write("key", inputText);
  context.addInput(Input.ofDataset(INPUT, kvTable.getSplits(1, null, null)));

  String outputDatasets = getContext().getRuntimeArguments().get("output.datasets");
  outputDatasets = outputDatasets != null ? outputDatasets : PFS;
  for (String outputName : outputDatasets.split(",")) {
    String outputPartition = getContext().getRuntimeArguments().get(outputName + ".output.partition");
    PartitionKey outputPartitionKey = outputPartition == null ? null :
      PartitionKey.builder().addField("number", Integer.parseInt(outputPartition)).build();
    Map<String, String> outputArguments = new HashMap<>();
    if (outputPartitionKey != null) {
      // static partition: all output for this dataset goes into the one given partition
      PartitionedFileSetArguments.setOutputPartitionKey(outputArguments, outputPartitionKey);
    } else {
      // no fixed partition given: let a DynamicPartitioner assign a key per record
      PartitionedFileSetArguments.setDynamicPartitioner(outputArguments, KeyPartitioner.class);
    }
    context.addOutput(Output.ofDataset(outputName, outputArguments));
  }
}
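KeyPartitioner itself is not shown in this snippet; a minimal sketch of what such a DynamicPartitioner could look like, assuming Text mapper output keys whose string value parses as the "number" field (the actual class may differ):

// Hypothetical sketch of a DynamicPartitioner: derive each record's
// partition key from the mapper output key instead of using a fixed key.
public static final class KeyPartitioner extends DynamicPartitioner<Text, Text> {
  @Override
  public PartitionKey getPartitionKey(Text key, Text value) {
    return PartitionKey.builder()
      .addIntField("number", Integer.parseInt(key.toString()))
      .build();
  }
}
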
@Override
protected void initialize() throws Exception {
  // add a partition to the pfs
  PartitionedFileSet pfs = getContext().getDataset("pfs");
  PartitionKey key = PartitionKey.builder().addField("x", 1).build();
  PartitionOutput partitionOutput = pfs.getPartitionOutput(key);
  partitionOutput.addPartition();

  // configure the same partition as output for the MR
  Map<String, String> args = new HashMap<>();
  PartitionedFileSetArguments.setOutputPartitionKey(args, key);
  getContext().addOutput(Output.ofDataset("pfs", args));

  // configure an input
  KeyValueTable kv = getContext().getDataset("recorder");
  kv.write("hello", "world");
  getContext().addInput(Input.ofDataset("recorder"));

  // configure mapper and no reducers
  Job job = getContext().getHadoopJob();
  job.setMapperClass(IdentityMapper.class);
  job.setNumReduceTasks(0);
}

Map<String, String> outputArgs = new HashMap<>();
PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build();
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
Map<String, String> args = new HashMap<>();
args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", inputArgs));

PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(

@Test
public void testRollbackOnJobFailure() throws Exception {
  // tests the logic of #onFailure method
  Map<String, String> args = new HashMap<>();
  FileSetArguments.setOutputPath(args, "custom/output/path");
  PartitionedFileSetArguments.setOutputPartitionKey(args, PARTITION_KEY);
  PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance, args);
  TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs);
  txContext.start();

  Location outputLocation = pfs.getEmbeddedFileSet().getOutputLocation();
  Assert.assertFalse(outputLocation.exists());
  outputLocation.mkdirs();
  Assert.assertTrue(outputLocation.exists());

  ((PartitionedFileSetDataset) pfs).onFailure();
  txContext.abort();

  // because the previous transaction aborted, the partition as well as the directory for it will not exist
  txContext.start();
  Assert.assertNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertFalse(outputLocation.exists());
  txContext.finish();
}
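For contrast, a sketch of the success path under the same setup: onSuccess() is the output-committer hook that registers the configured output partition, so after a committed transaction the partition should be visible. This is an illustration assuming the same pfs, txContext, and PARTITION_KEY as above, not a test from the original source:

// Sketch of the happy path: onSuccess() adds the partition for the
// configured output partition key, and the commit makes it visible.
txContext.start();
((PartitionedFileSetDataset) pfs).onSuccess();
txContext.finish();
txContext.start();
Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
txContext.finish();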