public Map<String, String> getOutputArguments(long snapshotTime, Map<String, String> otherProperties) {
  // start from the caller-supplied properties, then pin the output to the snapshot partition
  Map<String, String> args = new HashMap<>();
  args.putAll(otherProperties);
  PartitionKey outputKey = PartitionKey.builder().addLongField(SNAPSHOT_FIELD, snapshotTime).build();
  PartitionedFileSetArguments.setOutputPartitionKey(args, outputKey);
  return args;
}
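A minimal usage sketch for the helper above, assuming a program initialize() with access to a context that supports addOutput, and an illustrative dataset name "snapshots" (not from the original):

// Hypothetical caller: route this run's output into the partition for the
// chosen snapshot time. "snapshots" is an assumed dataset name.
Map<String, String> outputArgs = getOutputArguments(System.currentTimeMillis(),
                                                    Collections.<String, String>emptyMap());
context.addOutput(Output.ofDataset("snapshots", outputArgs));
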
protected Map<String, String> updateArgumentsIfNeeded(Map<String, String> arguments) {
  Long time = TimePartitionedFileSetArguments.getOutputPartitionTime(arguments);
  if (time != null) {
    // set the output path according to partition time
    if (FileSetArguments.getOutputPath(arguments) == null) {
      String outputPathFormat = TimePartitionedFileSetArguments.getOutputPathFormat(arguments);
      String path;
      if (Strings.isNullOrEmpty(outputPathFormat)) {
        path = String.format("%tF/%tH-%tM.%d", time, time, time, time);
      } else {
        SimpleDateFormat format = new SimpleDateFormat(outputPathFormat);
        String timeZoneID = TimePartitionedFileSetArguments.getOutputPathTimeZone(arguments);
        if (!Strings.isNullOrEmpty(timeZoneID)) {
          format.setTimeZone(TimeZone.getTimeZone(timeZoneID));
        }
        path = format.format(new Date(time));
      }
      arguments = Maps.newHashMap(arguments);
      FileSetArguments.setOutputPath(arguments, path);
    }
    // add the corresponding partition key to the arguments
    PartitionKey outputKey = TimePartitionedFileSetDataset.partitionKeyForTime(time);
    PartitionedFileSetArguments.setOutputPartitionKey(arguments, outputKey);
  }
  // delegate to super class for anything it needs to do
  return updateArgumentsIfNeeded(arguments, TimePartitionedFileSetDataset.PARTITIONING);
}

private QueryHandle doPartitionOperation(DatasetId datasetId, DatasetSpecification spec, PartitionKey key,
                                         String endpoint, String operationName,
                                         Map<String, String> additionalArguments) throws ExploreException {
  Map<String, String> args = new HashMap<>(additionalArguments);
  PartitionedFileSetArguments.setOutputPartitionKey(args, key);
  String tableName = ExploreProperties.getExploreTableName(spec.getProperties());
  String databaseName = ExploreProperties.getExploreDatabaseName(spec.getProperties());
  if (tableName != null) {
    args.put(ExploreProperties.PROPERTY_EXPLORE_TABLE_NAME, tableName);
  }
  if (databaseName != null) {
    args.put(ExploreProperties.PROPERTY_EXPLORE_DATABASE_NAME, databaseName);
  }
  HttpResponse response = doPost(String.format("namespaces/%s/data/explore/datasets/%s/%s",
                                               datasetId.getNamespace(), datasetId.getEntityName(), endpoint),
                                 GSON.toJson(args), null);
  if (response.getResponseCode() == HttpURLConnection.HTTP_OK) {
    return QueryHandle.fromId(parseResponseAsMap(response, "handle"));
  }
  throw new ExploreException(String.format("Cannot %s partition with key %s in dataset %s. Reason: %s",
                                           operationName, key, datasetId.toString(), response));
}

@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  batchPartitionCommitter = PartitionBatchInput.setInput(context, "lines",
                                                         new KVTableStatePersistor("consumingState", "state.key"));
  // write all output of this run into the partition keyed by the logical start time
  Map<String, String> outputArgs = new HashMap<>();
  PartitionKey partitionKey = PartitionKey.builder().addLongField("time", context.getLogicalStartTime()).build();
  PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, partitionKey);
  context.addOutput(Output.ofDataset("outputLines", outputArgs));
  context.addOutput(Output.ofDataset("counts"));

  Job job = context.getHadoopJob();
  job.setMapperClass(Tokenizer.class);
  job.setReducerClass(Counter.class);
  job.setNumReduceTasks(1);
}

@Test
public void testSetGetOutputPartitionKey() throws Exception {
  Map<String, String> arguments = new HashMap<>();
  PartitionKey key = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();
  PartitionedFileSetArguments.setOutputPartitionKey(arguments, key);
  Assert.assertEquals(key, PartitionedFileSetArguments.getOutputPartitionKey(arguments, PARTITIONING));
}
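The round trip above relies on a PARTITIONING constant defined elsewhere in the test class; a plausible definition whose field types match the key, assuming CDAP's Partitioning builder:

// Assumed definition: getOutputPartitionKey needs the partitioning to know
// each field's type when parsing the key back out of the arguments map.
private static final Partitioning PARTITIONING = Partitioning.builder()
  .addIntField("i")
  .addLongField("l")
  .addStringField("s")
  .build();
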
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();
  Job job = context.getHadoopJob();
  job.setMapperClass(TokenMapper.class);
  job.setNumReduceTasks(0);

  String inputText = getContext().getRuntimeArguments().get("input.text");
  Preconditions.checkNotNull(inputText);
  KeyValueTable kvTable = getContext().getDataset(INPUT);
  kvTable.write("key", inputText);
  context.addInput(Input.ofDataset(INPUT, kvTable.getSplits(1, null, null)));

  String outputDatasets = getContext().getRuntimeArguments().get("output.datasets");
  outputDatasets = outputDatasets != null ? outputDatasets : PFS;
  for (String outputName : outputDatasets.split(",")) {
    String outputPartition = getContext().getRuntimeArguments().get(outputName + ".output.partition");
    PartitionKey outputPartitionKey = outputPartition == null ? null :
      PartitionKey.builder().addField("number", Integer.parseInt(outputPartition)).build();
    Map<String, String> outputArguments = new HashMap<>();
    if (outputPartitionKey != null) {
      // static partition: all output for this dataset goes into the one given partition
      PartitionedFileSetArguments.setOutputPartitionKey(outputArguments, outputPartitionKey);
    } else {
      // no fixed partition given: let a DynamicPartitioner assign a key per record
      PartitionedFileSetArguments.setDynamicPartitioner(outputArguments, KeyPartitioner.class);
    }
    context.addOutput(Output.ofDataset(outputName, outputArguments));
  }
}
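KeyPartitioner itself is not shown in this snippet; a minimal sketch of what such a DynamicPartitioner could look like, assuming Text mapper output keys whose string value parses as the "number" field (the actual class may differ):

// Hypothetical sketch of a DynamicPartitioner: derive each record's
// partition key from the mapper output key instead of using a fixed key.
public static final class KeyPartitioner extends DynamicPartitioner<Text, Text> {
  @Override
  public PartitionKey getPartitionKey(Text key, Text value) {
    return PartitionKey.builder()
      .addIntField("number", Integer.parseInt(key.toString()))
      .build();
  }
}
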
@Override
protected void initialize() throws Exception {
  // add a partition to the pfs
  PartitionedFileSet pfs = getContext().getDataset("pfs");
  PartitionKey key = PartitionKey.builder().addField("x", 1).build();
  PartitionOutput partitionOutput = pfs.getPartitionOutput(key);
  partitionOutput.addPartition();

  // configure the same partition as output for the MR
  Map<String, String> args = new HashMap<>();
  PartitionedFileSetArguments.setOutputPartitionKey(args, key);
  getContext().addOutput(Output.ofDataset("pfs", args));

  // configure an input
  KeyValueTable kv = getContext().getDataset("recorder");
  kv.write("hello", "world");
  getContext().addInput(Input.ofDataset("recorder"));

  // configure mapper and no reducers
  Job job = getContext().getHadoopJob();
  job.setMapperClass(IdentityMapper.class);
  job.setNumReduceTasks(0);
}

Map<String, String> outputArgs = new HashMap<>();
PartitionKey outputKey = PartitionKey.builder().addStringField("x", "xx").build();
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
Map<String, String> args = new HashMap<>();
args.putAll(RuntimeArguments.addScope(Scope.DATASET, "pfs", inputArgs));

PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyX);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(
PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, keyY);
runtimeArguments.putAll(RuntimeArguments.addScope(Scope.DATASET, PARTITIONED, outputArgs));
Assert.assertTrue(

@Test
public void testRollbackOnJobFailure() throws Exception {
  // tests the logic of #onFailure method
  Map<String, String> args = new HashMap<>();
  FileSetArguments.setOutputPath(args, "custom/output/path");
  PartitionedFileSetArguments.setOutputPartitionKey(args, PARTITION_KEY);
  PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance, args);
  TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs);
  txContext.start();

  Location outputLocation = pfs.getEmbeddedFileSet().getOutputLocation();
  Assert.assertFalse(outputLocation.exists());
  outputLocation.mkdirs();
  Assert.assertTrue(outputLocation.exists());

  ((PartitionedFileSetDataset) pfs).onFailure();
  txContext.abort();

  // because the previous transaction aborted, the partition as well as the directory for it will not exist
  txContext.start();
  Assert.assertNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertFalse(outputLocation.exists());
  txContext.finish();
}
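For contrast, a sketch of the success path under the same setup: onSuccess() is the output-committer hook that registers the configured output partition, so after a committed transaction the partition should be visible. This is an illustration assuming the same pfs, txContext, and PARTITION_KEY as above, not a test from the original source:

// Sketch of the happy path: onSuccess() adds the partition for the
// configured output partition key, and the commit makes it visible.
txContext.start();
((PartitionedFileSetDataset) pfs).onSuccess();
txContext.finish();
txContext.start();
Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
txContext.finish();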