/**
 * Adds an input path to the runtime arguments for a file dataset.
 */
public static void addInputPath(Map<String, String> arguments, String path) {
  String existing = arguments.get(INPUT_PATHS);
  if (existing == null) {
    setInputPath(arguments, path);
  } else {
    setInputPath(arguments, existing + "," + path);
  }
}
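For illustration, a minimal usage sketch of this helper: repeated calls to addInputPath accumulate a comma-separated list under the input path argument, while setOutputPath sets the output path. The import path and the relative file names used here are assumptions (the package is io.cdap.cdap.api.dataset.lib in CDAP 6.x and co.cask.cdap.api.dataset.lib in earlier releases).

import java.util.HashMap;
import java.util.Map;

import io.cdap.cdap.api.dataset.lib.FileSetArguments;

public class FileSetArgumentsSketch {
  public static void main(String[] args) {
    Map<String, String> fileArgs = new HashMap<>();

    // hypothetical relative paths inside the FileSet's base path
    FileSetArguments.addInputPath(fileArgs, "2020-01-01/part-0");
    FileSetArguments.addInputPath(fileArgs, "2020-01-02/part-0");
    FileSetArguments.setOutputPath(fileArgs, "out");

    // the input path argument now holds "2020-01-01/part-0,2020-01-02/part-0"
    System.out.println(fileArgs);
  }
}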
private FileSet createFileset(DatasetId dsid) throws IOException, DatasetManagementException {
  dsFrameworkUtil.createInstance("fileSet", dsid, FileSetProperties.builder()
    .setBasePath("testDir").build());
  Map<String, String> fileArgs = Maps.newHashMap();
  FileSetArguments.setInputPath(fileArgs, "some?File1");
  FileSetArguments.setOutputPath(fileArgs, "some?File1");
  return dsFrameworkUtil.getInstance(dsid, fileArgs);
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "inputFile");
  context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class);

  // write to the same output dataset twice, aliased differently, so each output gets its own path arguments
  Map<String, String> output1Args = new HashMap<>();
  FileSetArguments.setOutputPath(output1Args, "small_purchases");
  context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output1Args).alias("small_purchases"));

  Map<String, String> output2Args = new HashMap<>();
  FileSetArguments.setOutputPath(output2Args, "large_purchases");
  context.addOutput(Output.ofDataset(SEPARATED_PURCHASES, output2Args).alias("large_purchases"));

  Job job = context.getHadoopJob();
  job.setMapperClass(FileMapper.class);
  job.setNumReduceTasks(0);
}
@Override
public void initialize() throws Exception {
  MapReduceContext context = getContext();

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "inputFile");
  context.addInput(Input.ofDataset(PURCHASES, inputArgs), FileMapper.class);

  // a second input, aliased so the mapper sees the aliased name
  context.addInput(Input.ofDataset(PURCHASES2, inputArgs).alias("secondPurchases"), FileMapper2.class);

  // since a Mapper class is set on the job itself, omitting the mapper in the addInput call defaults to that class
  context.addInput(Input.ofDataset(CUSTOMERS, inputArgs));

  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "output");
  context.addOutput(Output.ofDataset(OUTPUT_DATASET, outputArgs));

  Job job = context.getHadoopJob();
  job.setMapperClass(FileMapper.class);
  job.setReducerClass(FileReducer.class);
}
private void testSparkWithFileSet(ApplicationManager applicationManager, String sparkProgram) throws Exception {
  DataSetManager<FileSet> filesetManager = getDataset("fs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareFileInput(location);

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> outputArgs = new HashMap<>();
  FileSetArguments.setOutputPath(outputArgs, "xx");

  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", inputArgs));
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "fs", outputArgs));
  args.put("input", "fs");
  args.put("output", "fs");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram).start(args);
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 1, TimeUnit.MINUTES);
  validateFileOutput(fileset.getLocation("xx"), "custom:");

  // clean up the input and output locations after running the test
  fileset.getLocation("nn").delete(true);
  fileset.getLocation("xx").delete(true);
}
FileSetArguments.setInputPath(fileArgs, "some.file"); FileSetArguments.setOutputPath(fileArgs, "out"); FileSet fileSet = dsFrameworkUtil.getInstance(testFileSetInstance5, fileArgs);
private void testSparkWithGetDataset(Class<? extends Application> appClass, String sparkProgram) throws Exception {
  ApplicationManager applicationManager = deploy(appClass);

  DataSetManager<FileSet> filesetManager = getDataset("logs");
  FileSet fileset = filesetManager.get();
  Location location = fileset.getLocation("nn");
  prepareInputFileSetWithLogData(location);

  Map<String, String> inputArgs = new HashMap<>();
  FileSetArguments.setInputPath(inputArgs, "nn");
  Map<String, String> args = new HashMap<>();
  args.putAll(RuntimeArguments.addScope(Scope.DATASET, "logs", inputArgs));
  args.put("input", "logs");
  args.put("output", "logStats");

  SparkManager sparkManager = applicationManager.getSparkManager(sparkProgram);
  sparkManager.startAndWaitForRun(args, ProgramRunStatus.COMPLETED, 2, TimeUnit.MINUTES);

  DataSetManager<KeyValueTable> logStatsManager = getDataset("logStats");
  KeyValueTable logStatsTable = logStatsManager.get();
  validateGetDatasetOutput(logStatsTable);

  // clean up after the run
  location.delete(true);
  logStatsManager.flush();
  try (CloseableIterator<KeyValue<byte[], byte[]>> scan = logStatsTable.scan(null, null)) {
    while (scan.hasNext()) {
      logStatsTable.delete(scan.next().getKey());
    }
  }
  logStatsManager.flush();
}
FileSetArguments.setInputPath(inputArgs, "input"); Map<String, String> outputArgs = new HashMap<>(); FileSetArguments.setOutputPath(outputArgs, "output");
args.put(ScalaCrossNSProgram.OUTPUT_NAME(), "count");
FileSetArguments.setInputPath(args, "inputFile");
FileSetArguments.setInputPath(inputArgs, "nn"); Map<String, String> outputArgs = new HashMap<>(); FileSetArguments.setOutputPath(inputArgs, "xx");
FileSetArguments.setInputPath(sparkArgs, "input");