/**
 * Sets the input format of the file dataset.
 *
 * @param inputFormatClass the input format implementation to use
 * @return this builder, for method chaining
 */
public Builder setInputFormat(Class<?> inputFormatClass) {
  // Delegate to the String overload with the fully qualified class name.
  String className = inputFormatClass.getName();
  setInputFormat(className);
  return this;
}
/**
 * Assembles the dataset properties for this snapshot file set, layering the
 * configured base path and the provider's input format on top of the base properties.
 *
 * @param inputFormatProvider supplies the input format class name and its configuration
 * @return the fully built dataset properties
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder properties = SnapshotFileSet.getBaseProperties(config);

  // Only override the base path when one was explicitly configured.
  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    properties.setBasePath(basePath);
  }

  properties.setInputFormat(inputFormatProvider.getInputFormatClassName());
  // Copy every input-format configuration entry onto the fileset properties.
  inputFormatProvider.getInputFormatConfiguration().forEach(properties::setInputProperty);

  addFileProperties(properties);
  return properties.build();
}
/**
 * Assembles the dataset properties for this file set, applying the configured
 * base path (if any) and the provider's input format and configuration.
 *
 * @param inputFormatProvider supplies the input format class name and its configuration
 * @return the fully built dataset properties
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder properties = FileSetProperties.builder();

  // Only override the base path when one was explicitly configured.
  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    properties.setBasePath(basePath);
  }

  properties.setInputFormat(inputFormatProvider.getInputFormatClassName());
  // Copy every input-format configuration entry onto the fileset properties.
  inputFormatProvider.getInputFormatConfiguration().forEach(properties::setInputProperty);

  addFileSetProperties(properties);
  return properties.build();
}
.setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":") dsFramework.addInstance("fileSet", rtOutput1, FileSetProperties.builder() .setBasePath("rtOutput1") .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":") .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
.builder().setBasePath("some/path").setInputFormat(TextInputFormat.class).build()); DataSetManager<FileSet> bManager = getDataset("b"); String bFormat = bManager.get().getInputFormatClassName();
// Dataset id "input" resolved within the input namespace.
DatasetId inputDatasetId = inputNSMeta.getNamespaceId().dataset("input");
// Create a FileSet instance that reads files with the Hadoop text input format.
addDatasetInstance(FileSet.class.getName(), inputDatasetId,
                   FileSetProperties.builder().setInputFormat(TextInputFormat.class).build());
@Override
public void configure() {
  try {
    // Plain file set: custom text input/output formats, colon-separated output.
    createDataset("fs", FileSet.class, FileSetProperties.builder()
        .setInputFormat(MyTextInputFormat.class)
        .setOutputFormat(MyTextOutputFormat.class)
        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
        .build());
    // Partitioned file set keyed on a single string field "x".
    createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addStringField("x").build())
        .setInputFormat(MyTextInputFormat.class)
        .setOutputFormat(TextOutputFormat.class)
        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
        .build());
    // Time-partitioned file set with the same text formats.
    createDataset("tpfs", TimePartitionedFileSet.class, FileSetProperties.builder()
        .setInputFormat(MyTextInputFormat.class)
        .setOutputFormat(TextOutputFormat.class)
        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
        .build());
    // Custom file set type with the same text formats.
    createDataset("myfs", MyFileSet.class, FileSetProperties.builder()
        .setInputFormat(MyTextInputFormat.class)
        .setOutputFormat(TextOutputFormat.class)
        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
        .build());
    addSpark(new FileCountSparkProgram());
    addSpark(new ScalaFileCountSparkProgram());
  } catch (Throwable cause) {
    // Configuration failures are rethrown unchecked.
    throw Throwables.propagate(cause);
  }
}
@Test public void testInputOutputFormatClassAtRuntime() throws Exception { // create a dataset with text input and output formats DatasetId datasetId = OTHER_NAMESPACE.dataset("testRuntimeFormats"); dsFrameworkUtil.createInstance("fileSet", datasetId, FileSetProperties.builder() .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .build()); // without passing anything in arguments, the input/output format classes will come from dataset properties FileSet fs = dsFrameworkUtil.getInstance(datasetId); Assert.assertEquals(TextInputFormat.class.getName(), fs.getInputFormatClassName()); Assert.assertEquals(TextOutputFormat.class.getName(), fs.getOutputFormatClassName()); // allow overriding the input format in dataset runtime args fs = dsFrameworkUtil.getInstance(datasetId, ImmutableMap.of( FileSetProperties.INPUT_FORMAT, CombineTextInputFormat.class.getName())); Assert.assertEquals(CombineTextInputFormat.class.getName(), fs.getInputFormatClassName()); Assert.assertEquals(TextOutputFormat.class.getName(), fs.getOutputFormatClassName()); // allow overriding both the input and output format in dataset runtime args fs = dsFrameworkUtil.getInstance(datasetId, ImmutableMap.of( FileSetProperties.INPUT_FORMAT, CombineTextInputFormat.class.getName(), FileSetProperties.OUTPUT_FORMAT, NullOutputFormat.class.getName())); Assert.assertEquals(CombineTextInputFormat.class.getName(), fs.getInputFormatClassName()); Assert.assertEquals(NullOutputFormat.class.getName(), fs.getOutputFormatClassName()); }
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); Class<? extends InputFormat> inputFormatClass = getConfig().isUseCombineFileInputFormat() ? CombineTextInputFormat.class : TextInputFormat.class; createDataset(PARTITIONED, "partitionedFileSet", PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder() .addStringField("type") .addLongField("time") .build()) // properties for file set .setBasePath("partitioned") .setInputFormat(inputFormatClass) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
@Override
public void configure() {
  setName("AppWithMapReduceUsingFile");
  setDescription("Application with MapReduce job using file as dataset");

  String input = getConfig().inputDataset;
  String output = getConfig().outputDataset;

  // Input file set: colon-separated text records.
  createDataset(input, "fileSet", FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());

  // Only create a separate output file set when it differs from the input.
  if (!output.equals(input)) {
    createDataset(output, "fileSet", FileSetProperties.builder()
        .setBasePath("foo/my-file-output")
        .setInputFormat(TextInputFormat.class)
        .setOutputFormat(TextOutputFormat.class)
        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
        .build());
  }

  addMapReduce(new ComputeSum(getConfig()));
}
@Override
public void configure() {
  setName("AppWithMapReduceUsingMultipleInputs");
  setDescription("Application with MapReduce job using multiple inputs");

  // All three input datasets are text file sets with identical properties.
  for (String name : new String[] { PURCHASES, PURCHASES2, CUSTOMERS }) {
    createDataset(name, "fileSet", FileSetProperties.builder()
        .setInputFormat(TextInputFormat.class)
        .build());
  }

  // Output file set: space-separated text records.
  createDataset(OUTPUT_DATASET, "fileSet", FileSetProperties.builder()
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
      .build());

  addMapReduce(new ComputeSum());
  addMapReduce(new InvalidMapReduce());
}
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); createDataset(TIME_PARTITIONED, "timePartitionedFileSet", FileSetProperties.builder() // properties for file set .setBasePath("partitioned") .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
@Override
protected void configure() {
  setName(WORKFLOW_NAME);
  setDescription("Workflow program with local datasets.");

  createLocalDataset(WORDCOUNT_DATASET, KeyValueTable.class);
  // Local CSV file set read and written as plain text.
  createLocalDataset(CSV_FILESET_DATASET, FileSet.class, FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .build());
  createLocalDataset(UNIQUE_ID_DATASET, KeyValueTable.class);

  addAction(new LocalDatasetWriter());
  addSpark("JavaSparkCSVToSpaceConverter");
  addMapReduce("WordCount");
  addAction(new LocalDatasetReader("readerAction"));
}
}
@Override
public void configure() {
  setName("AppWithMapReduceUsingMultipleOutputs");
  setDescription("Application with MapReduce job using multiple outputs");

  // Input: plain text purchases.
  createDataset(PURCHASES, "fileSet", FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .build());

  // Output: space-separated text records.
  createDataset(SEPARATED_PURCHASES, "fileSet", FileSetProperties.builder()
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
      .build());

  addMapReduce(new SeparatePurchases());
  addMapReduce(new InvalidMapReduce());
}
@Override
protected void addFileProperties(FileSetProperties.Builder propertiesBuilder) {
  // Read and write plain text files.
  propertiesBuilder.setInputFormat(TextInputFormat.class);
  propertiesBuilder.setOutputFormat(TextOutputFormat.class);
  // Register an explorable text table with a single string column named "text".
  propertiesBuilder.setEnableExploreOnCreate(true);
  propertiesBuilder.setExploreFormat("text");
  propertiesBuilder.setExploreSchema("text string");
}
@Override
protected void configure() {
  setName(SPARK);
  setMainClass(getClass());

  // Spark input: colon-separated text file set.
  createDataset(SPARK_INPUT, FileSet.class, FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
  createDataset(SPARK_TABLE, Table.class);

  usePlugin("t1", "n1", "plugin", PluginProperties.builder().add(KEY, TEST).build());
}
@Override
public void configure() {
  // Raw logs come in and go out as plain text.
  createDataset("logs", FileSet.class, FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .build());
  // Aggregated statistics are stored as key/value pairs.
  createDataset("logStats", KeyValueTable.class.getName());

  addSpark(new SparkLogParser());
  addSpark(new ScalaSparkLogParser());
}
@Override
protected void configure() {
  // Input and output file sets share identical text-format properties.
  for (String name : new String[] { INPUT_FILE_SET, OUTPUT_FILE_SET }) {
    createDataset(name, FileSet.class, FileSetProperties.builder()
        .setInputFormat(TextInputFormat.class)
        .setOutputFormat(TextOutputFormat.class)
        .build());
  }
}
@Override public void initialize(SparkHttpServiceContext context) throws Exception { super.initialize(context); try { context.getAdmin().createDataset("wordcount", FileSet.class.getName(), FileSetProperties.builder() .setInputFormat(TextInputFormat.class) .build()); } catch (InstanceConflictException e) { // It's ok if the dataset already exists } }