/**
 * Configure a file set to use Parquet file format with a given schema. The schema is lower-cased, parsed
 * as an Avro schema, validated and converted into a Hive schema. The file set is configured to use
 * Parquet input and output format, and also configured for Explore to use Parquet. The schema is added
 * to the file set properties in all the different required ways:
 * <ul>
 *   <li>As a top-level dataset property;</li>
 *   <li>As the schema for the input and output format;</li>
 *   <li>As the schema of the Hive table.</li>
 * </ul>
 *
 * @param configuredSchema the original schema configured for the table
 * @param properties a builder for the file set properties
 */
public static void configureParquetFileSet(String configuredSchema, FileSetProperties.Builder properties) {
  // Lower-case the schema before parsing, as the Javadoc above promises and as
  // configureORCFileSet already does — Hive treats column names as lower-case.
  String lowerCaseSchema = configuredSchema.toLowerCase();
  String hiveSchema = parseHiveSchema(lowerCaseSchema, configuredSchema);
  properties
    .setEnableExploreOnCreate(true)
    .setExploreFormat("parquet")
    // parseHiveSchema wraps the column list in enclosing delimiters; strip the
    // first and last character so Explore receives only the column list.
    .setExploreSchema(hiveSchema.substring(1, hiveSchema.length() - 1))
    // Keep the original (non-lower-cased) schema as the top-level dataset property.
    .add(DatasetProperties.SCHEMA, configuredSchema);
}
/** * Configure a file set to use ORC file format with a given schema. The schema is parsed * validated and converted into a Hive schema which is compatible with ORC format. The file set is configured to use * ORC input and output format, and also configured for Explore to use Hive. The schema is added * to the file set properties in all the different required ways: * <ul> * <li>As a top-level dataset property;</li> * <li>As the schema for the input and output format;</li> * <li>As the schema to be used by the ORC serde (which is used by Hive).</li> * </ul> * * @param configuredSchema the original schema configured for the table * @param properties a builder for the file set properties */ public static void configureORCFileSet(String configuredSchema, FileSetProperties.Builder properties) { //TODO test if complex cases run with lowercase schema only String lowerCaseSchema = configuredSchema.toLowerCase(); String hiveSchema = parseHiveSchema(lowerCaseSchema, configuredSchema); hiveSchema = hiveSchema.substring(1, hiveSchema.length() - 1); properties.setExploreInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat") .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat") .setSerDe("org.apache.hadoop.hive.ql.io.orc.OrcSerde") .setExploreSchema(hiveSchema) .setEnableExploreOnCreate(true) .add(DatasetProperties.SCHEMA, configuredSchema) .build(); }
PartitionedFileSetProperties.builder() .setExploreFormat("csv") .setExploreSchema("key int, value string") .setEnableExploreOnCreate(true) .build());
.setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",") .setExploreFormat("csv") .setExploreSchema("key string, value string") .build());
.setExploreInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat") .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat") .setExploreSchema("record STRING") .build());
.setExploreFormat("text") .setExploreFormatProperty("delimiter", "\n") .setExploreSchema("record STRING") .build());
@Override public void configure() { addService(new PartitionService()); // Create a partitioned file set, configure it to work with MapReduce and with Explore createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("partition").addIntField("sub-partition").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("f1 STRING, f2 INT") .setDescription("App for testing authorization in partitioned filesets.") .build()); }
@Override public void configure() { createDataset(INPUT, KeyValueTable.class.getName(), DatasetProperties.EMPTY); // create two pfs, identical except for their (table) names for (String name : new String[] { PFS, OTHER }) { createDataset(name, PartitionedFileSet.class.getName(), PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addIntField("number").build()) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setEnableExploreOnCreate(true) .setExploreTableName(name) .setExploreSchema("key STRING, value STRING") .setExploreFormat("csv") .build()); } addMapReduce(new PartitionWriterMR()); }
@Override
protected void addFileProperties(FileSetProperties.Builder propertiesBuilder) {
  // Read and write plain text files via the standard Hadoop text formats.
  propertiesBuilder.setInputFormat(TextInputFormat.class);
  propertiesBuilder.setOutputFormat(TextOutputFormat.class);
  // Register the file set with Explore as a single-column text table.
  propertiesBuilder.setEnableExploreOnCreate(true);
  propertiesBuilder.setExploreFormat("text");
  propertiesBuilder.setExploreSchema("text string");
}