/**
 * Configures a file set for the Parquet file format using the given schema.
 * The configured schema is converted to a Hive schema (parsing and validation are
 * delegated to {@code parseHiveSchema}), and the builder is set up so that Explore
 * creates a Parquet-backed table. The original schema string is also recorded as a
 * top-level dataset property under {@link DatasetProperties#SCHEMA}.
 *
 * @param configuredSchema the schema originally configured for the table
 * @param properties the file set properties builder to configure
 */
public static void configureParquetFileSet(String configuredSchema, FileSetProperties.Builder properties) {
  String hiveSchema = parseHiveSchema(configuredSchema, configuredSchema);
  // parseHiveSchema wraps the column list in a pair of enclosing delimiters;
  // Explore expects the bare column list, so strip the first and last character.
  String exploreSchema = hiveSchema.substring(1, hiveSchema.length() - 1);
  properties.setEnableExploreOnCreate(true);
  properties.setExploreFormat("parquet");
  properties.setExploreSchema(exploreSchema);
  properties.add(DatasetProperties.SCHEMA, configuredSchema);
}
.setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class) .setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",") .setExploreFormat("csv") .setExploreSchema("key string, value string") .build());
@Override public void configure() { createDataset(INPUT, KeyValueTable.class.getName(), DatasetProperties.EMPTY); // create two pfs, identical except for their (table) names for (String name : new String[] { PFS, OTHER }) { createDataset(name, PartitionedFileSet.class.getName(), PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addIntField("number").build()) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setEnableExploreOnCreate(true) .setExploreTableName(name) .setExploreSchema("key STRING, value STRING") .setExploreFormat("csv") .build()); } addMapReduce(new PartitionWriterMR()); }
.setExploreFormat("text") .setExploreFormatProperty("delimiter", "\n") .setExploreSchema("record STRING")
@Test
public void testFixProperties() throws DatasetManagementException, UnsupportedTypeException {
  // Exercises testFix across the dataset types whose properties may need fixing,
  // each referred to both by short type name and by fully-qualified class name.

  // FileSet: external data at an absolute path, and an Explore-enabled csv file set.
  testFix("fileSet", FileSetProperties.builder().setBasePath("/tmp/nn").setDataExternal(true).build());
  testFix(FileSet.class.getName(),
          FileSetProperties.builder().setEnableExploreOnCreate(true).setExploreFormat("csv").build());

  // TimePartitionedFileSet: relative base path, with and without a custom property.
  testFix("timePartitionedFileSet", FileSetProperties.builder().setBasePath("relative").build());
  testFix(TimePartitionedFileSet.class.getName(),
          FileSetProperties.builder().setBasePath("relative").add("custom", "value").build());

  // ObjectMappedTable: typed rows with an explorable string row key and no conflict detection.
  testFix("objectMappedTable", ObjectMappedTableProperties.builder().setType(TestObject.class)
      .setRowKeyExploreName("x").setRowKeyExploreType(Schema.Type.STRING)
      .setConflictDetection(ConflictDetection.NONE).build());
  testFix(ObjectMappedTable.class.getName(), ObjectMappedTableProperties.builder().setType(TestObject.class)
      .setRowKeyExploreName("x").setRowKeyExploreType(Schema.Type.STRING)
      .setConflictDetection(ConflictDetection.NONE).build());

  // System datasets: lineage (empty and with a TTL) and usage.
  testFix("lineageDataset", DatasetProperties.EMPTY);
  testFix(LineageDataset.class.getName(), TableProperties.builder().setTTL(1000).build());
  testFix(UsageDataset.class.getSimpleName(), DatasetProperties.EMPTY);

  // Tables: explicit column family, and an indexed table with configured index columns.
  testFix("table", TableProperties.builder().setColumnFamily("fam").build());
  testFix("indexedTable", DatasetProperties.builder().add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "a,c").build());
}
@Override public void configure() { addService(new PartitionService()); // Create a partitioned file set, configure it to work with MapReduce and with Explore createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("partition").addIntField("sub-partition").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("f1 STRING, f2 INT") .setDescription("App for testing authorization in partitioned filesets.") .build()); }
/**
 * Configures the file set with text input/output formats and enables Explore
 * over a single-column ("text string") text table.
 */
@Override
protected void addFileProperties(FileSetProperties.Builder propertiesBuilder) {
  propertiesBuilder.setInputFormat(TextInputFormat.class);
  propertiesBuilder.setOutputFormat(TextOutputFormat.class);
  propertiesBuilder.setEnableExploreOnCreate(true);
  propertiesBuilder.setExploreFormat("text");
  propertiesBuilder.setExploreSchema("text string");
}