/**
 * Set a property for the table format.
 * This may only be called after setting the format using {@link #setExploreFormat}.
 *
 * @param name the name of the format property
 * @param value the value of the format property
 * @return this builder
 * @throws IllegalStateException if no format has been set yet
 */
public Builder setExploreFormatProperty(String name, String value) {
  if (format == null) {
    throw new IllegalStateException("explore format has not been set");
  }
  // format properties are namespaced under the format they belong to
  String key = String.format("%s.%s.%s", PROPERTY_EXPLORE_FORMAT, format, name);
  add(key, value);
  return this;
}
/**
 * Configures whether the file set should take possession of an existing base location.
 *
 * @param possessExisting whether to possess an existing base location
 * @return this builder
 */
public Builder setPossessExisting(boolean possessExisting) {
  add(DATA_POSSESS_EXISTING, String.valueOf(possessExisting));
  return this;
}
/**
 * Configures whether the file set should use an existing base location.
 *
 * @param useExisting whether to use an existing base location
 * @return this builder
 */
public Builder setUseExisting(boolean useExisting) {
  add(DATA_USE_EXISTING, String.valueOf(useExisting));
  return this;
}
/**
 * Configure a file set to use Avro file format with a given schema. The schema is parsed
 * as an Avro schema, validated and converted into a Hive schema. The file set is configured to use
 * Avro key input and output format, and also configured for Explore to use Avro. The schema is added
 * to the file set properties in all the different required ways:
 * <ul>
 *   <li>As a top-level dataset property;</li>
 *   <li>As the schema for the input and output format;</li>
 *   <li>As the schema of the Hive table;</li>
 *   <li>As the schema to be used by the Avro serde (which is used by Hive).</li>
 * </ul>
 *
 * @param configuredSchema the original schema configured for the table
 * @param properties a builder for the file set properties
 */
public static void configureAvroFileSet(String configuredSchema, FileSetProperties.Builder properties) {
  // register the Avro serde and container formats with Hive/Explore
  properties.setEnableExploreOnCreate(true);
  properties.setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe");
  properties.setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat");
  properties.setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat");
  // the serde reads the schema from this table property
  properties.setTableProperty("avro.schema.literal", configuredSchema);
  // also expose the schema as a top-level dataset property
  properties.add(DatasetProperties.SCHEMA, configuredSchema);
}
/**
 * Configure a file set to use Parquet file format with a given schema. The schema is lower-cased, parsed
 * as an Avro schema, validated and converted into a Hive schema. The file set is configured to use
 * Parquet input and output format, and also configured for Explore to use Parquet. The schema is added
 * to the file set properties in all the different required ways:
 * <ul>
 *   <li>As a top-level dataset property;</li>
 *   <li>As the schema for the input and output format;</li>
 *   <li>As the schema of the Hive table.</li>
 * </ul>
 *
 * @param configuredSchema the original schema configured for the table
 * @param properties a builder for the file set properties
 */
public static void configureParquetFileSet(String configuredSchema, FileSetProperties.Builder properties) {
  String hiveSchema = parseHiveSchema(configuredSchema, configuredSchema);
  // parseHiveSchema appears to return the schema wrapped in one enclosing character on each
  // side (presumably parentheses) — strip them before handing the schema to Explore.
  String exploreSchema = hiveSchema.substring(1, hiveSchema.length() - 1);
  properties.setEnableExploreOnCreate(true);
  properties.setExploreFormat("parquet");
  properties.setExploreSchema(exploreSchema);
  // also expose the original schema as a top-level dataset property
  properties.add(DatasetProperties.SCHEMA, configuredSchema);
}
/**
 * Set the default permissions for files and directories.
 *
 * @param permissions the permissions to apply to newly created files and directories
 * @return this builder
 */
@Beta
public Builder setFilePermissions(String permissions) {
  add(PROPERTY_FILES_PERMISSIONS, permissions);
  return this;
}
/**
 * Set the input format used to create the Hive table.
 * Note that this can be different than the input format used for the file set itself.
 *
 * @param className fully qualified class name of the Hive input format
 * @return this builder
 */
public Builder setExploreInputFormat(String className) {
  add(PROPERTY_EXPLORE_INPUT_FORMAT, className);
  return this;
}
/**
 * Set the class name of the SerDe used to create the Hive table.
 *
 * @param className fully qualified class name of the SerDe
 * @return this builder
 */
public Builder setSerDe(String className) {
  add(PROPERTY_EXPLORE_SERDE, className);
  return this;
}
/**
 * Set the schema for the Hive table.
 *
 * @param schema a Hive schema string of the form: field type, ...
 * @return this builder
 */
public Builder setExploreSchema(String schema) {
  add(PROPERTY_EXPLORE_SCHEMA, schema);
  return this;
}
/**
 * Sets the output format of the file dataset.
 *
 * @param className fully qualified class name of the output format
 * @return this builder
 */
public Builder setOutputFormat(String className) {
  add(OUTPUT_FORMAT, className);
  return this;
}
/**
 * Set the name of the group for files and directories.
 *
 * @param group the group name to assign to newly created files and directories
 * @return this builder
 */
@Beta
public Builder setFileGroup(String group) {
  add(PROPERTY_FILES_GROUP, group);
  return this;
}
}
/**
 * Set a table property to be added to the Hive table. Multiple properties can be set.
 *
 * @param name the name of the table property
 * @param value the value of the table property
 * @return this builder
 */
public Builder setTableProperty(String name, String value) {
  // table properties are stored under a common prefix
  String key = PROPERTY_EXPLORE_TABLE_PROPERTY_PREFIX + name;
  add(key, value);
  return this;
}
/**
 * Sets a property for the input format of the file dataset.
 *
 * @param name the name of the input format property
 * @param value the value of the input format property
 * @return this builder
 */
public Builder setInputProperty(String name, String value) {
  // input format properties are stored under a common prefix
  String key = INPUT_PROPERTIES_PREFIX + name;
  add(key, value);
  return this;
}
/**
 * Sets the input format of the file dataset.
 *
 * @param className fully qualified class name of the input format
 * @return this builder
 */
public Builder setInputFormat(String className) {
  add(INPUT_FORMAT, className);
  return this;
}
/**
 * Configures whether the files (the data) in this fileset are managed externally.
 *
 * @param isExternal whether the data is managed externally
 * @return this builder
 */
public Builder setDataExternal(boolean isExternal) {
  add(DATA_EXTERNAL, String.valueOf(isExternal));
  return this;
}
/**
 * Set the output format used to create the Hive table.
 * Note that this can be different than the output format used for the file set itself.
 *
 * @param className fully qualified class name of the Hive output format
 * @return this builder
 */
public Builder setExploreOutputFormat(String className) {
  add(PROPERTY_EXPLORE_OUTPUT_FORMAT, className);
  return this;
}
/**
 * Enable explore for this dataset.
 *
 * @param enabled whether explore should be enabled when the dataset is created
 * @return this builder
 */
public Builder setEnableExploreOnCreate(boolean enabled) {
  add(PROPERTY_ENABLE_EXPLORE_ON_CREATE, String.valueOf(enabled));
  return this;
}
/**
 * Set the format for the Hive table.
 *
 * @param format currently, only "text" and "csv" are supported.
 * @return this builder
 */
public Builder setExploreFormat(String format) {
  // remember the format so that setExploreFormatProperty can namespace its keys
  this.format = format;
  add(PROPERTY_EXPLORE_FORMAT, format);
  return this;
}
/**
 * Sets the base path for the file dataset.
 *
 * @param path the base path under which the dataset's files are stored
 * @return this builder
 */
public Builder setBasePath(String path) {
  add(BASE_PATH, path);
  return this;
}
/**
 * Sets a property for the output format of the file dataset.
 *
 * @param name the name of the output format property
 * @param value the value of the output format property
 * @return this builder
 */
public Builder setOutputProperty(String name, String value) {
  // output format properties are stored under a common prefix
  String key = OUTPUT_PROPERTIES_PREFIX + name;
  add(key, value);
  return this;
}