DatasetProperties filesetAvroTableProps = FileSetProperties.builder() .setTableProperty(DatasetSystemMetadataProvider.FILESET_AVRO_SCHEMA_PROPERTY, "avro-table-schema") .build(); assertDatasetSchema("avro-table-schema", filesetAvroTableProps); .setTableProperty(DatasetSystemMetadataProvider.FILESET_AVRO_SCHEMA_PROPERTY, "avro-table-schema") .add(DatasetProperties.SCHEMA, "avro-schema") .build();
/**
 * Applies Avro-related Explore settings to a file set properties builder.
 *
 * <p>The given schema string is passed through unchanged; this method does not itself parse,
 * validate or convert it, and it does not set the file set's own (non-Explore) input or
 * output format. In order, it makes the following calls on the builder:
 * <ol>
 *   <li>turns on "enable Explore on create";</li>
 *   <li>sets the SerDe to Hive's {@code AvroSerDe} class name;</li>
 *   <li>sets the Explore input and output formats to Hive's Avro container format class names;</li>
 *   <li>sets the {@code avro.schema.literal} table property to the schema string;</li>
 *   <li>adds the schema string under the {@link DatasetProperties#SCHEMA} property key.</li>
 * </ol>
 *
 * @param configuredSchema the schema text to record, exactly as configured for the table
 * @param properties the file set properties builder to update in place
 */
public static void configureAvroFileSet(String configuredSchema, FileSetProperties.Builder properties) {
  // Fully-qualified Hive class names are passed as strings, so no Hive class is referenced here.
  final String avroSerDeClass = "org.apache.hadoop.hive.serde2.avro.AvroSerDe";
  final String avroInputFormatClass = "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat";
  final String avroOutputFormatClass = "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat";
  // Table property key under which the schema text is stored.
  final String schemaLiteralKey = "avro.schema.literal";

  // Kept as a single fluent chain, in the original call order, because the return type of
  // each builder method is not visible from here.
  properties
    .setEnableExploreOnCreate(true)
    .setSerDe(avroSerDeClass)
    .setExploreInputFormat(avroInputFormatClass)
    .setExploreOutputFormat(avroOutputFormatClass)
    .setTableProperty(schemaLiteralKey, configuredSchema)
    .add(DatasetProperties.SCHEMA, configuredSchema);
}