/**
 * Adds the given Avro {@link Schema} to a {@link HiveRegistrationUnit} as a SerDe property.
 *
 * <p>
 * If the serialized schema is at most {@code schemaLiteralLengthLimit} characters long, it is set inline via
 * {@link #SCHEMA_LITERAL}. Otherwise the schema is written to {@code schemaFile} and the path of that file is
 * set via {@link #SCHEMA_URL}.
 * </p>
 *
 * @param schema the Avro schema to register; must not be {@code null}
 * @param schemaFile the file the schema is written to when it is too long to be inlined
 * @param hiveUnit the registration unit that receives the SerDe property
 * @throws IOException declared for the schema-file write path
 */
protected void addSchemaFromAvroFile(Schema schema, Path schemaFile, HiveRegistrationUnit hiveUnit)
    throws IOException {
  Preconditions.checkNotNull(schema);

  // Serialize the schema once; the same string is used for the length check and the literal property.
  String schemaStr = schema.toString();
  if (schemaStr.length() <= this.schemaLiteralLengthLimit) {
    hiveUnit.setSerDeProp(SCHEMA_LITERAL, schemaStr);
    return;
  }

  // The temp file, when enabled, lives next to the final schema file; otherwise null is passed through.
  Path schemaTempFile = null;
  if (useSchemaTempFile) {
    schemaTempFile = new Path(schemaFile.getParent(), this.schemaTempFileName);
  }

  // NOTE(review): the trailing 'true' is presumably an overwrite flag -- confirm against AvroUtils.
  AvroUtils.writeSchemaToFile(schema, schemaFile, schemaTempFile, this.fs, true);
  log.info("Using schema file " + schemaFile.toString());
  hiveUnit.setSerDeProp(SCHEMA_URL, schemaFile.toString());
}
/**
 * Registers the schema location or content for {@code path} on the given unit.
 *
 * <p>
 * When {@code useSchemaFile} is set, only {@link #SCHEMA_URL} is pointed at {@code schemaFileName} under
 * {@code path}. Otherwise the work is delegated to {@code addSchemaFromAvroFile}, timed under
 * {@code HIVE_SPEC_SCHEMA_WRITING_TIMER}.
 * </p>
 */
private void addSchemaProperties(Path path, HiveRegistrationUnit hiveUnit, Schema schema) throws IOException {
  final Path schemaLocation = new Path(path, this.schemaFileName);

  if (!this.useSchemaFile) {
    // The timer context is closed automatically, which stops the timing measurement.
    try (Timer.Context timing = metricContext.timer(HIVE_SPEC_SCHEMA_WRITING_TIMER).time()) {
      addSchemaFromAvroFile(schema, schemaLocation, hiveUnit);
    }
    return;
  }

  hiveUnit.setSerDeProp(SCHEMA_URL, schemaLocation.toString());
}
/**
 * Sets the serde parameters of a table/partition from the given {@link State}.
 *
 * <p>
 * {@link org.apache.gobblin.hive.metastore.HiveMetaStoreBasedRegister} internally uses
 * {@link org.apache.hadoop.hive.metastore.api.Table} and {@link org.apache.hadoop.hive.metastore.api.Partition},
 * which distinguish between table/partition parameters, storage descriptor parameters, and serde parameters.
 * With that register one may therefore need to keep them apart when constructing a
 * {@link HiveRegistrationUnit}, by using {@link #setProps(State)}, {@link #setStorageProps(State)} and
 * {@link #setSerDeProps(State)}. With query-based Hive registration they do not need to be distinguished,
 * since all parameters are passed via TBLPROPERTIES.
 * </p>
 *
 * @param serdeProps the properties to copy; each one is applied through {@code setSerDeProp}
 */
public void setSerDeProps(State serdeProps) {
  for (String name : serdeProps.getPropertyNames()) {
    String value = serdeProps.getProp(name);
    setSerDeProp(name, value);
  }
}
/**
 * Copies the schema SerDe property of {@code newUnit} onto {@code existingUnit}.
 *
 * <p>
 * {@code newUnit} must carry {@link #SCHEMA_LITERAL} or {@link #SCHEMA_URL}; the literal wins when both are
 * present.
 * </p>
 */
@Override
public void updateSchema(HiveRegistrationUnit existingUnit, HiveRegistrationUnit newUnit) throws IOException {
  final boolean hasLiteral = newUnit.getSerDeProps().contains(SCHEMA_LITERAL);
  Preconditions.checkArgument(hasLiteral || newUnit.getSerDeProps().contains(SCHEMA_URL));

  // NOTE(review): only the chosen key is written; the other key is left untouched on existingUnit.
  // Confirm that a stale value of the other key cannot linger there.
  final String schemaKey = hasLiteral ? SCHEMA_LITERAL : SCHEMA_URL;
  existingUnit.setSerDeProp(schemaKey, newUnit.getSerDeProps().getProp(schemaKey));
}
/**
 * Copies SerDe settings from {@code source} to {@code target}.
 *
 * <p>
 * The SerDe type, input format and output format are copied only when present on {@code source}. The
 * {@link #SCHEMA_LITERAL} and {@link #SCHEMA_URL} properties are copied only when {@code source} contains them.
 * </p>
 */
@Override
public void addSerDeProperties(HiveRegistrationUnit source, HiveRegistrationUnit target) throws IOException {
  if (source.getSerDeType().isPresent()) {
    target.setSerDeType(source.getSerDeType().get());
  }
  if (source.getInputFormat().isPresent()) {
    target.setInputFormat(source.getInputFormat().get());
  }
  if (source.getOutputFormat().isPresent()) {
    target.setOutputFormat(source.getOutputFormat().get());
  }

  // Literal first, then URL, mirroring the order in which the properties are set.
  for (String schemaKey : new String[] {SCHEMA_LITERAL, SCHEMA_URL}) {
    if (source.getSerDeProps().contains(schemaKey)) {
      target.setSerDeProp(schemaKey, source.getSerDeProps().getProp(schemaKey));
    }
  }
}
/**
 * Adds the given Avro {@link Schema} to a {@link HiveRegistrationUnit} as a SerDe property.
 *
 * <p>
 * A schema whose serialized form is no longer than {@code schemaLiteralLengthLimit} characters is set inline
 * via {@link #SCHEMA_LITERAL}. A longer schema is written to {@code schemaFile}, and that file's path is set
 * via {@link #SCHEMA_URL}.
 * </p>
 *
 * @param schema the Avro schema to register; must not be {@code null}
 * @param schemaFile the file the schema is written to when it is too long to be inlined
 * @param hiveUnit the registration unit that receives the SerDe property
 * @throws IOException declared for the schema-file write path
 */
protected void addSchemaFromAvroFile(Schema schema, Path schemaFile, HiveRegistrationUnit hiveUnit)
    throws IOException {
  Preconditions.checkNotNull(schema);

  // Render the schema a single time and reuse it for both the size check and the property value.
  String schemaStr = schema.toString();
  if (schemaStr.length() <= this.schemaLiteralLengthLimit) {
    hiveUnit.setSerDeProp(SCHEMA_LITERAL, schemaStr);
  } else {
    // When enabled, the temp file is a sibling of the final schema file; otherwise null is handed on.
    Path schemaTempFile = null;
    if (useSchemaTempFile) {
      schemaTempFile = new Path(schemaFile.getParent(), this.schemaTempFileName);
    }

    // NOTE(review): the final 'true' argument looks like an overwrite switch -- verify in AvroUtils.
    AvroUtils.writeSchemaToFile(schema, schemaFile, schemaTempFile, this.fs, true);
    log.info("Using schema file " + schemaFile.toString());
    hiveUnit.setSerDeProp(SCHEMA_URL, schemaFile.toString());
  }
}
/**
 * Attaches schema information for the data under {@code path} to {@code hiveUnit}.
 *
 * <p>
 * With {@code useSchemaFile} enabled, {@link #SCHEMA_URL} is set to {@code schemaFileName} resolved against
 * {@code path}. Otherwise {@code addSchemaFromAvroFile} decides how to attach the schema, and the call is
 * timed under {@code HIVE_SPEC_SCHEMA_WRITING_TIMER}.
 * </p>
 */
private void addSchemaProperties(Path path, HiveRegistrationUnit hiveUnit, Schema schema) throws IOException {
  final Path target = new Path(path, this.schemaFileName);

  if (this.useSchemaFile) {
    hiveUnit.setSerDeProp(SCHEMA_URL, target.toString());
    return;
  }

  // Closing the timer context at the end of the try block records the elapsed time.
  try (Timer.Context timerContext = metricContext.timer(HIVE_SPEC_SCHEMA_WRITING_TIMER).time()) {
    addSchemaFromAvroFile(schema, target, hiveUnit);
  }
}
/**
 * Applies every property in {@code serdeProps} as a serde parameter of this table/partition.
 *
 * <p>
 * When using {@link org.apache.gobblin.hive.metastore.HiveMetaStoreBasedRegister}, which internally uses
 * {@link org.apache.hadoop.hive.metastore.api.Table} and {@link org.apache.hadoop.hive.metastore.api.Partition},
 * table/partition parameters, storage descriptor parameters, and serde parameters are distinct. One may
 * therefore need to distinguish them when constructing a {@link HiveRegistrationUnit} by using
 * {@link #setProps(State)}, {@link #setStorageProps(State)} and {@link #setSerDeProps(State)}. When using
 * query-based Hive registration, they do not need to be distinguished since all parameters are passed via
 * TBLPROPERTIES.
 * </p>
 *
 * @param serdeProps the serde properties to set, one {@code setSerDeProp} call per property name
 */
public void setSerDeProps(State serdeProps) {
  for (final String key : serdeProps.getPropertyNames()) {
    final String propValue = serdeProps.getProp(key);
    setSerDeProp(key, propValue);
  }
}
/**
 * Updates the schema SerDe property of {@code existingUnit} from {@code newUnit}.
 *
 * <p>
 * The argument check fails unless {@code newUnit} has {@link #SCHEMA_LITERAL} or {@link #SCHEMA_URL}. If the
 * literal is present it is the one copied; otherwise the URL is copied.
 * </p>
 */
@Override
public void updateSchema(HiveRegistrationUnit existingUnit, HiveRegistrationUnit newUnit) throws IOException {
  final boolean literalPresent = newUnit.getSerDeProps().contains(SCHEMA_LITERAL);
  Preconditions.checkArgument(literalPresent || newUnit.getSerDeProps().contains(SCHEMA_URL));

  // NOTE(review): whichever key is not copied keeps its previous value on existingUnit -- confirm
  // this is intended.
  if (literalPresent) {
    existingUnit.setSerDeProp(SCHEMA_LITERAL, newUnit.getSerDeProps().getProp(SCHEMA_LITERAL));
    return;
  }
  existingUnit.setSerDeProp(SCHEMA_URL, newUnit.getSerDeProps().getProp(SCHEMA_URL));
}
/**
 * Transfers SerDe-related settings from one registration unit to another.
 *
 * <p>
 * Each of the SerDe type, input format and output format is set on {@code target} only if {@code source}
 * has it. Likewise, {@link #SCHEMA_LITERAL} and {@link #SCHEMA_URL} are copied only if {@code source}'s
 * SerDe properties contain them.
 * </p>
 */
@Override
public void addSerDeProperties(HiveRegistrationUnit source, HiveRegistrationUnit target) throws IOException {
  if (source.getSerDeType().isPresent()) {
    target.setSerDeType(source.getSerDeType().get());
  }

  if (source.getInputFormat().isPresent()) {
    target.setInputFormat(source.getInputFormat().get());
  }

  if (source.getOutputFormat().isPresent()) {
    target.setOutputFormat(source.getOutputFormat().get());
  }

  // Schema properties are handled in a fixed order: literal, then URL.
  final String[] schemaKeys = {SCHEMA_LITERAL, SCHEMA_URL};
  for (final String key : schemaKeys) {
    if (source.getSerDeProps().contains(key)) {
      target.setSerDeProp(key, source.getSerDeProps().getProp(key));
    }
  }
}