/**
 * Records the schema of the HCatRecord data returned by HCatInputFormat in the
 * configuration of the given job.
 *
 * @param job the job object whose configuration receives the serialized schema
 * @param hcatSchema the schema to use as the consolidated schema
 * @throws IOException declared on the signature; presumably raised when the
 *         schema cannot be serialized (confirm against HCatUtil.serialize)
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  final String schemaKey = HCatConstants.HCAT_KEY_OUTPUT_SCHEMA;
  // The configuration is looked up before the schema is serialized.
  job.getConfiguration().set(schemaKey, HCatUtil.serialize(hcatSchema));
}
/**
 * Appends the given InputJobInfo to the list stored in the configuration under
 * {@code HCatConstants.HCAT_KEY_JOB_INFO} and writes the updated list back.
 * A new list is started when deserializing the stored value yields {@code null}.
 *
 * @param inputJobInfo the job info to append
 * @param conf the configuration that is read and then updated
 * @throws IOException declared on the signature; presumably raised by the
 *         (de)serialization helpers (confirm against HCatUtil)
 */
public static void putInputJobInfoToConf(InputJobInfo inputJobInfo, Configuration conf)
  throws IOException {

  final String storedJobInfos = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
  LinkedList<InputJobInfo> jobInfoList =
    (LinkedList<InputJobInfo>) HCatUtil.deserialize(storedJobInfos);

  // Nothing usable stored yet: start a fresh list.
  if (jobInfoList == null) {
    jobInfoList = new LinkedList<>();
  }
  jobInfoList.add(inputJobInfo);

  final String updatedJobInfos = HCatUtil.serialize(jobInfoList);
  conf.set(HCatConstants.HCAT_KEY_JOB_INFO, updatedJobInfos);
}
/** * Specifies the fields which are returned by the InputFormat and their order. * * @param fields The fields and their order which are returned by the InputFormat. * @return This InputFormat with specified return fields. * @throws java.io.IOException */ public HCatInputFormatBase<T> getFields(String... fields) throws IOException { // build output schema ArrayList<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(fields.length); for (String field : fields) { fieldSchemas.add(this.outputSchema.get(field)); } this.outputSchema = new HCatSchema(fieldSchemas); // update output schema configuration configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema)); return this; }
@Override public void write(DataOutput output) throws IOException { String partitionInfoString = HCatUtil.serialize(partitionInfo); // write partitionInfo into output WritableUtils.writeString(output, partitionInfoString); WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); Writable baseSplitWritable = (Writable) baseMapRedSplit; //write baseSplit into output baseSplitWritable.write(output); }
/**
 * Set the schema for the data being written out to the partition. The
 * table schema is used by default for the partition if this is not called.
 *
 * <p>The job info is read from the configuration, updated with the schema and
 * its own partition values, and then stored back in serialized form under
 * {@code HCatConstants.HCAT_KEY_OUTPUT_INFO}.
 *
 * @param conf the job Configuration object
 * @param schema the schema for the data
 * @throws IOException declared on the signature; presumably raised by the job-info
 *         lookup or serialization (confirm against the helpers)
 */
public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException {
  final OutputJobInfo outputInfo = getJobInfo(conf);

  // Apply the schema together with the partition values already held by the job info.
  setPartDetails(outputInfo, schema, outputInfo.getPartitionValues());

  final String serializedInfo = HCatUtil.serialize(outputInfo);
  conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, serializedInfo);
}
public static HiveConf getHiveConf(Configuration conf) throws IOException { HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); //copy the hive conf into the job conf and restore it //in the backend context if (StringUtils.isBlank(conf.get(HCatConstants.HCAT_KEY_HIVE_CONF))) { // Called once on the client. LOG.info(HCatConstants.HCAT_KEY_HIVE_CONF + " not set. Generating configuration differences."); Properties differences = getHiveSiteOverrides(conf); // Must set this key even if differences is empty otherwise client and AM will attempt // to set this multiple times. conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(differences)); } else { // Called one or more times on the client and AM. LOG.info(HCatConstants.HCAT_KEY_HIVE_CONF + " is set. Applying configuration differences."); Properties properties = (Properties) HCatUtil.deserialize( conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); storePropertiesToHiveConf(properties, hiveConf); } if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { hiveConf.setVar(HiveConf.ConfVars.METASTORE_TOKEN_SIGNATURE, conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); } return hiveConf; }
/** * Creates a HCatInputFormat for the given database, table, and * {@link org.apache.hadoop.conf.Configuration}. * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}. * The return type of the InputFormat can be changed to Flink-native tuples by calling * {@link HCatInputFormatBase#asFlinkTuples()}. * * @param database The name of the database to read from. * @param table The name of the table to read. * @param config The Configuration for the InputFormat. * @throws java.io.IOException */ public HCatInputFormatBase(String database, String table, Configuration config) throws IOException { super(); this.configuration = config; HadoopUtils.mergeHadoopConf(this.configuration); this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table); this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration); // configure output schema of HCatFormat configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema)); // set type information this.resultType = new WritableTypeInfo(DefaultHCatRecord.class); }
tableDesc.getJobProperties().put( HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
/**
 * Collects the input job properties that the given storage handler configures
 * for the table described by {@code inputJobInfo}.
 *
 * <p>A TableDesc is built from the storer properties, and {@code inputJobInfo} is
 * appended to the serialized list kept in its job properties under
 * {@code HCatConstants.HCAT_KEY_JOB_INFO}. The storage handler then fills the
 * returned map.
 *
 * <p>Note: the Properties object obtained from the storer info is written to
 * directly (the serialization lib entry is put into it).
 *
 * @param storageHandler the handler supplying the SerDe and format classes and
 *        configuring the job properties
 * @param inputJobInfo the job info describing the table being read
 * @return the map passed to {@code configureInputJobProperties}
 * @throws IllegalStateException if an IOException occurs while preparing or
 *         configuring the job properties
 */
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
                                                        InputJobInfo inputJobInfo) {
  final Properties storerProps = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  storerProps.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());

  final TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    storageHandler.getOutputFormatClass(), storerProps);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }

  // Record the qualified table name ("database.table") in the table properties.
  tableDesc.getProperties().setProperty(
    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
    inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());

  final Map<String, String> configuredProps = new HashMap<String, String>();
  try {
    final Map<String, String> descJobProps = tableDesc.getJobProperties();

    final String storedJobInfos = descJobProps.get(HCatConstants.HCAT_KEY_JOB_INFO);
    LinkedList<InputJobInfo> jobInfoList =
      (LinkedList<InputJobInfo>) HCatUtil.deserialize(storedJobInfos);
    // Nothing usable stored yet: start a fresh list.
    if (jobInfoList == null) {
      jobInfoList = new LinkedList<>();
    }
    jobInfoList.add(inputJobInfo);
    descJobProps.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(jobInfoList));

    storageHandler.configureInputJobProperties(tableDesc, configuredProps);
  } catch (IOException e) {
    throw new IllegalStateException(
      "Failed to configure StorageHandler", e);
  }

  return configuredProps;
}
HCatUtil.serialize(jobInfo)); } catch (IOException e) { throw new IllegalStateException("Failed to set output path", e);
/**
 * Records the schema of the HCatRecord data returned by HCatInputFormat in the
 * configuration of the given job.
 *
 * @param job the job object whose configuration receives the serialized schema
 * @param hcatSchema the schema to use as the consolidated schema
 * @throws IOException declared on the signature; presumably raised when the
 *         schema cannot be serialized (confirm against HCatUtil.serialize)
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  final String schemaKey = HCatConstants.HCAT_KEY_OUTPUT_SCHEMA;
  // The configuration is looked up before the schema is serialized.
  job.getConfiguration().set(schemaKey, HCatUtil.serialize(hcatSchema));
}
/**
 * Records the schema of the HCatRecord data returned by HCatInputFormat in the
 * configuration of the given job.
 *
 * @param job the job object whose configuration receives the serialized schema
 * @param hcatSchema the schema to use as the consolidated schema
 * @throws IOException declared on the signature; presumably raised when the
 *         schema cannot be serialized (confirm against HCatUtil.serialize)
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  final String schemaKey = HCatConstants.HCAT_KEY_OUTPUT_SCHEMA;
  // The configuration is looked up before the schema is serialized.
  job.getConfiguration().set(schemaKey, HCatUtil.serialize(hcatSchema));
}
/**
 * Records the schema of the HCatRecord data returned by HCatInputFormat in the
 * configuration of the given job.
 *
 * @param job the job object whose configuration receives the serialized schema
 * @param hcatSchema the schema to use as the consolidated schema
 * @throws IOException declared on the signature; presumably raised when the
 *         schema cannot be serialized (confirm against HCatUtil.serialize)
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  final String schemaKey = HCatConstants.HCAT_KEY_OUTPUT_SCHEMA;
  // The configuration is looked up before the schema is serialized.
  job.getConfiguration().set(schemaKey, HCatUtil.serialize(hcatSchema));
}
/**
 * Records the schema of the HCatRecord data returned by HCatInputFormat in the
 * configuration of the given job.
 *
 * @param job the job object whose configuration receives the serialized schema
 * @param hcatSchema the schema to use as the consolidated schema
 * @throws IOException declared on the signature; presumably raised when the
 *         schema cannot be serialized (confirm against HCatUtil.serialize)
 */
public static void setOutputSchema(Job job, HCatSchema hcatSchema) throws IOException {
  final String schemaKey = HCatConstants.HCAT_KEY_OUTPUT_SCHEMA;
  // The configuration is looked up before the schema is serialized.
  job.getConfiguration().set(schemaKey, HCatUtil.serialize(hcatSchema));
}
@Override public void write(DataOutput output) throws IOException { String partitionInfoString = HCatUtil.serialize(partitionInfo); // write partitionInfo into output WritableUtils.writeString(output, partitionInfoString); WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); Writable baseSplitWritable = (Writable) baseMapRedSplit; //write baseSplit into output baseSplitWritable.write(output); }
@Override public void write(DataOutput output) throws IOException { String partitionInfoString = HCatUtil.serialize(partitionInfo); // write partitionInfo into output WritableUtils.writeString(output, partitionInfoString); WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); Writable baseSplitWritable = (Writable) baseMapRedSplit; //write baseSplit into output baseSplitWritable.write(output); }
@Override public void write(DataOutput output) throws IOException { String partitionInfoString = HCatUtil.serialize(partitionInfo); // write partitionInfo into output WritableUtils.writeString(output, partitionInfoString); WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); Writable baseSplitWritable = (Writable) baseMapRedSplit; //write baseSplit into output baseSplitWritable.write(output); }
@Override public void write(DataOutput output) throws IOException { String partitionInfoString = HCatUtil.serialize(partitionInfo); // write partitionInfo into output WritableUtils.writeString(output, partitionInfoString); WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); Writable baseSplitWritable = (Writable) baseMapRedSplit; //write baseSplit into output baseSplitWritable.write(output); }
/**
 * Set the schema for the data being written out to the partition. The
 * table schema is used by default for the partition if this is not called.
 *
 * <p>The job info is read from the configuration, updated with the schema and
 * its own partition values, and then stored back in serialized form under
 * {@code HCatConstants.HCAT_KEY_OUTPUT_INFO}.
 *
 * @param conf the job Configuration object
 * @param schema the schema for the data
 * @throws IOException declared on the signature; presumably raised by the job-info
 *         lookup or serialization (confirm against the helpers)
 */
public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException {
  final OutputJobInfo outputInfo = getJobInfo(conf);

  // Apply the schema together with the partition values already held by the job info.
  setPartDetails(outputInfo, schema, outputInfo.getPartitionValues());

  final String serializedInfo = HCatUtil.serialize(outputInfo);
  conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, serializedInfo);
}
/**
 * Set the schema for the data being written out to the partition. The
 * table schema is used by default for the partition if this is not called.
 *
 * <p>The job info is read from the configuration, updated with the schema and
 * its own partition values, and then stored back in serialized form under
 * {@code HCatConstants.HCAT_KEY_OUTPUT_INFO}.
 *
 * @param conf the job Configuration object
 * @param schema the schema for the data
 * @throws IOException declared on the signature; presumably raised by the job-info
 *         lookup or serialization (confirm against the helpers)
 */
public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException {
  final OutputJobInfo outputInfo = getJobInfo(conf);

  // Apply the schema together with the partition values already held by the job info.
  setPartDetails(outputInfo, schema, outputInfo.getPartitionValues());

  final String serializedInfo = HCatUtil.serialize(outputInfo);
  conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, serializedInfo);
}