/**
 * Reads the serialized list of {@link InputJobInfo}s stored in the configuration
 * under {@link HCatConstants#HCAT_KEY_JOB_INFO}.
 *
 * @param conf the job Configuration object
 * @return the deserialized list, or {@code null} when the key has never been set
 * @throws IOException if deserialization fails
 */
public static LinkedList<InputJobInfo> getInputJobInfosFromConf(Configuration conf)
    throws IOException {
  String serialized = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
  return (LinkedList<InputJobInfo>) HCatUtil.deserialize(serialized);
}
/**
 * Appends the given {@link InputJobInfo} to the serialized list kept in the
 * configuration under {@link HCatConstants#HCAT_KEY_JOB_INFO}, creating the list
 * on first use, and writes the updated list back.
 *
 * @param inputJobInfo the job info to append
 * @param conf the job Configuration object
 * @throws IOException if serialization or deserialization fails
 */
public static void putInputJobInfoToConf(InputJobInfo inputJobInfo, Configuration conf)
    throws IOException {
  String existing = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
  LinkedList<InputJobInfo> jobInfos =
      (LinkedList<InputJobInfo>) HCatUtil.deserialize(existing);
  if (jobInfos == null) {
    // First writer: no list has been stored yet.
    jobInfos = new LinkedList<>();
  }
  jobInfos.add(inputJobInfo);
  conf.set(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(jobInfos));
}
/**
 * Resolves the output schema: prefers the explicitly-set serialized schema under
 * {@link HCatConstants#HCAT_KEY_OUTPUT_SCHEMA}, falling back to the table schema.
 *
 * @param conf the job Configuration object
 * @return the output schema to use
 * @throws IOException if deserialization or schema lookup fails
 */
private static HCatSchema getOutputSchema(Configuration conf) throws IOException {
  String serialized = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
  return serialized == null
      ? getTableSchema(conf)
      : (HCatSchema) HCatUtil.deserialize(serialized);
}
/**
 * Custom Java-serialization hook that restores this input format's state.
 * The read order must mirror the corresponding writeObject: field-name count,
 * the field names themselves, then the Writable-serialized Hadoop Configuration.
 *
 * @param in the object stream positioned at this object's custom payload
 * @throws IOException if the stream cannot be read
 * @throws ClassNotFoundException if a serialized class is missing
 */
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
  // Field names were written as an int count followed by one writeUTF per name.
  this.fieldNames = new String[in.readInt()];
  for (int i = 0; i < this.fieldNames.length; i++) {
    this.fieldNames[i] = in.readUTF();
  }
  // Hadoop Configuration is serialized via its Writable contract, not Java
  // serialization, so it must be read with readFields on a fresh instance.
  Configuration configuration = new Configuration();
  configuration.readFields(in);
  // Keep an already-present configuration; only adopt the deserialized one when
  // none has been set on this instance yet.
  if (this.configuration == null) {
    this.configuration = configuration;
  }
  this.hCatInputFormat = new org.apache.hive.hcatalog.mapreduce.HCatInputFormat();
  // NOTE(review): key is hard-coded here; presumably it matches the constant used
  // by the writer side — confirm against HCatInputFormat/serialization code.
  this.outputSchema = (HCatSchema) HCatUtil.deserialize(
      this.configuration.get("mapreduce.lib.hcat.output.schema"));
}
/**
 * Gets the {@link OutputJobInfo} object by reading the Configuration and deserializing
 * the string. If the output job info is not present in the configuration, throws an
 * exception since that means HCatOutputFormat.setOutput has not been called.
 * @param conf the job Configuration object
 * @return the OutputJobInfo object
 * @throws IOException the IO exception
 */
public static OutputJobInfo getJobInfo(Configuration conf) throws IOException {
  String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  if (jobString == null) {
    // setOutput() stores the serialized OutputJobInfo under this key; its absence
    // means the output format was never initialized for this job.
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED);
  }
  return (OutputJobInfo) HCatUtil.deserialize(jobString);
}
public static Map<String,String> getHCatKeyHiveConf(JobConf conf) { try { Properties properties = null; if (! StringUtils.isBlank(conf.get(HCatConstants.HCAT_KEY_HIVE_CONF))) { properties = (Properties) HCatUtil.deserialize( conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); LOG.info(HCatConstants.HCAT_KEY_HIVE_CONF + " is set. Using differences=" + properties); } else { LOG.info(HCatConstants.HCAT_KEY_HIVE_CONF + " not set. Generating configuration differences."); properties = getHiveSiteOverrides(conf); } // This method may not be safe as it can throw an NPE if a key or value is null. return Maps.fromProperties(properties); } catch (IOException e) { throw new IllegalStateException("Failed to deserialize hive conf", e); } }
private String generateKey(TaskAttemptContext context) throws IOException { String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO); if (StringUtils.isBlank(jobInfoString)) { // Avoid the NPE. throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID()); } OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString); return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation(); }
/**
 * Builds a HiveConf for the job, propagating hive-site overrides between the
 * client and the AM through the job configuration.
 *
 * @param conf the job Configuration object
 * @return a HiveConf with the recorded overrides (and token signature, if any) applied
 * @throws IOException if (de)serialization of the overrides fails
 */
public static HiveConf getHiveConf(Configuration conf) throws IOException {
  HiveConf hiveConf = new HiveConf(conf, HCatUtil.class);
  //copy the hive conf into the job conf and restore it
  //in the backend context
  if (StringUtils.isBlank(conf.get(HCatConstants.HCAT_KEY_HIVE_CONF))) {
    // Called once on the client.
    LOG.info(HCatConstants.HCAT_KEY_HIVE_CONF + " not set. Generating configuration differences.");
    Properties differences = getHiveSiteOverrides(conf);
    // Must set this key even if differences is empty otherwise client and AM will attempt
    // to set this multiple times.
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(differences));
  } else {
    // Called one or more times on the client and AM.
    LOG.info(HCatConstants.HCAT_KEY_HIVE_CONF + " is set. Applying configuration differences.");
    Properties properties = (Properties) HCatUtil.deserialize(
        conf.get(HCatConstants.HCAT_KEY_HIVE_CONF));
    storePropertiesToHiveConf(properties, hiveConf);
  }
  // Forward the delegation-token signature into the HiveConf when present.
  if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) {
    hiveConf.setVar(HiveConf.ConfVars.METASTORE_TOKEN_SIGNATURE,
        conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE));
  }
  return hiveConf;
}
/**
 * Deserializes this split: first the serialized PartInfo, then the concrete
 * mapred InputSplit, which is reconstructed reflectively from its class name
 * and then read via its own Writable contract.
 *
 * @param input the stream to read from
 * @throws IOException wrapping any reflective or read failure, tagged with the split class name
 */
@SuppressWarnings("unchecked")
@Override
public void readFields(DataInput input) throws IOException {
  String partitionInfoString = WritableUtils.readString(input);
  partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString);
  String baseSplitClassName = WritableUtils.readString(input);
  org.apache.hadoop.mapred.InputSplit split;
  try {
    Class<? extends org.apache.hadoop.mapred.InputSplit> splitClass =
        (Class<? extends org.apache.hadoop.mapred.InputSplit>)
            JavaUtils.loadClass(baseSplitClassName);
    //Class.forName().newInstance() does not work if the underlying
    //InputSplit has package visibility
    Constructor<? extends org.apache.hadoop.mapred.InputSplit> constructor =
        splitClass.getDeclaredConstructor(new Class[]{});
    constructor.setAccessible(true);
    split = constructor.newInstance();
    // read baseSplit from input
    ((Writable) split).readFields(input);
    this.baseMapRedSplit = split;
  } catch (Exception e) {
    // Preserve the offending class name and the root cause in the wrapped exception.
    throw new IOException("Exception from " + baseSplitClassName, e);
  }
}
if (jobInfoProperty != null) { LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize( jobInfoProperty); if (inputJobInfos == null || inputJobInfos.isEmpty()) {
HCatConstants.HCAT_KEY_OUTPUT_INFO); OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil .deserialize(jobInfoString); StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
/**
 * Initializes this record reader for the given split: builds the underlying
 * record reader and deserializer, then resolves the output and data schemas.
 *
 * @param split the input split (must be an HCatSplit)
 * @param taskContext the task attempt context carrying the job configuration
 * @throws IOException if reader/deserializer creation fails
 * @throws InterruptedException if initialization is interrupted
 */
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split,
    TaskAttemptContext taskContext) throws IOException, InterruptedException {
  HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split);
  baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext);
  createDeserializer(hcatSplit, storageHandler, taskContext);
  // Pull the output schema out of the TaskAttemptContext
  outputSchema = (HCatSchema) HCatUtil.deserialize(
      taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA));
  // Fall back to the full table schema when no projection was requested.
  if (outputSchema == null) {
    outputSchema = hcatSplit.getTableSchema();
  }
  // Pull the table schema out of the Split info
  // TODO This should be passed in the TaskAttemptContext instead
  dataSchema = hcatSplit.getDataSchema();
  errorTracker = new InputErrorTracker(taskContext.getConfiguration());
}
Configuration conf = jobContext.getConfiguration(); try { OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(jobContext.getConfiguration(),jobInfo.getTableInfo().getStorerInfo());
/**
 * Collects the job properties the storage handler wants applied for reading the
 * given table: registers the SerDe, records the qualified table name, appends the
 * InputJobInfo to the serialized list in the table's job properties, and lets the
 * storage handler populate the result map.
 *
 * @param storageHandler the handler responsible for this table's storage format
 * @param inputJobInfo the input job description to record
 * @return the job properties produced by the storage handler
 * @throws IllegalStateException if (de)serialization of the job-info list fails
 */
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties storerProps = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  storerProps.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
      storageHandler.getOutputFormatClass(), storerProps);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }
  // Record the fully-qualified table name on the table descriptor's properties.
  Properties tableProps = tableDesc.getProperties();
  tableProps.setProperty(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
      inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());
  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    // Append this InputJobInfo to the serialized list held in the job properties.
    Map<String, String> descJobProps = tableDesc.getJobProperties();
    LinkedList<InputJobInfo> jobInfoList = (LinkedList<InputJobInfo>)
        HCatUtil.deserialize(descJobProps.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (jobInfoList == null) {
      jobInfoList = new LinkedList<>();
    }
    jobInfoList.add(inputJobInfo);
    descJobProps.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(jobInfoList));
    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }
  return jobProperties;
}
try { OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(tableDesc.getJobProperties().get( HCatConstants.HCAT_KEY_OUTPUT_INFO)); String parentPath = jobInfo.getTableInfo().getTableLocation();
/**
 * Resolves the output schema: prefers the explicitly-set serialized schema under
 * {@link HCatConstants#HCAT_KEY_OUTPUT_SCHEMA}, falling back to the table schema.
 *
 * @param conf the job Configuration object
 * @return the output schema to use
 * @throws IOException if deserialization or schema lookup fails
 */
private static HCatSchema getOutputSchema(Configuration conf) throws IOException {
  String serialized = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
  if (serialized != null) {
    return (HCatSchema) HCatUtil.deserialize(serialized);
  }
  return getTableSchema(conf);
}
/**
 * Returns the schema rows should be written with: the serialized schema stored
 * under {@link HCatConstants#HCAT_KEY_OUTPUT_SCHEMA} when present, otherwise the
 * table's own schema.
 *
 * @param conf the job Configuration object
 * @return the output schema to use
 * @throws IOException if deserialization or schema lookup fails
 */
private static HCatSchema getOutputSchema(Configuration conf) throws IOException {
  String serialized = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
  return serialized == null
      ? getTableSchema(conf)
      : (HCatSchema) HCatUtil.deserialize(serialized);
}
/**
 * Looks up the requested output schema from the configuration, falling back to
 * the table schema when no explicit schema was set.
 *
 * @param conf the job Configuration object
 * @return the output schema to use
 * @throws IOException if deserialization or schema lookup fails
 */
private static HCatSchema getOutputSchema(Configuration conf) throws IOException {
  String serialized = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
  if (serialized != null) {
    return (HCatSchema) HCatUtil.deserialize(serialized);
  }
  return getTableSchema(conf);
}
/**
 * Resolves the effective output schema for the job: the deserialized value of
 * {@link HCatConstants#HCAT_KEY_OUTPUT_SCHEMA} when set, else the table schema.
 *
 * @param conf the job Configuration object
 * @return the output schema to use
 * @throws IOException if deserialization or schema lookup fails
 */
private static HCatSchema getOutputSchema(Configuration conf) throws IOException {
  String serialized = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
  return serialized == null
      ? getTableSchema(conf)
      : (HCatSchema) HCatUtil.deserialize(serialized);
}
/**
 * Return partitioning columns for this input, can only be called after setInput is called.
 * @return partitioning columns of the table specified by the job.
 * @throws IOException
 */
public static HCatSchema getPartitionColumns(Configuration conf) throws IOException {
  // NOTE(review): this casts the HCAT_KEY_JOB_INFO payload to a single InputJobInfo,
  // while other code paths serialize a LinkedList<InputJobInfo> under the same key —
  // confirm this reader matches the writer's format, otherwise this cast will throw
  // a ClassCastException.
  InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(
      conf.get(HCatConstants.HCAT_KEY_JOB_INFO));
  // deserialize(null) yielding null means setInput was never called for this job.
  Preconditions.checkNotNull(inputInfo,
      "inputJobInfo is null, setInput has not yet been called to save job into conf supplied.");
  return inputInfo.getTableInfo().getPartitionColumns();
}