hiveProps = HCatUtil.getHCatKeyHiveConf(jobConf);
List<String> setInputPath = setInputPath(jobConf, partitionInfo.getLocation());
if (setInputPath.isEmpty()) {
  continue;
}
Map<String, String> jobProperties = partitionInfo.getJobProperties();
private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd,
    Map<String, String> parameters, Configuration conf,
    InputJobInfo inputJobInfo) throws IOException {

  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);

  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

  // copy the properties from storageHandler to jobProperties
  Map<String, String> jobProperties =
      HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);

  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }

  // FIXME
  // Bloating PartInfo with inputJobInfo is not good
  return new PartInfo(schema, storageHandler, sd.getLocation(),
      hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
public static HiveStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo)
    throws IOException {
  return HCatUtil.getStorageHandler(
      conf,
      partitionInfo.getStorageHandlerClassName(),
      partitionInfo.getSerdeClassName(),
      partitionInfo.getInputFormatClassName(),
      partitionInfo.getOutputFormatClassName());
}
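// A minimal call-site sketch (hypothetical class name, not part of this excerpt) showing how
// the overload above is typically used on the read path: resolve the storage handler for a
// split's PartInfo, then ask it for the reader-side classes. Package names assume the
// org.apache.hive.hcatalog namespace.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.mapreduce.PartInfo;

class StorageHandlerLookupSketch {
  static String describeReaderClasses(Configuration conf, PartInfo partInfo) throws IOException {
    HiveStorageHandler handler = HCatUtil.getStorageHandler(conf, partInfo);
    // The handler supplies both the mapred InputFormat and the SerDe used to decode rows.
    return handler.getInputFormatClass().getName() + " / " + handler.getSerDeClass().getName();
  }
}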
/**
 * Deserialize this object, decompressing the partitions which can exceed the
 * allowed jobConf size.
 * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
 */
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream ois)
    throws IOException, ClassNotFoundException {
  ois.defaultReadObject();
  // Next object in the stream is a byte array of compressed partition information
  ObjectInputStream pis = new ObjectInputStream(new ByteArrayInputStream(
      (byte[]) ois.readObject()));
  ObjectInputStream partInfoReader = new ObjectInputStream(new InflaterInputStream(pis));
  partitions = (List<PartInfo>) partInfoReader.readObject();
  if (partitions != null) {
    for (PartInfo partInfo : partitions) {
      if (partInfo.getTableInfo() == null) {
        partInfo.setTableInfo(this.tableInfo);
      }
    }
  }
  // Closing only the reader used for the decompressed byte stream
  partInfoReader.close();
}
}
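// A sketch of the matching serialization side, inferred purely from the stream nesting that
// readObject above undoes (byte[] -> ObjectInputStream -> InflaterInputStream -> ObjectInputStream).
// The actual writeObject is not part of this excerpt, so treat the details below as an assumption.
// Requires java.io.ByteArrayOutputStream, java.util.zip.Deflater, java.util.zip.DeflaterOutputStream.
private void writeObject(ObjectOutputStream oos) throws IOException {
  oos.defaultWriteObject();
  // Serialize the (potentially very large) partition list into a compressed byte array,
  // then write that array as a single object so the receiving side can read it back.
  ByteArrayOutputStream serialObj = new ByteArrayOutputStream();
  ObjectOutputStream objStream = new ObjectOutputStream(serialObj);
  ObjectOutputStream partInfoWriter = new ObjectOutputStream(
      new DeflaterOutputStream(objStream, new Deflater(Deflater.BEST_COMPRESSION)));
  partInfoWriter.writeObject(partitions);
  partInfoWriter.close();
  oos.writeObject(serialObj.toByteArray());
}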
private void createDeserializer(HCatSplit hcatSplit, HiveStorageHandler storageHandler,
    TaskAttemptContext taskContext) throws IOException {

  deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      taskContext.getConfiguration());

  try {
    InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(),
        hcatSplit.getPartitionInfo().getTableInfo(),
        hcatSplit.getPartitionInfo().getPartitionSchema());
  } catch (SerDeException e) {
    throw new IOException("Failed initializing deserializer "
        + storageHandler.getSerDeClass().getName(), e);
  }
}
PartInfo partitionInfo = hcatSplit.getPartitionInfo();
if (partitionInfo.getTableInfo() == null) {
  partitionInfo.setTableInfo(
      HCatUtil.getLastInputJobInfosFromConf(taskContext.getConfiguration()).getTableInfo());
}
Map<String, String> jobProperties = partitionInfo.getJobProperties();
HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
/**
 * Gets values for fields requested by the output schema which will not be in the data.
 */
private static Map<String, Object> getColValsNotInDataColumns(HCatSchema outputSchema,
    PartInfo partInfo) throws HCatException {
  HCatSchema dataSchema = partInfo.getPartitionSchema();

  Map<String, Object> vals = new HashMap<String, Object>();
  for (String fieldName : outputSchema.getFieldNames()) {
    if (dataSchema.getPosition(fieldName) == null) {
      // this field of the output schema is not present in the data,
      // so check the partition values to see if it is a partition column
      if (partInfo.getPartitionValues().containsKey(fieldName)) {
        // First, get the appropriate field schema for this field
        HCatFieldSchema fschema = outputSchema.get(fieldName);

        // For a partition key type, this will be a primitive typeinfo.
        // Obtain the relevant object inspector for this typeinfo
        ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
            fschema.getTypeInfo());

        // Partition values are stored as strings; convert the string representation in
        // partInfo.getPartitionValues() into an object of the column's actual type
        Object objVal = ObjectInspectorConverters
            .getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
            .convert(partInfo.getPartitionValues().get(fieldName));
        vals.put(fieldName, objVal);
      } else {
        vals.put(fieldName, null);
      }
    }
  }
  return vals;
}
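// A self-contained sketch (hypothetical class name) of the conversion mechanism used above:
// partition values travel as strings, and ObjectInspectorConverters turns them back into
// objects of the partition column's declared type. An int partition column is assumed here.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueConversionSketch {
  public static void main(String[] args) {
    // Object inspector for the column's actual type (int in this example)
    ObjectInspector targetOi =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.intTypeInfo);
    // Convert the string form of the partition value into a java.lang.Integer
    Object converted = ObjectInspectorConverters
        .getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, targetOi)
        .convert("42");
    System.out.println(converted.getClass().getSimpleName() + " -> " + converted); // Integer -> 42
  }
}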
Path p = new Path(partInfo.getLocation());
if (p.getFileSystem(conf).isFile(p)) {
  sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen();
}
long size = 0;
for (final PartInfo partition : partitions) {
  String totalSize = partition.getInputStorageHandlerProperties()
      .getProperty(StatsSetupConst.TOTAL_SIZE);
  long pathSize = SourceTargetHelper.getPathSize(conf, new Path(partition.getLocation()));
  if (pathSize == -1) {
    LOGGER.info("Unable to locate directory [{}]; skipping", partition.getLocation());
  }
  // (remainder of the loop body not shown in this excerpt)
}
/**
 * Gets the table schema.
 * @return the table schema
 */
public HCatSchema getTableSchema() {
  assert this.partitionInfo.getTableInfo() != null :
      "TableInfo should have been set at this point.";
  return this.partitionInfo.getTableInfo().getAllColumns();
}
/**
 * Gets the data schema.
 * @return the data schema
 */
public HCatSchema getDataSchema() {
  return this.partitionInfo.getPartitionSchema();
}
private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit,
    HiveStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException {

  JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext);
  HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf);
  org.apache.hadoop.mapred.InputFormat inputFormat =
      HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass());
  return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf,
      InternalUtil.createReporter(taskContext));
}
PartInfo partitionInfo = hcatSplit.getPartitionInfo();
if (partitionInfo.getTableInfo() == null) {
  partitionInfo.setTableInfo(((InputJobInfo) HCatUtil.deserialize(
      taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO))).getTableInfo());
}
Map<String, String> jobProperties = partitionInfo.getJobProperties();
HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
/**
 * Deserialize this object, decompressing the partitions which can exceed the
 * allowed jobConf size.
 * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
 */
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream ois)
    throws IOException, ClassNotFoundException {
  ois.defaultReadObject();
  ObjectInputStream partInfoReader =
      new ObjectInputStream(new InflaterInputStream(ois));
  partitions = (List<PartInfo>) partInfoReader.readObject();
  for (PartInfo partInfo : partitions) {
    if (partInfo.getTableInfo() == null) {
      partInfo.setTableInfo(this.tableInfo);
    }
  }
}
}