hiveProps = HCatUtil.getHCatKeyHiveConf(jobConf);
List<String> setInputPath = setInputPath(jobConf, partitionInfo.getLocation());
if (setInputPath.isEmpty()) {
  continue;
}
Map<String, String> jobProperties = partitionInfo.getJobProperties();
private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd,
    Map<String, String> parameters, Configuration conf,
    InputJobInfo inputJobInfo) throws IOException {

  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);

  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

  // copy the properties from storageHandler to jobProperties
  Map<String, String> jobProperties =
      HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);

  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }

  // FIXME
  // Bloating PartInfo with inputJobInfo is not good
  return new PartInfo(schema, storageHandler, sd.getLocation(),
      hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
public static HiveStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo)
    throws IOException {
  return HCatUtil.getStorageHandler(
      conf,
      partitionInfo.getStorageHandlerClassName(),
      partitionInfo.getSerdeClassName(),
      partitionInfo.getInputFormatClassName(),
      partitionInfo.getOutputFormatClassName());
}
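// A minimal call-site sketch (hypothetical class name, not part of this excerpt) showing how
// the overload above is typically used on the read path: resolve the storage handler for a
// split's PartInfo, then ask it for the reader-side classes. Package names assume the
// org.apache.hive.hcatalog namespace.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.mapreduce.PartInfo;

class StorageHandlerLookupSketch {
  static String describeReaderClasses(Configuration conf, PartInfo partInfo) throws IOException {
    HiveStorageHandler handler = HCatUtil.getStorageHandler(conf, partInfo);
    // The handler supplies both the mapred InputFormat and the SerDe used to decode rows.
    return handler.getInputFormatClass().getName() + " / " + handler.getSerDeClass().getName();
  }
}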
/**
 * Deserialize this object, decompressing the partitions which can exceed the
 * allowed jobConf size.
 * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
 */
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream ois)
    throws IOException, ClassNotFoundException {
  ois.defaultReadObject();
  // Next object in the stream is a byte array of compressed partition information
  ObjectInputStream pis = new ObjectInputStream(new ByteArrayInputStream(
      (byte[]) ois.readObject()));
  ObjectInputStream partInfoReader = new ObjectInputStream(new InflaterInputStream(pis));
  partitions = (List<PartInfo>) partInfoReader.readObject();
  if (partitions != null) {
    for (PartInfo partInfo : partitions) {
      if (partInfo.getTableInfo() == null) {
        partInfo.setTableInfo(this.tableInfo);
      }
    }
  }
  // Closing only the reader used for the decompressed byte stream
  partInfoReader.close();
}
}
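// A sketch of the matching serialization side, inferred purely from the stream nesting that
// readObject above undoes (byte[] -> ObjectInputStream -> InflaterInputStream -> ObjectInputStream).
// The actual writeObject is not part of this excerpt, so treat the details below as an assumption.
// Requires java.io.ByteArrayOutputStream, java.util.zip.Deflater, java.util.zip.DeflaterOutputStream.
private void writeObject(ObjectOutputStream oos) throws IOException {
  oos.defaultWriteObject();
  // Serialize the (potentially very large) partition list into a compressed byte array,
  // then write that array as a single object so the receiving side can read it back.
  ByteArrayOutputStream serialObj = new ByteArrayOutputStream();
  ObjectOutputStream objStream = new ObjectOutputStream(serialObj);
  ObjectOutputStream partInfoWriter = new ObjectOutputStream(
      new DeflaterOutputStream(objStream, new Deflater(Deflater.BEST_COMPRESSION)));
  partInfoWriter.writeObject(partitions);
  partInfoWriter.close();
  oos.writeObject(serialObj.toByteArray());
}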
private void createDeserializer(HCatSplit hcatSplit, HiveStorageHandler storageHandler,
    TaskAttemptContext taskContext) throws IOException {

  deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      taskContext.getConfiguration());

  try {
    InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(),
        hcatSplit.getPartitionInfo().getTableInfo(),
        hcatSplit.getPartitionInfo().getPartitionSchema());
  } catch (SerDeException e) {
    throw new IOException("Failed initializing deserializer "
        + storageHandler.getSerDeClass().getName(), e);
  }
}
PartInfo partitionInfo = hcatSplit.getPartitionInfo();
if (partitionInfo.getTableInfo() == null) {
  partitionInfo.setTableInfo(
      HCatUtil.getLastInputJobInfosFromConf(taskContext.getConfiguration()).getTableInfo());
}
Map<String, String> jobProperties = partitionInfo.getJobProperties();
HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
/**
 * Gets values for fields requested by the output schema which will not be in the data.
 */
private static Map<String, Object> getColValsNotInDataColumns(HCatSchema outputSchema,
    PartInfo partInfo) throws HCatException {
  HCatSchema dataSchema = partInfo.getPartitionSchema();

  Map<String, Object> vals = new HashMap<String, Object>();
  for (String fieldName : outputSchema.getFieldNames()) {
    if (dataSchema.getPosition(fieldName) == null) {
      // this field of the output schema is not present in the data,
      // so check the partition values to see if it is a partition column
      if (partInfo.getPartitionValues().containsKey(fieldName)) {
        // First, get the appropriate field schema for this field
        HCatFieldSchema fschema = outputSchema.get(fieldName);

        // For a partition key type, this will be a primitive typeinfo.
        // Obtain the relevant object inspector for this typeinfo
        ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
            fschema.getTypeInfo());

        // Partition values are stored as strings; convert the string representation in
        // partInfo.getPartitionValues() into an object of the column's actual type
        Object objVal = ObjectInspectorConverters
            .getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
            .convert(partInfo.getPartitionValues().get(fieldName));
        vals.put(fieldName, objVal);
      } else {
        vals.put(fieldName, null);
      }
    }
  }
  return vals;
}
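// A self-contained sketch (hypothetical class name) of the conversion mechanism used above:
// partition values travel as strings, and ObjectInspectorConverters turns them back into
// objects of the partition column's declared type. An int partition column is assumed here.
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PartitionValueConversionSketch {
  public static void main(String[] args) {
    // Object inspector for the column's actual type (int in this example)
    ObjectInspector targetOi =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(TypeInfoFactory.intTypeInfo);
    // Convert the string form of the partition value into a java.lang.Integer
    Object converted = ObjectInspectorConverters
        .getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, targetOi)
        .convert("42");
    System.out.println(converted.getClass().getSimpleName() + " -> " + converted); // Integer -> 42
  }
}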
Path p = new Path(partInfo.getLocation());
if (p.getFileSystem(conf).isFile(p)) {
  sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen();
}
long size = 0;
for (final PartInfo partition : partitions) {
  String totalSize = partition.getInputStorageHandlerProperties()
      .getProperty(StatsSetupConst.TOTAL_SIZE);
  long pathSize = SourceTargetHelper.getPathSize(conf, new Path(partition.getLocation()));
  if (pathSize == -1) {
    LOGGER.info("Unable to locate directory [{}]; skipping", partition.getLocation());
  }
  // (remainder of the loop body not shown in this excerpt)
}
/**
 * Gets the table schema.
 * @return the table schema
 */
public HCatSchema getTableSchema() {
  assert this.partitionInfo.getTableInfo() != null :
      "TableInfo should have been set at this point.";
  return this.partitionInfo.getTableInfo().getAllColumns();
}
/**
 * Gets the data schema.
 * @return the data schema
 */
public HCatSchema getDataSchema() {
  return this.partitionInfo.getPartitionSchema();
}
private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit,
    HiveStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException {

  JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext);
  HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf);
  org.apache.hadoop.mapred.InputFormat inputFormat =
      HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass());
  return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf,
      InternalUtil.createReporter(taskContext));
}
PartInfo partitionInfo = hcatSplit.getPartitionInfo();
if (partitionInfo.getTableInfo() == null) {
  partitionInfo.setTableInfo(((InputJobInfo) HCatUtil.deserialize(
      taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO))).getTableInfo());
}
Map<String, String> jobProperties = partitionInfo.getJobProperties();
HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf);
/**
 * Deserialize this object, decompressing the partitions which can exceed the
 * allowed jobConf size.
 * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
 */
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream ois)
    throws IOException, ClassNotFoundException {
  ois.defaultReadObject();
  ObjectInputStream partInfoReader =
      new ObjectInputStream(new InflaterInputStream(ois));
  partitions = (List<PartInfo>) partInfoReader.readObject();
  for (PartInfo partInfo : partitions) {
    if (partInfo.getTableInfo() == null) {
      partInfo.setTableInfo(this.tableInfo);
    }
  }
}
}