long sizeInBytes = 0;
for (PartInfo partInfo : inputJobInfo.getPartitions()) {
  try {
    Path p = new Path(partInfo.getLocation());
    // The excerpt is truncated here; a plausible completion (assumption) sums
    // the bytes under each partition location. 'conf' is assumed in scope, and
    // getContentSummary covers both file and directory locations.
    sizeInBytes += p.getFileSystem(conf).getContentSummary(p).getLength();
  } catch (IOException e) {
    // Skip locations that cannot be read rather than failing the estimate.
  }
}
/**
 * @deprecated as of 0.13, slated for removal with 0.15
 * Use {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)} instead,
 * to specify a partition filter to directly initialize the input with.
 */
@Deprecated
public HCatInputFormat setFilter(String filter) throws IOException {
  // null filters are supported to simplify client code
  if (filter != null) {
    inputJobInfo = InputJobInfo.create(
        inputJobInfo.getDatabaseName(),
        inputJobInfo.getTableName(),
        filter,
        inputJobInfo.getProperties());
    try {
      InitializeInput.setInput(conf, inputJobInfo);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
  return this;
}
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());

  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
      storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }

  Properties tableProperties = tableDesc.getProperties();
  tableProperties.setProperty(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
      inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());

  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    Map<String, String> properties = tableDesc.getJobProperties();
    // Append this input to the list of InputJobInfos already serialized into
    // the job properties, rather than overwriting a single value.
    LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
        properties.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (inputJobInfos == null) {
      inputJobInfos = new LinkedList<>();
    }
    inputJobInfos.add(inputJobInfo);
    properties.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));

    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }
  return jobProperties;
}
/**
 * Get statistics about the data to be loaded. Only input data size is implemented at this time.
 */
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  try {
    if (dbName == null || tableName == null) {
      throw new IOException("DB or table name unset. setLocation() must be invoked on this "
          + "loader to set them");
    }
    ResourceStatistics stats = new ResourceStatistics();
    long inputSize = -1;
    LinkedList<InputJobInfo> inputJobInfos = HCatUtil.getInputJobInfosFromConf(
        job.getConfiguration());
    for (InputJobInfo inputJobInfo : inputJobInfos) {
      if (dbName.equals(inputJobInfo.getDatabaseName())
          && tableName.equals(inputJobInfo.getTableName())) {
        inputSize = getSizeInBytes(inputJobInfo);
        break;
      }
    }
    if (inputSize == -1) {
      throw new IOException("Could not calculate input size for database: " + dbName + ", "
          + "table: " + tableName + ". Requested location:" + location);
    }
    stats.setSizeInBytes(inputSize);
    return stats;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(),
    inputJobInfo.getTableName());
List<PartInfo> partInfoList = new ArrayList<PartInfo>();
inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
if (table.getPartitionKeys().size() != 0) {
  // Partitioned table: fetch every partition matching the filter.
  List<Partition> parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(),
      inputJobInfo.getTableName(), inputJobInfo.getFilter(), (short) -1);
  // ... (per-partition PartInfo construction elided in this excerpt) ...
  partInfoList.add(partInfo);
}
inputJobInfo.setPartitions(partInfoList);
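// Illustration of the filter argument accepted by listPartitionsByFilter; the
// table and partition-key names below are hypothetical, and the exact filter
// grammar depends on the metastore version.
List<Partition> matched = client.listPartitionsByFilter(
    "default", "clicks", "ds >= \"2014-01-01\" and region = \"us\"", (short) -1);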
/**
 * Return partitioning columns for this input. Can only be called after setInput has been
 * called, since setInput adds a populated InputJobInfo to the list kept in the job conf.
 * @param conf the job configuration
 * @return partitioning columns of the table specified by the job.
 * @throws IOException
 */
public static HCatSchema getPartitionColumns(Configuration conf) throws IOException {
  InputJobInfo inputInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Preconditions.checkNotNull(inputInfo,
      "inputJobInfo is null, setInput has not yet been called to save job into conf supplied.");
  return inputInfo.getTableInfo().getPartitionColumns();
}
@Test
public void testInputJobInfoInConf() throws Exception {
  Configuration conf = new Configuration(false);

  // Empty conf: neither the last entry nor the list is present.
  InputJobInfo inputJobInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Assert.assertNull(inputJobInfo);
  List<InputJobInfo> inputJobInfos = HCatUtil.getInputJobInfosFromConf(conf);
  Assert.assertNull(inputJobInfos);

  InputJobInfo inputJobInfo0 = InputJobInfo.create("db", "table", "", new Properties());
  InputJobInfo inputJobInfo1 = InputJobInfo.create("db", "table2", "", new Properties());
  HCatUtil.putInputJobInfoToConf(inputJobInfo0, conf);
  HCatUtil.putInputJobInfoToConf(inputJobInfo1, conf);

  // Both inputs are retained; the "last" accessor returns the most recent one.
  inputJobInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  inputJobInfos = HCatUtil.getInputJobInfosFromConf(conf);
  Assert.assertEquals(inputJobInfo1.getDatabaseName(), inputJobInfo.getDatabaseName());
  Assert.assertEquals(inputJobInfo1.getTableName(), inputJobInfo.getTableName());
  Assert.assertEquals(inputJobInfo0.getDatabaseName(), inputJobInfos.get(0).getDatabaseName());
  Assert.assertEquals(inputJobInfo0.getTableName(), inputJobInfos.get(0).getTableName());
}
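// For context, a minimal sketch of the append semantics this test exercises.
// Assumption: the method name and signature are taken from the test; the body
// below mirrors the list handling in getInputJobProperties above and is not
// the project's actual implementation.
public static void putInputJobInfoToConf(InputJobInfo inputJobInfo, Configuration conf)
    throws IOException {
  LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
      conf.get(HCatConstants.HCAT_KEY_JOB_INFO));
  if (inputJobInfos == null) {
    inputJobInfos = new LinkedList<>();
  }
  // Append rather than overwrite, so earlier inputs survive in the conf.
  inputJobInfos.add(inputJobInfo);
  conf.set(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));
}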
/**
 * Set inputs to use for the job. This queries the metastore with the given input
 * specification and serializes matching partitions into the job conf for use by MR tasks.
 * @param conf the job configuration
 * @param dbName database name; if null, 'default' is used
 * @param tableName table name
 * @param filter the partition filter to use, can be null for no filter
 * @throws IOException on all errors
 */
public static HCatInputFormat setInput(
    Configuration conf, String dbName, String tableName, String filter) throws IOException {
  Preconditions.checkNotNull(conf, "required argument 'conf' is null");
  Preconditions.checkNotNull(tableName, "required argument 'tableName' is null");

  HCatInputFormat hCatInputFormat = new HCatInputFormat();
  hCatInputFormat.conf = conf;
  hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, filter, null);

  try {
    InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo);
  } catch (Exception e) {
    throw new IOException(e);
  }
  return hCatInputFormat;
}
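// Typical driver-side usage of setInput, for illustration; the database, table
// and filter values here are hypothetical.
Job job = Job.getInstance(new Configuration(), "hcat-read-example");
HCatInputFormat.setInput(job.getConfiguration(), "default", "clicks", "ds=\"2014-01-01\"");
job.setInputFormatClass(HCatInputFormat.class);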
/**
 * Initializes a new InputJobInfo for reading data from a table.
 * @param databaseName the db name
 * @param tableName the table name
 * @param filter the partition filter
 * @param properties implementation-specific job properties
 */
public static InputJobInfo create(String databaseName,
    String tableName,
    String filter,
    Properties properties) {
  return new InputJobInfo(databaseName, tableName, filter, properties);
}
/**
 * Return data columns for this input. Can only be called after setInput has been
 * called, since setInput adds a populated InputJobInfo to the list kept in the job conf.
 * @param conf the job configuration
 * @return data columns of the table specified by the job.
 * @throws IOException
 */
public static HCatSchema getDataColumns(Configuration conf) throws IOException {
  InputJobInfo inputInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Preconditions.checkNotNull(inputInfo,
      "inputJobInfo is null, setInput has not yet been called to save job into conf supplied.");
  return inputInfo.getTableInfo().getDataColumns();
}
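// After setInput has run, both schema getters read from the same conf; a short
// illustration (assumes the static getters above are exposed on HCatInputFormat).
HCatSchema dataColumns = HCatInputFormat.getDataColumns(job.getConfiguration());
HCatSchema partitionColumns = HCatInputFormat.getPartitionColumns(job.getConfiguration());
System.out.println("data columns: " + dataColumns.getFieldNames()
    + ", partition columns: " + partitionColumns.getFieldNames());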
/**
 * Set properties for the input format.
 * @param properties properties for the input specification
 * @return this
 * @throws IOException on all errors
 */
public HCatInputFormat setProperties(Properties properties) throws IOException {
  Preconditions.checkNotNull(properties, "required argument 'properties' is null");
  inputJobInfo = InputJobInfo.create(
      inputJobInfo.getDatabaseName(),
      inputJobInfo.getTableName(),
      inputJobInfo.getFilter(),
      properties);
  try {
    InitializeInput.setInput(conf, inputJobInfo);
  } catch (Exception e) {
    throw new IOException(e);
  }
  return this;
}
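// Because setInput returns the new instance and setProperties returns this,
// the two calls chain; the property key below is hypothetical.
Properties props = new Properties();
props.setProperty("my.storage.handler.setting", "value"); // hypothetical key
HCatInputFormat.setInput(job.getConfiguration(), "default", "clicks", null)
    .setProperties(props);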
private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd,
    Map<String, String> parameters, Configuration conf,
    InputJobInfo inputJobInfo) throws IOException {

  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);
  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

  // copy the properties from storageHandler to jobProperties
  Map<String, String> jobProperties =
      HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);

  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }

  // FIXME
  // Bloating partinfo with inputJobInfo is not good
  return new PartInfo(schema, storageHandler, sd.getLocation(),
      hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
List<PartInfo> partitionInfoList = inputJobInfo.getPartitions();
if (partitionInfoList == null) {
  // Hedged completion: the excerpt is truncated here. With no matching
  // partitions there is nothing to read, so returning the (assumed in-scope)
  // empty 'splits' list is a plausible continuation.
  return splits;
}