long sizeInBytes = 0;
for (PartInfo partInfo : inputJobInfo.getPartitions()) {
  try {
    Path p = new Path(partInfo.getLocation());
    // The excerpt is truncated here; a plausible completion (assumption) sums
    // the bytes under each partition location. 'conf' is assumed in scope, and
    // getContentSummary covers both file and directory locations.
    sizeInBytes += p.getFileSystem(conf).getContentSummary(p).getLength();
  } catch (IOException e) {
    // Skip locations that cannot be read rather than failing the estimate.
  }
}
/**
 * @deprecated as of 0.13, slated for removal with 0.15
 * Use {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)} instead,
 * to specify a partition filter to directly initialize the input with.
 */
@Deprecated
public HCatInputFormat setFilter(String filter) throws IOException {
  // null filters are supported to simplify client code
  if (filter != null) {
    inputJobInfo = InputJobInfo.create(
        inputJobInfo.getDatabaseName(),
        inputJobInfo.getTableName(),
        filter,
        inputJobInfo.getProperties());
    try {
      InitializeInput.setInput(conf, inputJobInfo);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
  return this;
}
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());

  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
      storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }

  Properties tableProperties = tableDesc.getProperties();
  tableProperties.setProperty(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
      inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());

  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    Map<String, String> properties = tableDesc.getJobProperties();
    // Append this input to the list of InputJobInfos already serialized into
    // the job properties, rather than overwriting a single value.
    LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
        properties.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (inputJobInfos == null) {
      inputJobInfos = new LinkedList<>();
    }
    inputJobInfos.add(inputJobInfo);
    properties.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));

    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }
  return jobProperties;
}
/**
 * Get statistics about the data to be loaded. Only input data size is implemented at this time.
 */
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  try {
    if (dbName == null || tableName == null) {
      throw new IOException("DB or table name unset. setLocation() must be invoked on this "
          + "loader to set them");
    }
    ResourceStatistics stats = new ResourceStatistics();
    long inputSize = -1;
    LinkedList<InputJobInfo> inputJobInfos = HCatUtil.getInputJobInfosFromConf(
        job.getConfiguration());
    for (InputJobInfo inputJobInfo : inputJobInfos) {
      if (dbName.equals(inputJobInfo.getDatabaseName())
          && tableName.equals(inputJobInfo.getTableName())) {
        inputSize = getSizeInBytes(inputJobInfo);
        break;
      }
    }
    if (inputSize == -1) {
      throw new IOException("Could not calculate input size for database: " + dbName + ", "
          + "table: " + tableName + ". Requested location:" + location);
    }
    stats.setSizeInBytes(inputSize);
    return stats;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(),
    inputJobInfo.getTableName());
List<PartInfo> partInfoList = new ArrayList<PartInfo>();
inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
if (table.getPartitionKeys().size() != 0) {
  // Partitioned table: fetch every partition matching the filter.
  List<Partition> parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(),
      inputJobInfo.getTableName(), inputJobInfo.getFilter(), (short) -1);
  // ... (per-partition PartInfo construction elided in this excerpt) ...
  partInfoList.add(partInfo);
}
inputJobInfo.setPartitions(partInfoList);
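// Illustration of the filter argument accepted by listPartitionsByFilter; the
// table and partition-key names below are hypothetical, and the exact filter
// grammar depends on the metastore version.
List<Partition> matched = client.listPartitionsByFilter(
    "default", "clicks", "ds >= \"2014-01-01\" and region = \"us\"", (short) -1);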
/**
 * Return partitioning columns for this input. Can only be called after setInput has been
 * called, since setInput adds a populated InputJobInfo to the list kept in the job conf.
 * @param conf the job configuration
 * @return partitioning columns of the table specified by the job.
 * @throws IOException
 */
public static HCatSchema getPartitionColumns(Configuration conf) throws IOException {
  InputJobInfo inputInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Preconditions.checkNotNull(inputInfo,
      "inputJobInfo is null, setInput has not yet been called to save job into conf supplied.");
  return inputInfo.getTableInfo().getPartitionColumns();
}
@Test
public void testInputJobInfoInConf() throws Exception {
  Configuration conf = new Configuration(false);

  // Empty conf: neither the last entry nor the list is present.
  InputJobInfo inputJobInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Assert.assertNull(inputJobInfo);
  List<InputJobInfo> inputJobInfos = HCatUtil.getInputJobInfosFromConf(conf);
  Assert.assertNull(inputJobInfos);

  InputJobInfo inputJobInfo0 = InputJobInfo.create("db", "table", "", new Properties());
  InputJobInfo inputJobInfo1 = InputJobInfo.create("db", "table2", "", new Properties());
  HCatUtil.putInputJobInfoToConf(inputJobInfo0, conf);
  HCatUtil.putInputJobInfoToConf(inputJobInfo1, conf);

  // Both inputs are retained; the "last" accessor returns the most recent one.
  inputJobInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  inputJobInfos = HCatUtil.getInputJobInfosFromConf(conf);
  Assert.assertEquals(inputJobInfo1.getDatabaseName(), inputJobInfo.getDatabaseName());
  Assert.assertEquals(inputJobInfo1.getTableName(), inputJobInfo.getTableName());
  Assert.assertEquals(inputJobInfo0.getDatabaseName(), inputJobInfos.get(0).getDatabaseName());
  Assert.assertEquals(inputJobInfo0.getTableName(), inputJobInfos.get(0).getTableName());
}
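// For context, a minimal sketch of the append semantics this test exercises.
// Assumption: the method name and signature are taken from the test; the body
// below mirrors the list handling in getInputJobProperties above and is not
// the project's actual implementation.
public static void putInputJobInfoToConf(InputJobInfo inputJobInfo, Configuration conf)
    throws IOException {
  LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
      conf.get(HCatConstants.HCAT_KEY_JOB_INFO));
  if (inputJobInfos == null) {
    inputJobInfos = new LinkedList<>();
  }
  // Append rather than overwrite, so earlier inputs survive in the conf.
  inputJobInfos.add(inputJobInfo);
  conf.set(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));
}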
/**
 * Set inputs to use for the job. This queries the metastore with the given input
 * specification and serializes matching partitions into the job conf for use by MR tasks.
 * @param conf the job configuration
 * @param dbName database name; if null, 'default' is used
 * @param tableName table name
 * @param filter the partition filter to use, can be null for no filter
 * @throws IOException on all errors
 */
public static HCatInputFormat setInput(
    Configuration conf, String dbName, String tableName, String filter) throws IOException {
  Preconditions.checkNotNull(conf, "required argument 'conf' is null");
  Preconditions.checkNotNull(tableName, "required argument 'tableName' is null");

  HCatInputFormat hCatInputFormat = new HCatInputFormat();
  hCatInputFormat.conf = conf;
  hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, filter, null);

  try {
    InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo);
  } catch (Exception e) {
    throw new IOException(e);
  }
  return hCatInputFormat;
}
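// Typical driver-side usage of setInput, for illustration; the database, table
// and filter values here are hypothetical.
Job job = Job.getInstance(new Configuration(), "hcat-read-example");
HCatInputFormat.setInput(job.getConfiguration(), "default", "clicks", "ds=\"2014-01-01\"");
job.setInputFormatClass(HCatInputFormat.class);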
/**
 * Initializes a new InputJobInfo for reading data from a table.
 * @param databaseName the db name
 * @param tableName the table name
 * @param filter the partition filter
 * @param properties implementation-specific job properties
 */
public static InputJobInfo create(String databaseName,
    String tableName,
    String filter,
    Properties properties) {
  return new InputJobInfo(databaseName, tableName, filter, properties);
}
/**
 * Return data columns for this input. Can only be called after setInput has been
 * called, since setInput adds a populated InputJobInfo to the list kept in the job conf.
 * @param conf the job configuration
 * @return data columns of the table specified by the job.
 * @throws IOException
 */
public static HCatSchema getDataColumns(Configuration conf) throws IOException {
  InputJobInfo inputInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Preconditions.checkNotNull(inputInfo,
      "inputJobInfo is null, setInput has not yet been called to save job into conf supplied.");
  return inputInfo.getTableInfo().getDataColumns();
}
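// After setInput has run, both schema getters read from the same conf; a short
// illustration (assumes the static getters above are exposed on HCatInputFormat).
HCatSchema dataColumns = HCatInputFormat.getDataColumns(job.getConfiguration());
HCatSchema partitionColumns = HCatInputFormat.getPartitionColumns(job.getConfiguration());
System.out.println("data columns: " + dataColumns.getFieldNames()
    + ", partition columns: " + partitionColumns.getFieldNames());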
/**
 * Set properties for the input format.
 * @param properties properties for the input specification
 * @return this
 * @throws IOException on all errors
 */
public HCatInputFormat setProperties(Properties properties) throws IOException {
  Preconditions.checkNotNull(properties, "required argument 'properties' is null");
  inputJobInfo = InputJobInfo.create(
      inputJobInfo.getDatabaseName(),
      inputJobInfo.getTableName(),
      inputJobInfo.getFilter(),
      properties);
  try {
    InitializeInput.setInput(conf, inputJobInfo);
  } catch (Exception e) {
    throw new IOException(e);
  }
  return this;
}
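// Because setInput returns the new instance and setProperties returns this,
// the two calls chain; the property key below is hypothetical.
Properties props = new Properties();
props.setProperty("my.storage.handler.setting", "value"); // hypothetical key
HCatInputFormat.setInput(job.getConfiguration(), "default", "clicks", null)
    .setProperties(props);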
private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd,
    Map<String, String> parameters, Configuration conf,
    InputJobInfo inputJobInfo) throws IOException {

  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);
  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

  // copy the properties from storageHandler to jobProperties
  Map<String, String> jobProperties =
      HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);

  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }

  // FIXME
  // Bloating partinfo with inputJobInfo is not good
  return new PartInfo(schema, storageHandler, sd.getLocation(),
      hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
List<PartInfo> partitionInfoList = inputJobInfo.getPartitions();
if (partitionInfoList == null) {
  // Hedged completion: the excerpt is truncated here. With no matching
  // partitions there is nothing to read, so returning the (assumed in-scope)
  // empty 'splits' list is a plausible continuation.
  return splits;
}