/**
 * @deprecated as of 0.13, slated for removal with 0.15
 * Use {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)} instead,
 * to specify a partition filter to directly initialize the input with.
 */
@Deprecated
public HCatInputFormat setFilter(String filter) throws IOException {
  // null filters are supported to simplify client code
  if (filter != null) {
    inputJobInfo = InputJobInfo.create(
      inputJobInfo.getDatabaseName(),
      inputJobInfo.getTableName(),
      filter,
      inputJobInfo.getProperties());
    try {
      InitializeInput.setInput(conf, inputJobInfo);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
  return this;
}
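For reference, a minimal sketch of the recommended replacement named in the deprecation note: initializing the input with a partition filter in a single call to the static setInput(). The database, table, and filter values are placeholders.

// Sketch of the non-deprecated path: pass the partition filter directly to setInput().
// "mydb", "mytable", and the filter expression are illustrative values.
Job job = Job.getInstance();
HCatInputFormat.setInput(job.getConfiguration(), "mydb", "mytable", "ds='2024-01-01'");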
/**
 * Get statistics about the data to be loaded. Only input data size is implemented at this time.
 */
@Override
public ResourceStatistics getStatistics(String location, Job job) throws IOException {
  try {
    if (dbName == null || tableName == null) {
      throw new IOException("DB or table name unset. setLocation() must be invoked on this "
        + "loader to set them");
    }
    ResourceStatistics stats = new ResourceStatistics();
    long inputSize = -1;
    LinkedList<InputJobInfo> inputJobInfos = HCatUtil.getInputJobInfosFromConf(
      job.getConfiguration());
    for (InputJobInfo inputJobInfo : inputJobInfos) {
      if (dbName.equals(inputJobInfo.getDatabaseName())
          && tableName.equals(inputJobInfo.getTableName())) {
        inputSize = getSizeInBytes(inputJobInfo);
        break;
      }
    }
    if (inputSize == -1) {
      throw new IOException("Could not calculate input size for database: " + dbName
        + ", table: " + tableName + ". Requested location: " + location);
    }
    stats.setSizeInBytes(inputSize);
    return stats;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
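Pig normally drives getStatistics() itself, but the flow can be sketched by hand once setLocation() has fixed the database and table on the loader. This is illustrative only; the location string is a placeholder, and it assumes Pig's ResourceStatistics exposes the getter matching the setSizeInBytes() call above.

// Illustrative only: exercise the loader's statistics path directly.
// "mydb.mytable" is a placeholder; Pig supplies the location in practice.
HCatLoader loader = new HCatLoader();
loader.setLocation("mydb.mytable", job);                       // sets dbName/tableName
ResourceStatistics stats = loader.getStatistics("mydb.mytable", job);
long inputBytes = stats.getSizeInBytes();                      // only size is populated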
/**
 * Set properties for the input format.
 * @param properties properties for the input specification
 * @return this
 * @throws IOException on all errors
 */
public HCatInputFormat setProperties(Properties properties) throws IOException {
  Preconditions.checkNotNull(properties, "required argument 'properties' is null");
  inputJobInfo = InputJobInfo.create(
    inputJobInfo.getDatabaseName(),
    inputJobInfo.getTableName(),
    inputJobInfo.getFilter(),
    properties);
  try {
    InitializeInput.setInput(conf, inputJobInfo);
  } catch (Exception e) {
    throw new IOException(e);
  }
  return this;
}
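A minimal usage sketch, assuming the input was first initialized via the static setInput(); the property key and value shown are hypothetical.

// Minimal sketch: re-initialize the input with caller-supplied properties.
// The property key/value pair is a hypothetical example.
HCatInputFormat inputFormat =
    HCatInputFormat.setInput(job.getConfiguration(), "mydb", "mytable");
Properties props = new Properties();
props.setProperty("custom.input.property", "value");  // hypothetical key
inputFormat.setProperties(props);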
@Test
public void test4ArgCreate() throws Exception {
  Properties p = new Properties();
  p.setProperty("key", "value");
  InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p);
  Assert.assertEquals("Db", jobInfo.getDatabaseName());
  Assert.assertEquals("Table", jobInfo.getTableName());
  Assert.assertEquals("Filter", jobInfo.getFilter());
  Assert.assertEquals("value", jobInfo.getProperties().getProperty("key"));
}
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }
  Properties mytableProperties = tableDesc.getProperties();
  mytableProperties.setProperty(
    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
    inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());
  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    Map<String, String> properties = tableDesc.getJobProperties();
    LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
      properties.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (inputJobInfos == null) {
      inputJobInfos = new LinkedList<>();
    }
    inputJobInfos.add(inputJobInfo);
    properties.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));
    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }
  return jobProperties;
}
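The reader side mirrors the deserialization above: consumers that previously cast the stored value to a single InputJobInfo now receive the accumulated list. A sketch, assuming a TableDesc whose job properties were populated as in the method above:

// Sketch: read back the accumulated InputJobInfo list that
// getInputJobProperties() serialized under HCAT_KEY_JOB_INFO.
@SuppressWarnings("unchecked")
LinkedList<InputJobInfo> storedInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
    tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_JOB_INFO));
for (InputJobInfo info : storedInfos) {
  System.out.println(info.getDatabaseName() + "." + info.getTableName());
}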
/**
 * Set the input to use for the Job. This queries the metadata server with the specified
 * partition predicates, gets the matching partitions, and puts the information in the job
 * configuration object.
 *
 * To ensure a known InputJobInfo state, only the database name, table name, filter, and
 * properties are preserved. All other modifications to the given InputJobInfo are discarded.
 *
 * After calling setInput, the InputJobInfo list can be retrieved from the job configuration
 * as follows:
 * {@code
 * LinkedList<InputJobInfo> inputInfo = (LinkedList<InputJobInfo>) HCatUtil
 *     .deserialize(job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
 * }
 *
 * @param conf the job Configuration object
 * @param theirInputJobInfo information on the Input to read
 * @throws Exception
 */
public static void setInput(Configuration conf, InputJobInfo theirInputJobInfo)
    throws Exception {
  InputJobInfo inputJobInfo = InputJobInfo.create(
    theirInputJobInfo.getDatabaseName(),
    theirInputJobInfo.getTableName(),
    theirInputJobInfo.getFilter(),
    theirInputJobInfo.getProperties());
  populateInputJobInfo(conf, inputJobInfo, null);
  HCatUtil.putInputJobInfoToConf(inputJobInfo, conf);
}
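An end-to-end sketch of the call sequence, using only helpers shown elsewhere in this change; the database, table, and filter values are placeholders.

// Placeholder values; InitializeInput normalizes the request before storing it.
InputJobInfo request = InputJobInfo.create("mydb", "mytable", "ds='2024-01-01'",
    new Properties());
InitializeInput.setInput(conf, request);

// The normalized info is appended to the list in the configuration;
// the most recent entry can be fetched directly:
InputJobInfo stored = HCatUtil.getLastInputJobInfosFromConf(conf);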
@Test
public void testInputJobInfoInConf() throws Exception {
  Configuration conf = new Configuration(false);
  InputJobInfo inputJobInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  Assert.assertNull(inputJobInfo);
  List<InputJobInfo> inputJobInfos = HCatUtil.getInputJobInfosFromConf(conf);
  Assert.assertNull(inputJobInfos);

  InputJobInfo inputJobInfo0 = InputJobInfo.create("db", "table", "", new Properties());
  InputJobInfo inputJobInfo1 = InputJobInfo.create("db", "table2", "", new Properties());
  HCatUtil.putInputJobInfoToConf(inputJobInfo0, conf);
  HCatUtil.putInputJobInfoToConf(inputJobInfo1, conf);

  inputJobInfo = HCatUtil.getLastInputJobInfosFromConf(conf);
  inputJobInfos = HCatUtil.getInputJobInfosFromConf(conf);
  Assert.assertEquals(inputJobInfo1.getDatabaseName(), inputJobInfo.getDatabaseName());
  Assert.assertEquals(inputJobInfo1.getTableName(), inputJobInfo.getTableName());
  Assert.assertEquals(inputJobInfo0.getDatabaseName(), inputJobInfos.get(0).getDatabaseName());
  Assert.assertEquals(inputJobInfo0.getTableName(), inputJobInfos.get(0).getTableName());
}
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }
  Properties mytableProperties = tableDesc.getProperties();
  mytableProperties.setProperty(
    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
    inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());
  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    tableDesc.getJobProperties().put(
      HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfo));
    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }
  return jobProperties;
}
/**
 * Set the input to use for the Job. This queries the metadata server with the specified
 * partition predicates, gets the matching partitions, and puts the information in the job
 * configuration object.
 *
 * To ensure a known InputJobInfo state, only the database name, table name, filter, and
 * properties are preserved. All other modifications to the given InputJobInfo are discarded.
 *
 * After calling setInput, the InputJobInfo can be retrieved from the job configuration as
 * follows:
 * {@code
 * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(
 *     job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
 * }
 *
 * @param conf the job Configuration object
 * @param theirInputJobInfo information on the Input to read
 * @throws Exception
 */
public static void setInput(Configuration conf, InputJobInfo theirInputJobInfo)
    throws Exception {
  InputJobInfo inputJobInfo = InputJobInfo.create(
    theirInputJobInfo.getDatabaseName(),
    theirInputJobInfo.getTableName(),
    theirInputJobInfo.getFilter(),
    theirInputJobInfo.getProperties());
  conf.set(
    HCatConstants.HCAT_KEY_JOB_INFO,
    HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null)));
}