private Map<String, String> getStorerParameterMap(StorerInfo storer) {
  Map<String, String> params = new HashMap<String, String>();
  // Copy the table-level storer properties (e.g. hcat.* keys) to the partition,
  // skipping the column-stats accuracy marker.
  for (Entry<Object, Object> entry : storer.getProperties().entrySet()) {
    if (!entry.getKey().toString().equals(StatsSetupConst.COLUMN_STATS_ACCURATE)) {
      params.put(entry.getKey().toString(), entry.getValue().toString());
    }
  }
  return params;
}
/**
 * Create an instance of the storage handler defined in storerInfo. If one cannot be found,
 * FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe.
 * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system.
 * @param conf the job configuration, used to configure the StorageHandler if it is Configurable
 * @param storerInfo StorerInfo defining the StorageHandler, InputFormat, OutputFormat and SerDe
 * @return a storage handler instance
 * @throws IOException
 */
public static HiveStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo)
    throws IOException {
  return getStorageHandler(conf,
      storerInfo.getStorageHandlerClass(),
      storerInfo.getSerdeClass(),
      storerInfo.getIfClass(),
      storerInfo.getOfClass());
}
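// A minimal usage sketch (not from the source tree): resolving a table's storage
// handler from its StorerInfo. `conf` and `tableInfo` are assumed to be a live
// Configuration and HCatTableInfo already in scope.
HiveStorageHandler storageHandler =
    HCatUtil.getStorageHandler(conf, tableInfo.getStorerInfo());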
static StorerInfo extractStorerInfo(StorageDescriptor sd, Map<String, String> properties)
    throws IOException {
  Properties hcatProperties = new Properties();
  for (String key : properties.keySet()) {
    hcatProperties.put(key, properties.get(key));
  }

  // Also populate with the StorageDescriptor's SerDe parameters.
  for (Map.Entry<String, String> param : sd.getSerdeInfo().getParameters().entrySet()) {
    hcatProperties.put(param.getKey(), param.getValue());
  }

  return new StorerInfo(
      sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(),
      properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE),
      hcatProperties);
}
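// A hedged sketch of a typical call site: feed extractStorerInfo() the
// StorageDescriptor and parameters of a metastore Partition, then inspect the
// merged properties (the test near the end of this section does the same).
// `part` is assumed to be a Partition fetched from the metastore client.
StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
String handlerClass = storer.getStorageHandlerClass(); // may be null for file-based tables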
/**
 * Do any storage-handler-specific special casing while instantiating an HCatLoader.
 *
 * @param conf configuration to write to
 * @param tableInfo the table definition being used
 */
public static void addSpecialCasesParametersForHCatLoader(
    Configuration conf, HCatTableInfo tableInfo) {
  if ((tableInfo == null) || (tableInfo.getStorerInfo() == null)) {
    return;
  }
  String shClass = tableInfo.getStorerInfo().getStorageHandlerClass();
  if ((shClass != null) && shClass.equals("org.apache.hadoop.hive.hbase.HBaseStorageHandler")) {
    // NOTE: We refer to the Hive HBase handler by its string name because we do
    // not want to introduce a compile-time dependency on the hive-hbase-handler
    // module from within hive-hcatalog.
    // This parameter was added for the requirement in HIVE-7072.
    conf.set("pig.noSplitCombination", "true");
  }
}
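// A hedged usage sketch: a loader passes its Configuration and the resolved
// HCatTableInfo; for an HBase-backed table the call disables Pig split
// combination, for anything else it is a no-op. (`conf` and `tableInfo` are
// assumed in scope; the enclosing utility class is deliberately left unqualified.)
addSpecialCasesParametersForHCatLoader(conf, tableInfo);
// HBase-backed table => conf.get("pig.noSplitCombination") equals "true"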
/**
 * Undoes the compression applied by dedupWithTableInfo() during serialization,
 * restoring the PartInfo fields to their original values.
 * This method is idempotent and may be called repeatedly.
 */
private void restoreLocalInfoFromTableInfo() {
  assert tableInfo != null : "TableInfo can't be null at this point.";
  if (partitionSchema == null) {
    partitionSchema = tableInfo.getDataColumns();
  }
  if (storageHandlerClassName == null) {
    storageHandlerClassName = tableInfo.getStorerInfo().getStorageHandlerClass();
  }
  if (inputFormatClassName == null) {
    inputFormatClassName = tableInfo.getStorerInfo().getIfClass();
  }
  if (outputFormatClassName == null) {
    outputFormatClassName = tableInfo.getStorerInfo().getOfClass();
  }
  if (serdeClassName == null) {
    serdeClassName = tableInfo.getStorerInfo().getSerdeClass();
  }
}
private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s)
    throws SerDeException {
  Properties props = new Properties();
  List<FieldSchema> fields = HCatUtil.getFieldSchemaList(s.getFields());
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS,
      MetaStoreUtils.getColumnNamesFromFieldSchema(fields));
  props.setProperty(serdeConstants.COLUMN_NAME_DELIMITER,
      MetaStoreUtils.getColumnNameDelimiter(fields));
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES,
      MetaStoreUtils.getColumnTypesFromFieldSchema(fields));
  props.setProperty("columns.comments",
      MetaStoreUtils.getColumnCommentsFromFieldSchema(fields));

  // Set these properties to match LazySimpleSerDe.
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N");
  props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1");

  // Add properties from parameters set in the table schema.
  props.putAll(info.getStorerInfo().getProperties());
  return props;
}
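// A hypothetical in-class sketch (getSerdeProperties() is private, so this only
// compiles from the same class): build the SerDe properties for a schema and
// read one of the LazySimpleSerDe-compatible defaults back. `tableInfo` and
// `schema` are assumed to be a valid HCatTableInfo and HCatSchema.
Properties serdeProps = getSerdeProperties(tableInfo, schema);
String nullFormat = serdeProps.getProperty(
    org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT); // "\N"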
// Dedup counterpart of restoreLocalInfoFromTableInfo(): null out each field that
// matches the table-level value so it need not be serialized per partition.
if (storageHandlerClassName.equals(tableInfo.getStorerInfo().getStorageHandlerClass())) {
  storageHandlerClassName = null;
} else if (LOG.isDebugEnabled()) {
  LOG.debug("Partition's storageHandler (" + storageHandlerClassName + ") "
      + "differs from table's storageHandler ("
      + tableInfo.getStorerInfo().getStorageHandlerClass() + ").");
}
if (inputFormatClassName.equals(tableInfo.getStorerInfo().getIfClass())) {
  inputFormatClassName = null;
} else if (LOG.isDebugEnabled()) {
  LOG.debug("Partition's InputFormat (" + inputFormatClassName + ") "
      + "differs from table's InputFormat ("
      + tableInfo.getStorerInfo().getIfClass() + ").");
}
if (outputFormatClassName.equals(tableInfo.getStorerInfo().getOfClass())) {
  outputFormatClassName = null;
} else if (LOG.isDebugEnabled()) {
  LOG.debug("Partition's OutputFormat (" + outputFormatClassName + ") "
      + "differs from table's OutputFormat ("
      + tableInfo.getStorerInfo().getOfClass() + ").");
}
if (serdeClassName.equals(tableInfo.getStorerInfo().getSerdeClass())) {
  serdeClassName = null;
} else if (LOG.isDebugEnabled()) {
  LOG.debug("Partition's SerDe (" + serdeClassName + ") "
      + "differs from table's SerDe ("
      + tableInfo.getStorerInfo().getSerdeClass() + ").");
}
Properties props = outputJobInfo.getTableInfo().getStorerInfo().getProperties();
props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    storageHandler.getOutputFormatClass(), props);
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
      storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }

  Properties mytableProperties = tableDesc.getProperties();
  mytableProperties.setProperty(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
      inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());

  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    Map<String, String> properties = tableDesc.getJobProperties();
    LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
        properties.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (inputJobInfos == null) {
      inputJobInfos = new LinkedList<>();
    }
    inputJobInfos.add(inputJobInfo);
    properties.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));

    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }
  return jobProperties;
}
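// A rough caller-side sketch (assumed names): the returned map is what a job
// submission path would copy into the job configuration before input splits are
// computed. `storageHandler`, `inputJobInfo` and `jobConf` are assumptions.
Map<String, String> inputProps =
    HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);
for (Map.Entry<String, String> e : inputProps.entrySet()) {
  jobConf.set(e.getKey(), e.getValue());
}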
private void publishTest(Job job) throws Exception {
  HCatOutputFormat hcof = new HCatOutputFormat();
  TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
      job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID());
  OutputCommitter committer = hcof.getOutputCommitter(tac);
  committer.setupJob(job);
  committer.setupTask(tac);
  committer.commitTask(tac);
  committer.commitJob(job);

  Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1"));
  assertNotNull(part);

  StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters());
  // Expected value first, per the JUnit assertEquals convention.
  assertEquals("testArgValue", storer.getProperties().get("hcat.testarg"));
  assertTrue(part.getSd().getLocation().contains("p1"));
}