/**
 * Instantiates a new hcat partition info.
 *
 * @param partitionSchema the partition schema
 * @param storageHandler the storage handler
 * @param location the location
 * @param hcatProperties hcat-specific properties at the partition
 * @param jobProperties the job properties
 * @param tableInfo the table information
 */
public PartInfo(HCatSchema partitionSchema, HiveStorageHandler storageHandler,
    String location, Properties hcatProperties,
    Map<String, String> jobProperties, HCatTableInfo tableInfo) {
  this.partitionSchema = partitionSchema;
  this.location = location;
  this.hcatProperties = hcatProperties;
  this.jobProperties = jobProperties;
  this.tableInfo = tableInfo;

  this.storageHandlerClassName = storageHandler.getClass().getName();
  this.inputFormatClassName = storageHandler.getInputFormatClass().getName();
  this.serdeClassName = storageHandler.getSerDeClass().getName();
  this.outputFormatClassName = storageHandler.getOutputFormatClass().getName();
}
public static Map<String, String> getInputJobProperties(HiveStorageHandler storageHandler,
    InputJobInfo inputJobInfo) {
  Properties props = inputJobInfo.getTableInfo().getStorerInfo().getProperties();
  props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());

  TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
      storageHandler.getOutputFormatClass(), props);
  if (tableDesc.getJobProperties() == null) {
    tableDesc.setJobProperties(new HashMap<String, String>());
  }

  Properties mytableProperties = tableDesc.getProperties();
  mytableProperties.setProperty(
      org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
      inputJobInfo.getDatabaseName() + "." + inputJobInfo.getTableName());

  Map<String, String> jobProperties = new HashMap<String, String>();
  try {
    Map<String, String> properties = tableDesc.getJobProperties();
    LinkedList<InputJobInfo> inputJobInfos = (LinkedList<InputJobInfo>) HCatUtil.deserialize(
        properties.get(HCatConstants.HCAT_KEY_JOB_INFO));
    if (inputJobInfos == null) {
      inputJobInfos = new LinkedList<>();
    }
    inputJobInfos.add(inputJobInfo);
    properties.put(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inputJobInfos));

    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (IOException e) {
    throw new IllegalStateException("Failed to configure StorageHandler", e);
  }

  return jobProperties;
}
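The map returned above still has to be copied into the job configuration before the storage handler's InputFormat and SerDe are used. A minimal sketch of such a call site, assuming the method above is exposed on HCatUtil and that a JobConf named jobConf and an already-built InputJobInfo are in scope (the variable names are illustrative, not part of the snippet above):

// Apply the handler-supplied input properties to the job configuration so the
// InputFormat and SerDe can see them at read time.
Map<String, String> inputProperties =
    HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);
for (Map.Entry<String, String> entry : inputProperties.entrySet()) {
  jobConf.set(entry.getKey(), entry.getValue());
}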
@Override
public HiveMetaHook getHook(Table tbl) throws MetaException {
  if (tbl == null) {
    return null;
  }
  try {
    HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(hiveConf,
        tbl.getParameters().get(META_TABLE_STORAGE));
    return storageHandler == null ? null : storageHandler.getMetaHook();
  } catch (HiveException e) {
    LOG.error(e.toString());
    throw new MetaException("Failed to get storage handler: " + e);
  }
}
private void createDeserializer(HCatSplit hcatSplit, HiveStorageHandler storageHandler,
    TaskAttemptContext taskContext) throws IOException {
  deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      taskContext.getConfiguration());
  try {
    InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(),
        hcatSplit.getPartitionInfo().getTableInfo(),
        hcatSplit.getPartitionInfo().getPartitionSchema());
  } catch (SerDeException e) {
    throw new IOException("Failed initializing deserializer "
        + storageHandler.getSerDeClass().getName(), e);
  }
}
if (input) {
  try {
    storageHandler.configureInputJobProperties(tableDesc, jobProperties);
  } catch (AbstractMethodError e) {
    // Handler predates the split input/output API; fall back to the older hook.
    LOG.info("configureInputJobProperties not found, using configureTableJobProperties", e);
    storageHandler.configureTableJobProperties(tableDesc, jobProperties);
  }
  try {
    storageHandler.configureInputJobCredentials(tableDesc, jobSecrets);
  } catch (AbstractMethodError e) {
    LOG.info("configureInputJobCredentials not found, no credentials will be configured", e);
  }
} else {
  try {
    storageHandler.configureOutputJobProperties(tableDesc, jobProperties);
  } catch (AbstractMethodError e) {
    LOG.info("configureOutputJobProperties not found, using configureTableJobProperties", e);
    storageHandler.configureTableJobProperties(tableDesc, jobProperties);
  }
}
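A handler only hits the fallback above when it does not implement the per-direction hooks. A minimal sketch of a handler that does implement the input hook, assuming it extends Hive's DefaultStorageHandler; ExampleStorageHandler and "example.input.path" are illustrative names, not part of the snippets above:

// Illustrative handler: forwards one table-level property into the job
// properties so the InputFormat can read it at split/record-read time.
public class ExampleStorageHandler extends DefaultStorageHandler {
  @Override
  public void configureInputJobProperties(TableDesc tableDesc,
      Map<String, String> jobProperties) {
    String path = tableDesc.getProperties().getProperty("example.input.path");
    if (path != null) {
      jobProperties.put("example.input.path", path);
    }
  }
}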
props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());
TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    IgnoreKeyTextOutputFormat.class, props);
if (tableDesc.getJobProperties() == null) {
  tableDesc.setJobProperties(new HashMap<String, String>());
}
// Mirror of the input path: stash the serialized OutputJobInfo for the storage handler.
tableDesc.getJobProperties().put(HCatConstants.HCAT_KEY_OUTPUT_INFO,
    HCatUtil.serialize(outputJobInfo));
storageHandler.configureOutputJobProperties(tableDesc, jobProperties);
/**
 * @param context current JobContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
AbstractSerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
    currTaskContext.getJobConf());
try {
  // currJobInfo: the per-partition OutputJobInfo (assumed from surrounding code)
  InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), currJobInfo);
} catch (SerDeException e) {
  throw new IOException("Failed to initialize SerDe", e);
}
ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());
final public Class<? extends InputFormat> getInputFormatClass() {
  if (inputFormatClass == null) {
    try {
      String className = tTable.getSd().getInputFormat();
      if (className == null) {
        if (getStorageHandler() == null) {
          return null;
        }
        inputFormatClass = getStorageHandler().getInputFormatClass();
      } else {
        inputFormatClass = (Class<? extends InputFormat>) Class.forName(className, true,
            Utilities.getSessionSpecifiedClassLoader());
      }
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  return inputFormatClass;
}
/**
 * Gets the output format instance.
 *
 * @param context the job context
 * @return the output format instance
 * @throws IOException
 */
protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context)
    throws IOException {
  OutputJobInfo jobInfo = getJobInfo(context.getConfiguration());
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  // Always configure the storage handler with the job properties/jobconf before calling
  // any methods on it.
  configureOutputStorageHandler(context);
  if (storageHandler instanceof FosterStorageHandler) {
    return new FileOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  } else {
    return new DefaultOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  }
}
/**
 * Utility method which gets the table or partition {@link InputFormat} class. It first
 * tries to get the class name from the given StorageDescriptor object. If it is not
 * present there, it tries to get it from the StorageHandler class set in the table
 * properties. If neither is found, an exception is thrown.
 *
 * @param job {@link JobConf} instance, needed in case the table is StorageHandler based.
 * @param sd {@link StorageDescriptor} instance of the partition (or of the table itself
 *           for non-partitioned tables) currently being read.
 * @param table Table object
 */
public static Class<? extends InputFormat<?, ?>> getInputFormatClass(final JobConf job,
    final StorageDescriptor sd, final Table table) throws Exception {
  final String inputFormatName = sd.getInputFormat();
  if (Strings.isNullOrEmpty(inputFormatName)) {
    final String storageHandlerClass = table.getParameters().get(META_TABLE_STORAGE);
    if (Strings.isNullOrEmpty(storageHandlerClass)) {
      throw new ExecutionSetupException("Unable to get Hive table InputFormat class. There is neither "
          + "InputFormat class explicitly specified nor StorageHandler class");
    }
    final HiveStorageHandler storageHandler = HiveUtils.getStorageHandler(job, storageHandlerClass);
    TableDesc tableDesc = new TableDesc();
    tableDesc.setProperties(MetaStoreUtils.getTableMetadata(table));
    storageHandler.configureInputJobProperties(tableDesc, table.getParameters());
    return (Class<? extends InputFormat<?, ?>>) storageHandler.getInputFormatClass();
  } else {
    return (Class<? extends InputFormat<?, ?>>) Class.forName(inputFormatName);
  }
}
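A possible call site for the utility above, assuming a Hive metastore Partition and a JobConf are already in scope (the variable names are illustrative):

// Resolve the InputFormat for one partition, falling back to the table's
// StorageHandler when the StorageDescriptor carries no explicit class name.
Class<? extends InputFormat<?, ?>> inputFormatClass =
    getInputFormatClass(jobConf, partition.getSd(), table);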
if (storageHandler != null) {
  Map<String, String> jobProperties = new LinkedHashMap<String, String>();
  storageHandler.configureTableJobProperties(tableDesc, jobProperties);
  // Only attach the job properties if the handler actually produced any.
  if (!jobProperties.isEmpty()) {
    tableDesc.setJobProperties(jobProperties);
  }
}
HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
    context.getConfiguration(), storeInfo);
Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde,
    context.getConfiguration());
final public Class<? extends OutputFormat> getOutputFormatClass() {
  if (outputFormatClass == null) {
    try {
      String className = tTable.getSd().getOutputFormat();
      Class<?> c;
      if (className == null) {
        if (getStorageHandler() == null) {
          return null;
        }
        c = getStorageHandler().getOutputFormatClass();
      } else {
        c = Class.forName(className, true, Utilities.getSessionSpecifiedClassLoader());
      }
      // Replace FileOutputFormat for backward compatibility
      outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(c);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  return outputFormatClass;
}
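Both getters above return a class object rather than an instance; the usual next step, mirroring the container code earlier in this section, is to instantiate it with ReflectionUtils against the current configuration. A minimal sketch, assuming a Table named table and a Configuration named conf are in scope:

// The getter can return null when the table has neither an explicit format
// nor a storage handler, so guard before instantiating.
Class<? extends OutputFormat> outputFormatClass = table.getOutputFormatClass();
if (outputFormatClass != null) {
  OutputFormat outputFormat = ReflectionUtils.newInstance(outputFormatClass, conf);
}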