StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters());

ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn));
@Override
public void initialize(org.apache.hadoop.mapreduce.InputSplit split,
                       TaskAttemptContext taskContext) throws IOException, InterruptedException {
  HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split);

  baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext);
  createDeserializer(hcatSplit, storageHandler, taskContext);

  // Pull the output schema out of the TaskAttemptContext
  outputSchema = (HCatSchema) HCatUtil.deserialize(
      taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA));

  if (outputSchema == null) {
    outputSchema = hcatSplit.getTableSchema();
  }

  // Pull the table schema out of the Split info
  // TODO This should be passed in the TaskAttemptContext instead
  dataSchema = hcatSplit.getDataSchema();

  errorTracker = new InputErrorTracker(taskContext.getConfiguration());
}
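// The projection read back above with HCatUtil.deserialize is whatever was serialized into the
// configuration under HCatConstants.HCAT_KEY_OUTPUT_SCHEMA. The class below is a minimal,
// illustrative sketch of that producing side (the class and column names are made up; the
// imports assume the org.apache.hive.hcatalog package layout). In practice the usual entry
// point, HCatInputFormat.setOutputSchema(job, schema), performs this same serialization.
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class ProjectionSetupSketch {
  public static void main(String[] args) throws Exception {
    // Build a two-column projection; HCatFieldSchema(name, type, comment) is the
    // primitive-column constructor.
    HCatSchema projection = new HCatSchema(Arrays.asList(
        new HCatFieldSchema("id", HCatFieldSchema.Type.INT, null),
        new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, null)));

    // Serialize the schema under the same key that initialize() later reads back.
    Configuration conf = new Configuration();
    conf.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(projection));
  }
}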
currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf());
try {
  InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
} catch (SerDeException e) {
  throw new IOException("Failed to initialize SerDe", e);
}

baseOF.getRecordWriter(parentDir.getFileSystem(currTaskContext.getConfiguration()),
    currTaskContext.getJobConf(), childPath.toString(),
    InternalUtil.createReporter(currTaskContext));

dynamicContexts.put(dynKey, currTaskContext);
dynamicObjectInspectors.put(dynKey,
    InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
dynamicOutputJobInfo.put(dynKey,
    HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
/**
 * @param context current TaskAttemptContext
 * @param baseRecordWriter RecordWriter to contain
 * @throws IOException
 * @throws InterruptedException
 */
public DefaultRecordWriterContainer(TaskAttemptContext context,
    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter)
    throws IOException, InterruptedException {
  super(context, baseRecordWriter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  HCatOutputFormat.configureOutputStorageHandler(context);
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration());
  hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }
}
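// The constructor above only prepares serDe and hcatRecordOI; they are consumed when a record
// is written. The write() below is a minimal sketch of that consumption in the usual HCatalog
// style (illustrative, not the verbatim implementation; it assumes getBaseRecordWriter() from
// the container base class and an HCatRecord value).
@Override
public void write(WritableComparable<?> key, HCatRecord value)
    throws IOException, InterruptedException {
  try {
    // Serialize the record's field list through the table's SerDe, described by the struct
    // ObjectInspector built from the output schema; the incoming key is ignored.
    getBaseRecordWriter().write(NullWritable.get(),
        serDe.serialize(value.getAll(), hcatRecordOI));
  } catch (SerDeException e) {
    throw new IOException("Failed to serialize object", e);
  }
}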
private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd,
    Map<String, String> parameters, Configuration conf,
    InputJobInfo inputJobInfo) throws IOException {

  StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters);

  Properties hcatProperties = new Properties();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo);

  // copy the properties from storageHandler to jobProperties
  Map<String, String> jobProperties =
      HCatUtil.getInputJobProperties(storageHandler, inputJobInfo);

  for (String key : parameters.keySet()) {
    hcatProperties.put(key, parameters.get(key));
  }

  // FIXME
  // Bloating partinfo with inputJobInfo is not good
  return new PartInfo(schema, storageHandler, sd.getLocation(),
      hcatProperties, jobProperties, inputJobInfo.getTableInfo());
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  getBaseRecordWriter().close(InternalUtil.createReporter(context));
}
static void initializeDeserializer(Deserializer deserializer, Configuration conf,
    HCatTableInfo info, HCatSchema schema) throws SerDeException {
  Properties props = getSerdeProperties(info, schema);
  LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props);
  SerDeUtils.initializeSerDe(deserializer, conf, props, null);
}
static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema)
    throws IOException {

  if (outputSchema == null) {
    throw new IOException("Invalid output schema specified");
  }

  List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
  List<String> fieldNames = new ArrayList<String>();

  for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) {
    TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString());
    fieldNames.add(hcatFieldSchema.getName());
    fieldInspectors.add(getObjectInspector(type));
  }

  StructObjectInspector structInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
  return structInspector;
}
private void createDeserializer(HCatSplit hcatSplit, HiveStorageHandler storageHandler,
    TaskAttemptContext taskContext) throws IOException {

  deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      taskContext.getConfiguration());

  try {
    InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(),
        hcatSplit.getPartitionInfo().getTableInfo(),
        hcatSplit.getPartitionInfo().getPartitionSchema());
  } catch (SerDeException e) {
    throw new IOException("Failed initializing deserializer "
        + storageHandler.getSerDeClass().getName(), e);
  }
}
PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), ptn.getParameters(), conf, inputJobInfo);
partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn));
partInfoList.add(partInfo);
/**
 * @param baseWriter RecordWriter to contain
 * @param context current TaskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
public FileRecordWriterContainer(
    RecordWriter<? super WritableComparable<?>, ? super Writable> baseWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  super(context, baseWriter);
  this.context = context;
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());

  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration());
  objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }

  // If partition columns occur in data, we want to remove them.
  partColsToDel = jobInfo.getPosOfPartCols();

  if (partColsToDel == null) {
    throw new HCatException("It seems that setSchema() is not called on "
        + "HCatOutputFormat. Please make sure that method is called.");
  }
}
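// The null check on getPosOfPartCols() above trips when the writing job never called
// HCatOutputFormat.setSchema(). Below is a minimal sketch of the driver-side setup that
// avoids it, with placeholder database/table names ("mydb", "mytable") and the usual
// HCatalog entry points (illustrative; exact overloads may differ between HCatalog versions).
Job job = Job.getInstance(new Configuration(), "hcat-write-sketch");
HCatOutputFormat.setOutput(job,
    OutputJobInfo.create("mydb", "mytable", null /* no static partition values */));
// setSchema() records the output schema, and with it the partition-column positions,
// in the job configuration; without this call getPosOfPartCols() returns null.
HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
job.setOutputFormatClass(HCatOutputFormat.class);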
/**
 * Create an HCatTableInfo instance from the supplied Hive Table instance.
 * @param table the Table to create an instance from
 * @return HCatTableInfo
 * @throws IOException
 */
static HCatTableInfo valueOf(Table table) throws IOException {
  // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema,
  // but store {@link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized
  // into the job conf.
  org.apache.hadoop.hive.ql.metadata.Table mTable =
      new org.apache.hadoop.hive.ql.metadata.Table(table);
  HCatSchema schema = HCatUtil.extractSchema(mTable);
  StorerInfo storerInfo = InternalUtil.extractStorerInfo(table.getSd(), table.getParameters());
  HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable);
  return new HCatTableInfo(table.getDbName(), table.getTableName(), schema,
      partitionColumns, storerInfo, table);
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  Reporter reporter = InternalUtil.createReporter(context);
  getBaseRecordWriter().close(reporter);
}
static void initializeOutputSerDe(AbstractSerDe serDe, Configuration conf, OutputJobInfo jobInfo)
    throws SerDeException {
  SerDeUtils.initializeSerDe(serDe, conf,
      getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema()), null);
}
private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException {

  switch (type.getCategory()) {

  case PRIMITIVE:
    PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
    return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(primitiveType);

  case MAP:
    MapTypeInfo mapType = (MapTypeInfo) type;
    MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(
        getObjectInspector(mapType.getMapKeyTypeInfo()),
        getObjectInspector(mapType.getMapValueTypeInfo()));
    return mapInspector;

  case LIST:
    ListTypeInfo listType = (ListTypeInfo) type;
    ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(
        getObjectInspector(listType.getListElementTypeInfo()));
    return listInspector;

  case STRUCT:
    StructTypeInfo structType = (StructTypeInfo) type;
    List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();

    List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
    for (TypeInfo fieldType : fieldTypes) {
      fieldInspectors.add(getObjectInspector(fieldType));
    }

    StructObjectInspector structInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            structType.getAllStructFieldNames(), fieldInspectors);
    return structInspector;

  default:
    throw new IOException("Unknown field schema type");
  }
}
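// To make the recursion above concrete, here is a small self-contained sketch that builds by
// hand the inspector getObjectInspector would return for the example type map<string,array<int>>,
// using the same Hive factory calls (illustrative only).
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class InspectorSketch {
  public static void main(String[] args) {
    // The PRIMITIVE branch: Java object inspectors for the leaf types.
    PrimitiveTypeInfo stringType =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("string");
    PrimitiveTypeInfo intType =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("int");
    ObjectInspector keyOI =
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(stringType);
    ObjectInspector elementOI =
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(intType);

    // The LIST branch wraps the element inspector; the MAP branch wraps key and value inspectors.
    ObjectInspector valueOI = ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
    ObjectInspector mapOI = ObjectInspectorFactory.getStandardMapObjectInspector(keyOI, valueOI);

    System.out.println(mapOI.getTypeName()); // prints map<string,array<int>>
  }
}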