void runMRCreateFail(String dbName, String tableName,
    Map<String, String> partitionValues, List<HCatFieldSchema> columns) throws Exception {
  Job job = new Job(mrConf, "hcat mapreduce write fail test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(TestHCatPartitionPublish.MapFail.class);

  // input/output settings
  job.setInputFormatClass(TextInputFormat.class);

  Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
  // The write count does not matter, as the map will fail in its first call.
  createInputFile(path, 5);
  TextInputFormat.setInputPaths(job, path);

  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
  HCatOutputFormat.setOutput(job, outputJobInfo);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);
  job.setNumReduceTasks(0);

  HCatOutputFormat.setSchema(job, new HCatSchema(columns));

  boolean success = job.waitForCompletion(true);
  Assert.assertFalse(success);
}
OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(
    tableDesc.getJobProperties().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
String parentPath = jobInfo.getTableInfo().getTableLocation();
String dynHash = tableDesc.getJobProperties().get(
    HCatConstants.HCAT_DYNAMIC_PTN_JOBID);

// For dynamic partitioned writes, honor a custom root directory if one was given.
if (dynHash != null
    && jobInfo.getCustomDynamicRoot() != null
    && jobInfo.getCustomDynamicRoot().length() > 0) {
  parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
}

String outputLocation;
if (dynHash != null
    && jobInfo.getCustomDynamicPath() != null
    && jobInfo.getCustomDynamicPath().length() > 0) {
  // dynamic partitioning with a custom path: resolve it from the pattern
  outputLocation = HCatFileUtil.resolveCustomPath(jobInfo, null, true);
} else if ((dynHash == null)
    && Boolean.parseBoolean((String) tableDesc.getProperties().get("EXTERNAL"))
    && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
  // honor the custom location specified for an external table
  outputLocation = jobInfo.getLocation();
} else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) {
  // unpartitioned table: write directly under the parent path
  outputLocation = "";
} else {
  // build the partition path in the order the partition keys are defined for the table
  List<String> cols = new ArrayList<String>();
  List<String> values = new ArrayList<String>();
  for (String name : jobInfo.getTableInfo().getPartitionColumns().getFieldNames()) {
    String value = jobInfo.getPartitionValues().get(name);
    cols.add(name);
    values.add(value);
  }
  outputLocation = FileUtils.makePartName(cols, values);
}

if (outputLocation != null && !outputLocation.isEmpty()) {
  jobInfo.setLocation(new Path(parentPath, outputLocation).toString());
} else {
  jobInfo.setLocation(new Path(parentPath).toString());
}
Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(),
    outputJobInfo.getTableName());

if (table.getTTable().getPartitionKeysSize() == 0) {
  if ((outputJobInfo.getPartitionValues() != null)
      && (!outputJobInfo.getPartitionValues().isEmpty())) {
    // attempt to save partition values to a non-partitioned table: error out
    throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
        "Partition values specified for non-partitioned table");
  }
  // non-partitioned table
  outputJobInfo.setPartitionValues(new HashMap<String, String>());
} else {
  // partitioned table: normalize the user-specified partition keys to lower case
  Map<String, String> valueMap = new HashMap<String, String>();
  if (outputJobInfo.getPartitionValues() != null) {
    for (Map.Entry<String, String> entry : outputJobInfo.getPartitionValues().entrySet()) {
      valueMap.put(entry.getKey().toLowerCase(), entry.getValue());
    }
  }

  if ((outputJobInfo.getPartitionValues() == null)
      || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) {
    // dynamic partitioning: not all partition key values were specified,
    // so collect the unspecified keys in table order
    List<String> dynamicPartitioningKeys = new ArrayList<String>();
    for (FieldSchema fs : table.getPartitionKeys()) {
      if (!valueMap.containsKey(fs.getName().toLowerCase())) {
        dynamicPartitioningKeys.add(fs.getName().toLowerCase());
      }
    }
    outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys);

    // share one job id hash across all of this job's dynamic partition writers
    String dynHash;
    if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) {
      dynHash = String.valueOf(Math.random());
    }
    conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);
  }

  outputJobInfo.setPartitionValues(valueMap);
}

HCatSchema tableSchema = HCatUtil.extractSchema(table);
outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable()));
outputJobInfo.setOutputSchema(tableSchema);
harRequested = getHarRequested(hiveConf);
outputJobInfo.setHarRequested(harRequested);
maxDynamicPartitions = getMaxDynamicPartitions(hiveConf);
outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions);
public static void getPartKeyValuesForCustomLocation(Map<String, String> partSpec,
    OutputJobInfo jobInfo, String partitionPath) {
  // create matchers for the custom path string as well as the actual
  // dynamic partition path that was created
  Matcher customPathMatcher = customPathPattern.matcher(jobInfo.getCustomDynamicPath());
  Matcher dynamicPathMatcher = customPathPattern.matcher(partitionPath);

  while (customPathMatcher.find() && dynamicPathMatcher.find()) {
    // get the column name from the custom path matcher and the column value
    // from the dynamic path matcher
    partSpec.put(customPathMatcher.group(2), dynamicPathMatcher.group(2));
  }

  // add any partition key values provided as part of the job info
  partSpec.putAll(jobInfo.getPartitionValues());
}
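// Illustration (not part of the original sources): a minimal standalone sketch
// of the matcher-pairing idea above. The customPathPattern regex below is an
// assumption for illustration only, not the actual HCatFileUtil pattern; all
// that matters here is that group(2) captures the column name in the template
// and the corresponding value in the resolved path.
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class CustomPathPairingSketch {
  // Hypothetical pattern: group(2) captures either a ${name} token's name
  // or a literal path segment.
  private static final Pattern customPathPattern =
      Pattern.compile("(\\$\\{)?([^/${}]+)(\\})?");

  public static void main(String[] args) {
    Map<String, String> partSpec = new HashMap<String, String>();
    Matcher custom = customPathPattern.matcher("${year}/${month}");
    Matcher dynamic = customPathPattern.matcher("2024/05");
    // Walk both paths segment by segment, pairing template names with values.
    while (custom.find() && dynamic.find()) {
      partSpec.put(custom.group(2), dynamic.group(2));
    }
    System.out.println(partSpec); // e.g. {year=2024, month=05} (map order may vary)
  }
}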
private String getCustomPartitionRootLocation(OutputJobInfo jobInfo, Configuration conf) {
  if (ptnRootLocation == null) {
    // we only need to calculate it once; it'll be the same for the other
    // partitions in this job
    String parentPath = jobInfo.getTableInfo().getTableLocation();
    if (jobInfo.getCustomDynamicRoot() != null
        && jobInfo.getCustomDynamicRoot().length() > 0) {
      parentPath = new Path(parentPath, jobInfo.getCustomDynamicRoot()).toString();
    }
    Path ptnRoot = new Path(parentPath, DYNTEMP_DIR_NAME
        + conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID));
    ptnRootLocation = ptnRoot.toString();
  }
  return ptnRootLocation;
}
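// Illustration (not part of the original sources): a rough sketch of the path
// this produces, assuming DYNTEMP_DIR_NAME is a "_DYN" prefix and the job id
// hash comes from HCAT_DYNAMIC_PTN_JOBID. All concrete values below are made up.
import org.apache.hadoop.fs.Path;

public class PtnRootSketch {
  public static void main(String[] args) {
    // Hypothetical table location, custom dynamic root, and job id hash.
    String tableLocation = "/warehouse/mydb.db/mytable";
    String customDynamicRoot = "staging";
    String dynPtnJobId = "0.5391";

    String parentPath = new Path(tableLocation, customDynamicRoot).toString();
    Path ptnRoot = new Path(parentPath, "_DYN" + dynPtnJobId);
    // Prints /warehouse/mydb.db/mytable/staging/_DYN0.5391
    System.out.println(ptnRoot);
  }
}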
static void initializeOutputSerDe(AbstractSerDe serDe, Configuration conf,
    OutputJobInfo jobInfo) throws SerDeException {
  SerDeUtils.initializeSerDe(serDe, conf,
      getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema()), null);
}
String getTokenSignature(OutputJobInfo outputJobInfo) {
  StringBuilder result = new StringBuilder();
  String dbName = outputJobInfo.getDatabaseName();
  if (dbName != null) {
    result.append(dbName);
  }
  String tableName = outputJobInfo.getTableName();
  if (tableName != null) {
    result.append(".").append(tableName);
  }
  Map<String, String> partValues = outputJobInfo.getPartitionValues();
  if (partValues != null) {
    for (Entry<String, String> entry : partValues.entrySet()) {
      result.append("/");
      result.append(entry.getKey());
      result.append("=");
      result.append(entry.getValue());
    }
  }
  return result.toString();
}
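// Illustration (not part of the original sources): a self-contained sketch that
// re-implements the concatenation above to make the signature's shape concrete:
// "<db>.<table>/<key>=<value>/...". The db/table/partition names are invented.
import java.util.LinkedHashMap;
import java.util.Map;

public class TokenSignatureSketch {
  // Mirrors getTokenSignature()'s string building.
  static String signature(String db, String table, Map<String, String> partValues) {
    StringBuilder result = new StringBuilder();
    if (db != null) {
      result.append(db);
    }
    if (table != null) {
      result.append(".").append(table);
    }
    if (partValues != null) {
      for (Map.Entry<String, String> entry : partValues.entrySet()) {
        result.append("/").append(entry.getKey()).append("=").append(entry.getValue());
      }
    }
    return result.toString();
  }

  public static void main(String[] args) {
    Map<String, String> parts = new LinkedHashMap<String, String>();
    parts.put("ds", "20240101");
    // Prints default.clicks/ds=20240101
    System.out.println(signature("default", "clicks", parts));
  }
}
// Note: the real method iterates whatever map OutputJobInfo holds, so the
// order of partition key-value pairs in the signature is map-dependent.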
if (!outputInfo.isDynamicPartitioningUsed()) {
  List<String> partitionValues = getPartitionValueList(
      table, outputInfo.getPartitionValues());
  // fully-specified partition: check whether it already exists
  List<String> currentParts = client.listPartitionNames(outputInfo.getDatabaseName(),
      outputInfo.getTableName(), partitionValues, (short) 1);
  if (currentParts.size() > 0) {
    throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION);
  }
}
if (jobInfo.isDynamicPartitioningUsed()) {
  // dynamic partition keys are also partition columns: record their positions
  // and remove them from the output schema
  for (String partKey : jobInfo.getDynamicPartitioningKeys()) {
    Integer idx;
    if ((idx = schema.getPosition(partKey)) != null) {
      posOfPartCols.add(idx);
      posOfDynPartCols.add(idx);
      schemaWithoutParts.remove(schema.get(partKey));
    }
  }
}

HCatUtil.validatePartitionSchema(
    new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts);
jobInfo.setPosOfPartCols(posOfPartCols);
jobInfo.setPosOfDynPartCols(posOfDynPartCols);
jobInfo.setOutputSchema(schemaWithoutParts);
Properties props = outputJobInfo.getTableInfo().getStorerInfo().getProperties();
props.put(serdeConstants.SERIALIZATION_LIB, storageHandler.getSerDeClass().getName());

TableDesc tableDesc = new TableDesc(storageHandler.getInputFormatClass(),
    storageHandler.getOutputFormatClass(), props);
if (tableDesc.getJobProperties() == null) {
  tableDesc.setJobProperties(new HashMap<String, String>());
}

Properties mytableProperties = tableDesc.getProperties();
mytableProperties.setProperty(
    org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME,
    outputJobInfo.getDatabaseName() + "." + outputJobInfo.getTableName());
try {
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(
      conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
      jobContext.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());

  Map<String, String> partitionValues = jobInfo.getPartitionValues();
  String location = jobInfo.getLocation();

  if (dynamicPartVals != null) {
    List<String> dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
    if (dynamicPartVals.size() != dynamicPartKeys.size()) {
      throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
          "Mismatch between the number of dynamic partition values obtained["
              + dynamicPartVals.size() + "] and the number required["
              + dynamicPartKeys.size() + "]");
    }
    // pair each dynamic partition key with its value for this write
    for (int i = 0; i < dynamicPartKeys.size(); i++) {
      partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i));
    }
    jobInfo.setPartitionValues(partitionValues);
  }
  // ... re-serialize jobInfo and configure the storage handler (elided) ...
} catch (Exception e) {
  // original error handling elided; rethrow as IOException here
  if (e instanceof HCatException) {
    throw (HCatException) e;
  }
  throw new IOException(e);
}
/**
 * @param context current JobContext
 * @param baseCommitter OutputCommitter to contain
 * @throws IOException
 */
public FileOutputCommitterContainer(JobContext context,
    org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
  super(context, baseCommitter);
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
  this.partitionsDiscovered = !dynamicPartitioningUsed;
  cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  Table table = new Table(jobInfo.getTableInfo().getTable());
  if (dynamicPartitioningUsed
      && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
      && jobInfo.getCustomDynamicPath() != null
      && jobInfo.getCustomDynamicPath().length() > 0) {
    customDynamicLocationUsed = true;
  } else {
    customDynamicLocationUsed = false;
  }

  this.maxAppendAttempts = context.getConfiguration().getInt(
      HCatConstants.HCAT_APPEND_LIMIT, APPEND_COUNTER_WARN_THRESHOLD);
}
public static String resolveCustomPath(OutputJobInfo jobInfo,
    Map<String, String> dynPartKVs, boolean createRegexPath) {
  String customPath = jobInfo.getCustomDynamicPath();
  // use the supplied dynamic partition key-values, falling back to the
  // partition values carried by the job info
  Map<String, String> partKVs = dynPartKVs != null ? dynPartKVs :
      jobInfo.getPartitionValues();

  // ... substitute each ${column} token in customPath with its value,
  // collecting the referenced column names into partColumns (elided) ...

  if (partColumns.size() != jobInfo.getDynamicPartitioningKeys().size()) {
    throw new IllegalArgumentException("Unable to configure custom dynamic location, "
        + " mismatch between number of dynamic partition columns obtained["
        + partColumns.size() + "] and number of dynamic partition columns required["
        + jobInfo.getDynamicPartitioningKeys().size() + "]");
  }
  // ... return the resolved path (elided) ...
}
// location is expected to be of the form [dbname.]tablename
String[] userStr = location.split("\\.");
OutputJobInfo outputJobInfo;
if (userStr.length == 2) {
  outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
} else if (userStr.length == 1) {
  outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
} else {
  throw new FrontendException("location " + location
      + " is invalid. It must be of the form [dbname.]tablename",
      PigHCatUtil.PIG_EXCEPTION_CODE);
}
outputJobInfo.setLocation(externalLocation);
public void testSetOutput() throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "test outputformat");

  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("colname", "p1");
  // null server url means local mode
  OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues);

  HCatOutputFormat.setOutput(job, info);
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job.getConfiguration());

  assertNotNull(jobInfo.getTableInfo());
  assertEquals(1, jobInfo.getPartitionValues().size());
  assertEquals("p1", jobInfo.getPartitionValues().get("colname"));
  assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size());
  assertEquals("data_column",
      jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName());

  publishTest(job);
}
OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil
    .deserialize(jobInfoString);
StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(
    context.getConfiguration(), storeInfo);

if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
  // ... dynamic-partitioning-specific handling (elided) ...
}
Table table = new Table(jobInfo.getTableInfo().getTable());
Path tblPath = new Path(table.getTTable().getSd().getLocation());
FileSystem fs = tblPath.getFileSystem(conf);
IMetaStoreClient client = null;
HCatTableInfo tableInfo = jobInfo.getTableInfo();
List<Partition> partitionsAdded = new ArrayList<Partition>();

try {
  if (table.getPartitionKeys().size() == 0) {
    // Unpartitioned table: move data from the temp directory to the table
    // directory; no metastore operation is required.
    Path src = new Path(jobInfo.getLocation());
    moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable());
    if (!src.equals(tblPath)) {
      fs.delete(src, true);
    }
    return;
  }

  List<Partition> partitionsToAdd = new ArrayList<Partition>();
  if (!dynamicPartitioningUsed) {
    // static partitioning: a single partition from the job's partition values
    partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(),
        null, jobInfo.getPartitionValues(), jobInfo.getOutputSchema(),
        getStorerParameterMap(storer), table, fs, hiveConf, status));
  } else {
    // dynamic partitioning: one partition per discovered partition path
    for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) {
      partitionsToAdd.add(constructPartition(context, jobInfo,
          getPartitionRootLocation(entry.getKey(), entry.getValue().size()),
          entry.getKey(), entry.getValue(), jobInfo.getOutputSchema(),
          getStorerParameterMap(storer), table, fs, hiveConf, status));
    }
  }

  if (harProcessor.isEnabled()) {
    updateTableSchema(client, table, jobInfo.getOutputSchema());
    LOG.info("HAR is being used. The table {} has new partitions {}.",
        table.getTableName(), ptnInfos);
    client.add_partitions(partitionsToAdd);
    partitionsAdded = partitionsToAdd;
  } else {
    // no harProcessor, regular operation
    updateTableSchema(client, table, jobInfo.getOutputSchema());
    LOG.info("HAR is not being used. The table {} has new partitions {}.",
        table.getTableName(), ptnInfos);
    // ... publish the partitions (elided) ...
  }
/**
 * @param baseWriter RecordWriter to contain
 * @param context current TaskAttemptContext
 * @throws IOException
 * @throws InterruptedException
 */
public FileRecordWriterContainer(
    RecordWriter<? super WritableComparable<?>, ? super Writable> baseWriter,
    TaskAttemptContext context) throws IOException, InterruptedException {
  super(context, baseWriter);
  this.context = context;
  jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());

  storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
      context.getConfiguration());
  objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
  try {
    InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
  } catch (SerDeException e) {
    throw new IOException("Failed to initialize SerDe", e);
  }

  // If partition columns occur in data, we want to remove them.
  partColsToDel = jobInfo.getPosOfPartCols();
  if (partColsToDel == null) {
    throw new HCatException("It seems that setSchema() is not called on "
        + "HCatOutputFormat. Please make sure that method is called.");
  }
}
if (ofclass == RCFileOutputFormat.class) {
  // RCFile-specific: record the number of columns being written
  jobProperties.put(HiveConf.ConfVars.HIVE_RCFILE_COLUMN_NUMBER_CONF.varname,
      Integer.toOctalString(jobInfo.getOutputSchema().getFields().size()));
} else if (ofclass == OrcOutputFormat.class) {
  // ORC: forward any ORC table properties into the job properties
  Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
  for (OrcConf property : OrcConf.values()) {
    String propName = property.getAttribute();
    if (tableProps.containsKey(propName)) {
      jobProperties.put(propName, tableProps.get(propName));
    }
  }
} else if (ofclass == AvroContainerOutputFormat.class) {
  // Avro: forward Avro table properties, then build the output schema
  Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
  for (AvroSerdeUtils.AvroTableProperties property : AvroSerdeUtils.AvroTableProperties.values()) {
    String propName = property.getPropName();
    if (tableProps.containsKey(propName)) {
      jobProperties.put(propName, tableProps.get(propName));
    }
  }

  Properties properties = new Properties();
  properties.put("name", jobInfo.getTableName());

  List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
  List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
  for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()) {
    colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
  }
  // ... derive and set the Avro schema from colNames/colTypes (elided) ...
} else if (ofclass == MapredParquetOutputFormat.class) {
  // Parquet: forward Parquet-specific table properties
  Map<String, String> tableProps = jobInfo.getTableInfo().getTable().getParameters();
  for (String key : tableProps.keySet()) {
    if (ParquetTableUtils.isParquetProperty(key)) {
      jobProperties.put(key, tableProps.get(key));
    }
  }

  List<String> colNames = jobInfo.getOutputSchema().getFieldNames();
  List<TypeInfo> colTypes = new ArrayList<TypeInfo>();
  for (HCatFieldSchema field : jobInfo.getOutputSchema().getFields()) {
    colTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getTypeString()));
  }
  // ... set the Parquet schema from colNames/colTypes (elided) ...
}
OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
if (customDynamicPathPattern != null) {
  job.getConfiguration().set(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN,
      customDynamicPathPattern);
}
// setOutput() populates the table info read below
HCatOutputFormat.setOutput(job, outputJobInfo);
externalTableLocation = outputJobInfo.getTableInfo().getTableLocation();