/**
 * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
 * on the specified job context.
 * Note: This is the complete table-schema, including the record-schema *and* the
 * partitioning schema.
 * @param conf the Configuration object
 * @return the table schema, including the record-schema and partitioning schema
 * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
 */
public static HCatSchema getTableSchemaWithPartitionColumns(Configuration conf) throws IOException {
  OutputJobInfo jobInfo = getJobInfo(conf);
  return jobInfo.getTableInfo().getAllColumns();
}
/** * Gets the output format instance. * @param context the job context * @return the output format instance * @throws IOException */ protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context) throws IOException { OutputJobInfo jobInfo = getJobInfo(context.getConfiguration()); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); // Always configure storage handler with jobproperties/jobconf before calling any methods on it configureOutputStorageHandler(context); if (storageHandler instanceof FosterStorageHandler) { return new FileOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } else { return new DefaultOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } }
/**
 * Check for validity of the output-specification for the job.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  // Delegate the check to the wrapped output format for this job.
  OutputFormat<WritableComparable<?>, HCatRecord> outputFormat = getOutputFormat(context);
  outputFormat.checkOutputSpecs(context);
}
/**
 * Configure the output storage handler.
 * @param jobContext the job context
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext) throws IOException {
  // No dynamic partition values in this code path; pass a null list to the overload.
  List<String> noDynamicPartVals = null;
  configureOutputStorageHandler(jobContext, noDynamicPartVals);
}
/**
 * Configure the output storage handler, with allowing specification
 * of partvals from which it picks the dynamic partvals.
 * @param context the job context
 * @param jobInfo the output job info
 * @param fullPartSpec the full partition specification (key to value) from which the
 *                     values for the dynamic partitioning keys are extracted; entries for
 *                     keys absent from this map are added as null values
 * @throws IOException if the storage handler cannot be configured
 */
protected static void configureOutputStorageHandler(
  JobContext context, OutputJobInfo jobInfo, Map<String, String> fullPartSpec) throws IOException {
  List<String> dynamicPartKeys = jobInfo.getDynamicPartitioningKeys();
  if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) {
    // No dynamic partitioning in use; configure without partition values.
    configureOutputStorageHandler(context, (List<String>) null);
  } else {
    // Pick the dynamic partition values out of the full partition spec,
    // preserving the order of the dynamic partitioning keys.
    List<String> dynKeyVals = new ArrayList<String>(dynamicPartKeys.size());
    for (String dynamicPartKey : dynamicPartKeys) {
      dynKeyVals.add(fullPartSpec.get(dynamicPartKey));
    }
    configureOutputStorageHandler(context, dynKeyVals);
  }
}
@Override public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { //this needs to be manually set, under normal circumstances MR Task does this setWorkOutputPath(context); return new FileOutputCommitterContainer(context, HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed() ? null : new JobConf(context.getConfiguration()).getOutputCommitter()); }
/** * Gets the output format instance. * @param context the job context * @return the output format instance * @throws IOException */ protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context) throws IOException { OutputJobInfo jobInfo = getJobInfo(context.getConfiguration()); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); // Always configure storage handler with jobproperties/jobconf before calling any methods on it configureOutputStorageHandler(context); if (storageHandler instanceof FosterStorageHandler) { return new FileOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } else { return new DefaultOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } }
/**
 * Configure the output storage handler.
 * @param jobContext the job context
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext) throws IOException {
  // Delegate to the list-taking overload with no dynamic partition values.
  List<String> nullPartVals = null;
  configureOutputStorageHandler(jobContext, nullPartVals);
}
/**
 * Check for validity of the output-specification for the job.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  // The wrapped output format performs the actual validation.
  OutputFormat<WritableComparable<?>, HCatRecord> wrapped = getOutputFormat(context);
  wrapped.checkOutputSpecs(context);
}
/**
 * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call
 * on the specified job context.
 * Note: This is the record-schema for the table. It does not include the table's
 * partition columns.
 * @param conf the Configuration object
 * @return the table schema, excluding partition columns
 * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
 */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
  return getJobInfo(conf).getTableInfo().getDataColumns();
}
/** * Gets the output format instance. * @param context the job context * @return the output format instance * @throws IOException */ protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context) throws IOException { OutputJobInfo jobInfo = getJobInfo(context.getConfiguration()); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); // Always configure storage handler with jobproperties/jobconf before calling any methods on it configureOutputStorageHandler(context); if (storageHandler instanceof FosterStorageHandler) { return new FileOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } else { return new DefaultOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } }
/**
 * Configure the output storage handler.
 * @param jobContext the job context
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext) throws IOException {
  // No dynamic partition values here; hand a null list to the overload.
  List<String> absentPartVals = null;
  configureOutputStorageHandler(jobContext, absentPartVals);
}
/**
 * Check for validity of the output-specification for the job.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  // Resolve the concrete output format first, then let it validate the spec.
  OutputFormat<WritableComparable<?>, HCatRecord> format = getOutputFormat(context);
  format.checkOutputSpecs(context);
}
if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()){
/** * Gets the output format instance. * @param context the job context * @return the output format instance * @throws IOException */ protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context) throws IOException { OutputJobInfo jobInfo = getJobInfo(context.getConfiguration()); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); // Always configure storage handler with jobproperties/jobconf before calling any methods on it configureOutputStorageHandler(context); if (storageHandler instanceof FosterStorageHandler) { return new FileOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } else { return new DefaultOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } }
/**
 * Configure the output storage handler.
 * @param jobContext the job context
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext) throws IOException {
  // Forward with no dynamic partition values (null list).
  List<String> missingPartVals = null;
  configureOutputStorageHandler(jobContext, missingPartVals);
}
/**
 * Check for validity of the output-specification for the job.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  // Look up the underlying output format and delegate the validation to it.
  OutputFormat<WritableComparable<?>, HCatRecord> delegate = getOutputFormat(context);
  delegate.checkOutputSpecs(context);
}
HCatMapRedUtil.createTaskAttemptContext(context); configureDynamicStorageHandler(currTaskContext, dynamicPartValues); localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());
/** * Gets the output format instance. * @param context the job context * @return the output format instance * @throws IOException */ protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context) throws IOException { OutputJobInfo jobInfo = getJobInfo(context.getConfiguration()); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); // Always configure storage handler with jobproperties/jobconf before calling any methods on it configureOutputStorageHandler(context); if (storageHandler instanceof FosterStorageHandler) { return new FileOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } else { return new DefaultOutputFormatContainer(ReflectionUtils.newInstance( storageHandler.getOutputFormatClass(),context.getConfiguration())); } }
/**
 * Configure the output storage handler.
 * @param jobContext the job context
 * @throws IOException if the storage handler cannot be configured
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(JobContext jobContext) throws IOException {
  // Call the overload that accepts dynamic partition values, supplying none.
  List<String> emptyDynamicPartVals = null;
  configureOutputStorageHandler(jobContext, emptyDynamicPartVals);
}