@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  setWorkOutputPath(context);
  getBaseOutputFormat().getRecordWriter(
      parentDir.getFileSystem(context.getConfiguration()),
      new JobConf(context.getConfiguration()),
OutputFormatContainer getOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat outputFormat) {
  return new FileOutputFormatContainer(outputFormat);
}
List<String> partitionValues = getPartitionValueList(table, outputInfo.getPartitionValues());
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context.getConfiguration());
  IMetaStoreClient client = null;
  try {
    HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
    client = HCatUtil.getHiveMetastoreClient(hiveConf);
    handleDuplicatePublish(context, jobInfo, client, new Table(jobInfo.getTableInfo().getTable()));
  } catch (MetaException e) {
    throw new IOException(e);
  } catch (TException e) {
    throw new IOException(e);
  } finally {
    HCatUtil.closeHiveClientQuietly(client);
  }
  if (!jobInfo.isDynamicPartitioningUsed()) {
    JobConf jobConf = new JobConf(context.getConfiguration());
    getBaseOutputFormat().checkOutputSpecs(null, jobConf);
    // checkOutputSpecs may have set properties; copy them back so the context reflects them
    HCatUtil.copyConf(jobConf, context.getConfiguration());
  }
}
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // This needs to be set manually; under normal circumstances the MR Task does this.
  setWorkOutputPath(context);
  return new FileOutputCommitterContainer(context,
      HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()
          ? null
          : new JobConf(context.getConfiguration()).getOutputCommitter());
}
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
  if (!dynamicPartitioningUsed) {
    FileOutputFormatContainer.setWorkOutputPath(context);
    return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context));
  } else {
    // For dynamic partitioning the per-partition commits are driven through
    // TaskCommitContextRegistry in commitTask(), so report that a commit is needed.
    return true;
  }
}
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
  if (!dynamicPartitioningUsed) {
    FileOutputFormatContainer.setWorkOutputPath(context);
    getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context));
  } else {
    try {
      TaskCommitContextRegistry.getInstance().abortTask(context);
    } finally {
      TaskCommitContextRegistry.getInstance().discardCleanupFor(context);
    }
  }
}
/**
 * Gets the output format instance.
 * @param context the job context
 * @return the output format instance
 * @throws IOException
 */
protected OutputFormat<WritableComparable<?>, HCatRecord> getOutputFormat(JobContext context)
    throws IOException {
  OutputJobInfo jobInfo = getJobInfo(context.getConfiguration());
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(),
      jobInfo.getTableInfo().getStorerInfo());
  // Always configure storage handler with jobproperties/jobconf before calling any methods on it
  configureOutputStorageHandler(context);
  if (storageHandler instanceof FosterStorageHandler) {
    return new FileOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  } else {
    return new DefaultOutputFormatContainer(ReflectionUtils.newInstance(
        storageHandler.getOutputFormatClass(), context.getConfiguration()));
  }
}
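For context, a minimal driver-side sketch of how a client job is typically wired to HCatOutputFormat so that the dispatch above resolves to a FileOutputFormatContainer for a file-backed table. The database, table, and partition key/value are placeholders, the newer org.apache.hive.hcatalog packages are assumed (older releases use org.apache.hcatalog), and getTableSchema takes a Configuration only in recent versions.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "hcat-write-example");
    // Hypothetical static partition spec; "mydb", "mytable" and "ds" are placeholders.
    Map<String, String> partitionSpec = new HashMap<String, String>();
    partitionSpec.put("ds", "2024-01-01");
    HCatOutputFormat.setOutput(job, OutputJobInfo.create("mydb", "mytable", partitionSpec));
    // Write records with the same schema as the target table.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job, schema);
    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    // Mapper/reducer classes and the input side are omitted here.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}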
partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
  if (!dynamicPartitioningUsed) {
    // See HCATALOG-499
    FileOutputFormatContainer.setWorkOutputPath(context);
    getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context));
  } else {
    try {
      TaskCommitContextRegistry.getInstance().commitTask(context);
    } finally {
      TaskCommitContextRegistry.getInstance().discardCleanupFor(context);
    }
  }
}
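The records these committers finalize are produced by tasks writing HCatRecord values through HCatOutputFormat. Below is a minimal reducer sketch of that producing side; the class name, key/value types, and two-column layout are hypothetical and must match whatever schema was passed to HCatOutputFormat.setSchema().

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public class GroupCountReducer extends Reducer<Text, IntWritable, WritableComparable, HCatRecord> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    // Field positions must line up with the output schema set on the job.
    HCatRecord record = new DefaultHCatRecord(2);
    record.set(0, key.toString());
    record.set(1, sum);
    // The key is ignored by HCatOutputFormat; only the HCatRecord value is written.
    context.write(null, record);
  }
}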