@Override public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { //this needs to be manually set, under normal circumstances MR Task does this setWorkOutputPath(context); return new FileOutputCommitterContainer(context, HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed() ? null : new JobConf(context.getConfiguration()).getOutputCommitter()); }
@Override public void commitJob(JobContext jobContext) throws IOException { if (dynamicPartitioningUsed) { discoverPartitions(jobContext); if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { getBaseOutputCommitter().commitJob( HCatMapRedUtil.createJobContext(jobContext)); registerPartitions(jobContext); if (getOutputDirMarking(jobContext.getConfiguration())) { Path outputPath = new Path(jobInfo.getLocation()); FileSystem fileSys = outputPath.getFileSystem(jobContext cancelDelegationTokens(jobContext);
partPath, false); partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo); if (harProcessor.isEnabled()) { harProcessor.exec(context, partition, partPath);
private void moveCustomLocationTaskOutputs(FileSystem fs, Table table, Configuration conf) throws IOException { // in case of custom dynamic partitions, we can't just move the sub-tree of partition root // directory since the partitions location contain regex pattern. We need to first find the // final destination of each partition and move its output. for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) { Path src = new Path(entry.getKey()); Path destPath = new Path(getFinalDynamicPartitionDestination(table, entry.getValue(), jobInfo)); moveTaskOutputs(fs, src, src, destPath, true, true); // dryRun = true, immutable = true moveTaskOutputs(fs, src, src, destPath, false, true); } // delete the parent temp directory of all custom dynamic partitions Path parentPath = new Path(getCustomPartitionRootLocation(jobInfo, conf)); if (fs.exists(parentPath)) { fs.delete(parentPath, true); } }
try { if (dynamicPartitioningUsed) { discoverPartitions(jobContext); if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { getBaseOutputCommitter().abortJob(mapRedJobContext, state); } else if (dynamicPartitioningUsed) { for (JobContext currContext : contextDiscoveredByPath.values()) { if (dynamicPartitioningUsed) { if (!customDynamicLocationUsed) { src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() .getPartitionKeysSize())); } else { src = new Path(getCustomPartitionRootLocation(jobInfo, jobContext.getConfiguration())); cancelDelegationTokens(jobContext);
private void registerPartitions(JobContext context) throws IOException{ if (dynamicPartitioningUsed){ discoverPartitions(context); moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable()); if (!src.equals(tblPath)) { fs.delete(src, true); partitionsToAdd.add(constructPartition(context, jobInfo, tblPath.toString(), null, jobInfo.getPartitionValues(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs, hiveConf, status)); } else { for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) { partitionsToAdd.add(constructPartition(context, jobInfo, getPartitionRootLocation(entry.getKey(), entry.getValue().size()), entry.getKey(), entry.getValue(), jobInfo.getOutputSchema(), getStorerParameterMap(storer), table, fs, hiveConf, status)); moveTaskOutputs(fs, src, src, tblPath, true, true); // dryRun = true, immutable = true moveTaskOutputs(fs, src, src, tblPath, false, true); if (!src.equals(tblPath)){ fs.delete(src, true); moveCustomLocationTaskOutputs(fs, table, hiveConf); updateTableSchema(client, table, jobInfo.getOutputSchema()); LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); client.add_partitions(partitionsToAdd); updateTableSchema(client, table, jobInfo.getOutputSchema());
if (i++ != 0) { applyGroupAndPerms(fs, partPath, perms, grpName, false); partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); applyGroupAndPerms(fs, partPath, perms, grpName, true); String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo); if (harProcessor.isEnabled()) { harProcessor.exec(context, partition, partPath);
@Override public void setupJob(JobContext context) throws IOException { if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); } // in dynamic usecase, called through FileRecordWriterContainer }
final Path finalOutputPath = getFinalPath(fs, file, srcDir, destDir, immutable); FileStatus fileStatus = fs.getFileStatus(file); moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable); moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
new FileOutputCommitterContainer(job, null).commitJob(job); } else { new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED);
private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs, OutputJobInfo jobInfo) { Path partPath = new Path(table.getTTable().getSd().getLocation()); if (!customDynamicLocationUsed) { // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA for (FieldSchema partKey : table.getPartitionKeys()) { partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); } return partPath.toString(); } else { // if custom root specified, update the parent path if (jobInfo.getCustomDynamicRoot() != null && jobInfo.getCustomDynamicRoot().length() > 0) { partPath = new Path(partPath, jobInfo.getCustomDynamicRoot()); } return new Path(partPath, HCatFileUtil.resolveCustomPath(jobInfo, partKVs, false)).toString(); } }
job.setNumReduceTasks(0); assertTrue(job.waitForCompletion(true)); new FileOutputCommitterContainer(job, null).cleanupJob(job); } catch (Exception e) { caughtException = true;
/**
 * Sets {@code permission} on {@code dir} and, when {@code recursive} is set, on
 * everything underneath it.
 *
 * <p>Note that this method only logs {@code group} and forwards it to the recursive
 * call; it performs no ownership change itself.
 *
 * @param fs file system holding {@code dir}
 * @param dir path whose permission is set
 * @param permission permission to apply
 * @param group group name (logged and passed along on recursion)
 * @param recursive whether to descend into {@code dir}
 * @throws IOException if setting a permission or listing a directory fails
 */
private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission,
    String group, boolean recursive) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("applyGroupAndPerms : " + dir
      + " perms: " + permission
      + " group: " + group
      + " recursive: " + recursive);
  }

  fs.setPermission(dir, permission);
  if (!recursive) {
    return;
  }

  for (FileStatus child : fs.listStatus(dir)) {
    final Path childPath = child.getPath();
    if (!child.isDir()) {
      // plain file: set its permission directly
      fs.setPermission(childPath, permission);
      continue;
    }
    // sub-directory: handle it and its contents recursively
    applyGroupAndPerms(fs, childPath, permission, group, true);
  }
}
private void registerPartitions(JobContext context) throws IOException{ if (dynamicPartitioningUsed){ discoverPartitions(context); moveTaskOutputs(fs, src, src, tblPath, false, table.isImmutable()); if (!src.equals(tblPath)){ fs.delete(src, true); if (!dynamicPartitioningUsed){ partitionsToAdd.add( constructPartition( context,jobInfo, tblPath.toString(), null, jobInfo.getPartitionValues() ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) ,table, fs ,grpName,perms)); for (Entry<String,Map<String,String>> entry : partitionsDiscoveredByPath.entrySet()){ partitionsToAdd.add( constructPartition( context,jobInfo, getPartitionRootLocation(entry.getKey(),entry.getValue().size()) ,entry.getKey(), entry.getValue() ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) ,table, fs ,grpName,perms)); moveTaskOutputs(fs, src, src, tblPath, true, true); // dryRun = true, immutable = true moveTaskOutputs(fs, src, src, tblPath, false, true); if (!src.equals(tblPath)){
try { if (dynamicPartitioningUsed) { discoverPartitions(jobContext); if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { getBaseOutputCommitter().abortJob(mapRedJobContext, state); } else if (dynamicPartitioningUsed) { for (JobContext currContext : contextDiscoveredByPath.values()) { if (dynamicPartitioningUsed) { if (!customDynamicLocationUsed) { src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() .getPartitionKeysSize())); } else { src = new Path(getCustomPartitionRootLocation(jobInfo, jobContext.getConfiguration())); cancelDelegationTokens(jobContext);
private void moveCustomLocationTaskOutputs(FileSystem fs, Table table, Configuration conf) throws IOException { // in case of custom dynamic partitions, we can't just move the sub-tree of partition root // directory since the partitions location contain regex pattern. We need to first find the // final destination of each partition and move its output. for (Entry<String, Map<String, String>> entry : partitionsDiscoveredByPath.entrySet()) { Path src = new Path(entry.getKey()); Path destPath = new Path(getFinalDynamicPartitionDestination(table, entry.getValue(), jobInfo)); moveTaskOutputs(fs, src, src, destPath, true, true); // dryRun = true, immutable = true moveTaskOutputs(fs, src, src, destPath, false, true); } // delete the parent temp directory of all custom dynamic partitions Path parentPath = new Path(getCustomPartitionRootLocation(jobInfo, conf)); if (fs.exists(parentPath)) { fs.delete(parentPath, true); } }
if (i++ != 0) { applyGroupAndPerms(fs, partPath, perms, grpName, false); partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); applyGroupAndPerms(fs, partPath, perms, grpName, true); String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo); if (harProcessor.isEnabled()) { harProcessor.exec(context, partition, partPath);
/**
 * Delegates task setup to the base committer.
 *
 * <p>Nothing is done when dynamic partitioning is in use, or when this container has no
 * base committer (it can be constructed with a {@code null} one). The condition is the
 * same one {@code setupJob} uses, so a container without a base committer no longer
 * fails with a {@code NullPointerException} here.
 *
 * @param context the task attempt context
 * @throws IOException if the base committer's task setup fails
 */
@Override
public void setupTask(TaskAttemptContext context) throws IOException {
  if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
    getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context));
  }
}
final Path finalOutputPath = getFinalPath(fs, file, srcDir, destDir, immutable); FileStatus fileStatus = fs.getFileStatus(file); moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable); moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun, immutable);
private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs, OutputJobInfo jobInfo) { Path partPath = new Path(table.getTTable().getSd().getLocation()); if (!customDynamicLocationUsed) { // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA for (FieldSchema partKey : table.getPartitionKeys()) { partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); } return partPath.toString(); } else { // if custom root specified, update the parent path if (jobInfo.getCustomDynamicRoot() != null && jobInfo.getCustomDynamicRoot().length() > 0) { partPath = new Path(partPath, jobInfo.getCustomDynamicRoot()); } return new Path(partPath, HCatFileUtil.resolveCustomPath(jobInfo, partKVs, false)).toString(); } }