private void setupMRLegacyConfigs() {
  StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_");
  taskAttemptIdBuilder.append(System.currentTimeMillis())
      .append("_")
      .append(stageIdFormat.format(TaskContext.get().stageId()))
      .append("_");
  if (isMap()) {
    taskAttemptIdBuilder.append("m_");
  } else {
    taskAttemptIdBuilder.append("r_");
  }
  // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed
  // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for
  // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum".
  // When there are multiple attempts for a task, Hive relies on the partitionId
  // to decide whether the data are duplicates when collecting the final outputs
  // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles).
  taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId()))
      .append("_").append(TaskContext.get().attemptNumber());
  String taskAttemptIdStr = taskAttemptIdBuilder.toString();
  jobConf.set("mapred.task.id", taskAttemptIdStr);
  jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr);
  jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId());
}
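// A minimal, self-contained sketch of the attempt-id construction above. The
// stageIdFormat/taskIdFormat fields are not shown in the snippet; here they are
// assumed to be zero-padded NumberFormats with hypothetical widths (4 and 6),
// chosen to mirror MR's "attempt_timestamp_jobNum_m/r_taskNum_attemptNum" layout.
import java.text.NumberFormat;

public class TaskAttemptIdSketch {
  private static NumberFormat padded(int digits) {
    NumberFormat f = NumberFormat.getInstance();
    f.setGroupingUsed(false);
    f.setMinimumIntegerDigits(digits);
    return f;
  }

  static String buildAttemptId(long timestamp, int stageId, boolean isMap,
      int partitionId, int attemptNumber) {
    return "attempt_" + timestamp
        + "_" + padded(4).format(stageId)
        + (isMap ? "_m_" : "_r_")
        + padded(6).format(partitionId)
        + "_" + attemptNumber;
  }

  public static void main(String[] args) {
    // Prints something like: attempt_1700000000000_0042_m_000007_0
    System.out.println(buildAttemptId(System.currentTimeMillis(), 42, true, 7, 0));
  }
}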
@Override
public void onTaskCompletion(TaskContext context) {
  context.isCompleted();
  context.isInterrupted();
  context.stageId();
  context.stageAttemptNumber();
  context.partitionId();
  context.addTaskCompletionListener(this);
}
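// The listener above appears to exist only to compile-check the TaskContext API
// (the return values are discarded and the listener re-registers itself). A
// hedged sketch of a listener that does observable work; the class name and log
// format are hypothetical, the interface is org.apache.spark.util.TaskCompletionListener:
import org.apache.spark.TaskContext;
import org.apache.spark.util.TaskCompletionListener;

class LoggingTaskCompletionListener implements TaskCompletionListener {
  @Override
  public void onTaskCompletion(TaskContext context) {
    System.out.printf("stage %d (attempt %d), partition %d finished%n",
        context.stageId(), context.stageAttemptNumber(), context.partitionId());
  }
}
// Registered from inside a running task, e.g. at the top of a mapPartitions body:
//   TaskContext.get().addTaskCompletionListener(new LoggingTaskCompletionListener());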
public static void test() {
  TaskContext tc = TaskContext.get();
  tc.isCompleted();
  tc.isInterrupted();
  tc.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  tc.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  tc.attemptNumber();
  tc.partitionId();
  tc.stageId();
  tc.stageAttemptNumber();
  tc.taskAttemptId();
}
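// TaskContext.get() returns the context of the currently running task and null
// outside any task (e.g. on the driver), so code shared between driver and
// executors should guard the call. A minimal sketch (method name is hypothetical):
import org.apache.spark.TaskContext;

public class TaskContextGuard {
  public static long taskAttemptIdOrDefault(long fallback) {
    TaskContext tc = TaskContext.get();
    // taskAttemptId() is unique across all task attempts in the same SparkContext.
    return tc != null ? tc.taskAttemptId() : fallback;
  }
}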
@Override
public Tuple apply(Tuple2<Text, Tuple> v1) {
  if (!initialized) {
    long partitionId = TaskContext.get().partitionId();
    Configuration jobConf = PigMapReduce.sJobConfInternal.get();
    // Expose Spark's partition id where Pig expects an MR task index/id.
    jobConf.set(PigConstants.TASK_INDEX, Long.toString(partitionId));
    jobConf.set(MRConfiguration.TASK_ID, Long.toString(partitionId));
    initialized = true;
  }
  if (sparkCounters != null && !disableCounter) {
    sparkCounters.increment(counterGroupName, counterName, 1L);
  }
  return v1._2();
}
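// The `initialized` flag above lives in the deserialized function instance, so
// the job conf is stamped once per instance rather than once per record. A
// stripped-down sketch of the same lazy-init pattern (class and config key are
// hypothetical):
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.TaskContext;

class PerTaskConfStamper {
  private transient boolean initialized = false;

  void ensureTaskIndex(Configuration jobConf) {
    if (!initialized) {
      int partitionId = TaskContext.get().partitionId();
      jobConf.set("example.task.index", Integer.toString(partitionId));
      initialized = true;
    }
  }
}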
private void setupMRLegacyConfigs() {
  StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_");
  taskAttemptIdBuilder.append(System.currentTimeMillis())
      .append("_")
      .append(stageIdFormat.format(TaskContext.get().stageId()))
      .append("_");
  if (isMap()) {
    taskAttemptIdBuilder.append("m_");
  } else {
    taskAttemptIdBuilder.append("r_");
  }
  // Spark's task attempt id is incremented by the Spark context rather than per
  // task, which may produce unstable qtest output. Since non-Hive features depend
  // on this value, we always set it to 0 here.
  taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId()))
      .append("_0");
  String taskAttemptIdStr = taskAttemptIdBuilder.toString();
  jobConf.set("mapred.task.id", taskAttemptIdStr);
  jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr);
  jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId());
}
private static SparkFilePath initializeOutput(final String output,
    final TaskContext context,
    final String country,
    final String temporaryOutputFolder,
    final String targetOutputFolder) {
  // Create temporary folder for flag output
  final String workerOutputFolder = SparkFileHelper.combine(temporaryOutputFolder,
      String.format("p%s_a%s", context.partitionId(), context.taskAttemptId()));
  final String temporaryFilePath = SparkFileHelper.combine(workerOutputFolder, output, country);
  final String targetFilePath = SparkFileHelper.combine(targetOutputFolder, output, country);
  return new SparkFilePath(temporaryFilePath, targetFilePath);
}
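// Hypothetical illustration of the naming scheme above (SparkFileHelper.combine
// is assumed to join path segments with '/'; all concrete values are made up):
public class SparkFilePathExample {
  public static void main(String[] args) {
    int partitionId = 7;
    long taskAttemptId = 12345L;
    // Keying the temp folder on (partitionId, taskAttemptId) keeps concurrent or
    // retried attempts from clobbering each other's files before the final move.
    String workerOutputFolder =
        String.format("/tmp/work/p%s_a%s", partitionId, taskAttemptId);
    System.out.println(workerOutputFolder + "/flags/DEU"); // temporary path
    System.out.println("/data/out/flags/DEU");             // target path
  }
}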