private void setupMRLegacyConfigs() {
  StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_");
  taskAttemptIdBuilder.append(System.currentTimeMillis())
      .append("_")
      .append(stageIdFormat.format(TaskContext.get().stageId()))
      .append("_");
  if (isMap()) {
    taskAttemptIdBuilder.append("m_");
  } else {
    taskAttemptIdBuilder.append("r_");
  }
  // Hive requires this TaskAttemptId to be unique. MR's TaskAttemptId is composed
  // of "attempt_timestamp_jobNum_m/r_taskNum_attemptNum". The counterpart for
  // Spark should be "attempt_timestamp_stageNum_m/r_partitionId_attemptNum".
  // When there are multiple attempts for a task, Hive relies on the partitionId
  // to decide whether the data are duplicates when collecting the final outputs
  // (see org.apache.hadoop.hive.ql.exec.Utils.removeTempOrDuplicateFiles).
  taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId()))
      .append("_").append(TaskContext.get().attemptNumber());
  String taskAttemptIdStr = taskAttemptIdBuilder.toString();
  jobConf.set("mapred.task.id", taskAttemptIdStr);
  jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr);
  jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId());
}
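// A minimal, self-contained sketch of the attempt-id construction above. The
// stageIdFormat/taskIdFormat fields are not shown in the snippet; here they are
// assumed to be zero-padded NumberFormats with hypothetical widths (4 and 6),
// chosen to mirror MR's "attempt_timestamp_jobNum_m/r_taskNum_attemptNum" layout.
import java.text.NumberFormat;

public class TaskAttemptIdSketch {
  private static NumberFormat padded(int digits) {
    NumberFormat f = NumberFormat.getInstance();
    f.setGroupingUsed(false);
    f.setMinimumIntegerDigits(digits);
    return f;
  }

  static String buildAttemptId(long timestamp, int stageId, boolean isMap,
      int partitionId, int attemptNumber) {
    return "attempt_" + timestamp
        + "_" + padded(4).format(stageId)
        + (isMap ? "_m_" : "_r_")
        + padded(6).format(partitionId)
        + "_" + attemptNumber;
  }

  public static void main(String[] args) {
    // Prints something like: attempt_1700000000000_0042_m_000007_0
    System.out.println(buildAttemptId(System.currentTimeMillis(), 42, true, 7, 0));
  }
}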
@Override
public void onTaskCompletion(TaskContext context) {
  context.isCompleted();
  context.isInterrupted();
  context.stageId();
  context.stageAttemptNumber();
  context.partitionId();
  context.addTaskCompletionListener(this);
}
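// The listener above appears to exist only to compile-check the TaskContext API
// (the return values are discarded and the listener re-registers itself). A
// hedged sketch of a listener that does observable work; the class name and log
// format are hypothetical, the interface is org.apache.spark.util.TaskCompletionListener:
import org.apache.spark.TaskContext;
import org.apache.spark.util.TaskCompletionListener;

class LoggingTaskCompletionListener implements TaskCompletionListener {
  @Override
  public void onTaskCompletion(TaskContext context) {
    System.out.printf("stage %d (attempt %d), partition %d finished%n",
        context.stageId(), context.stageAttemptNumber(), context.partitionId());
  }
}
// Registered from inside a running task, e.g. at the top of a mapPartitions body:
//   TaskContext.get().addTaskCompletionListener(new LoggingTaskCompletionListener());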
public static void test() {
  TaskContext tc = TaskContext.get();
  tc.isCompleted();
  tc.isInterrupted();
  tc.addTaskCompletionListener(new JavaTaskCompletionListenerImpl());
  tc.addTaskFailureListener(new JavaTaskFailureListenerImpl());
  tc.attemptNumber();
  tc.partitionId();
  tc.stageId();
  tc.stageAttemptNumber();
  tc.taskAttemptId();
}
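// TaskContext.get() returns the context of the currently running task and null
// outside any task (e.g. on the driver), so code shared between driver and
// executors should guard the call. A minimal sketch (method name is hypothetical):
import org.apache.spark.TaskContext;

public class TaskContextGuard {
  public static long taskAttemptIdOrDefault(long fallback) {
    TaskContext tc = TaskContext.get();
    // taskAttemptId() is unique across all task attempts in the same SparkContext.
    return tc != null ? tc.taskAttemptId() : fallback;
  }
}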
@Override
public Tuple apply(Tuple2<Text, Tuple> v1) {
  if (!initialized) {
    long partitionId = TaskContext.get().partitionId();
    Configuration jobConf = PigMapReduce.sJobConfInternal.get();
    // Expose Spark's partition id where Pig expects an MR task index/id.
    jobConf.set(PigConstants.TASK_INDEX, Long.toString(partitionId));
    jobConf.set(MRConfiguration.TASK_ID, Long.toString(partitionId));
    initialized = true;
  }
  if (sparkCounters != null && !disableCounter) {
    sparkCounters.increment(counterGroupName, counterName, 1L);
  }
  return v1._2();
}
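// The `initialized` flag above lives in the deserialized function instance, so
// the job conf is stamped once per instance rather than once per record. A
// stripped-down sketch of the same lazy-init pattern (class and config key are
// hypothetical):
import org.apache.hadoop.conf.Configuration;
import org.apache.spark.TaskContext;

class PerTaskConfStamper {
  private transient boolean initialized = false;

  void ensureTaskIndex(Configuration jobConf) {
    if (!initialized) {
      int partitionId = TaskContext.get().partitionId();
      jobConf.set("example.task.index", Integer.toString(partitionId));
      initialized = true;
    }
  }
}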
private void setupMRLegacyConfigs() {
  StringBuilder taskAttemptIdBuilder = new StringBuilder("attempt_");
  taskAttemptIdBuilder.append(System.currentTimeMillis())
      .append("_")
      .append(stageIdFormat.format(TaskContext.get().stageId()))
      .append("_");
  if (isMap()) {
    taskAttemptIdBuilder.append("m_");
  } else {
    taskAttemptIdBuilder.append("r_");
  }
  // Spark's task attempt id is incremented by the Spark context rather than per
  // task, which may produce unstable qtest output. Since non-Hive features depend
  // on this value, we always set it to 0 here.
  taskAttemptIdBuilder.append(taskIdFormat.format(TaskContext.get().partitionId()))
      .append("_0");
  String taskAttemptIdStr = taskAttemptIdBuilder.toString();
  jobConf.set("mapred.task.id", taskAttemptIdStr);
  jobConf.set("mapreduce.task.attempt.id", taskAttemptIdStr);
  jobConf.setInt("mapred.task.partition", TaskContext.get().partitionId());
}
private static SparkFilePath initializeOutput(final String output,
    final TaskContext context,
    final String country,
    final String temporaryOutputFolder,
    final String targetOutputFolder) {
  // Create temporary folder for flag output
  final String workerOutputFolder = SparkFileHelper.combine(temporaryOutputFolder,
      String.format("p%s_a%s", context.partitionId(), context.taskAttemptId()));
  final String temporaryFilePath = SparkFileHelper.combine(workerOutputFolder, output, country);
  final String targetFilePath = SparkFileHelper.combine(targetOutputFolder, output, country);
  return new SparkFilePath(temporaryFilePath, targetFilePath);
}
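// Hypothetical illustration of the naming scheme above (SparkFileHelper.combine
// is assumed to join path segments with '/'; all concrete values are made up):
public class SparkFilePathExample {
  public static void main(String[] args) {
    int partitionId = 7;
    long taskAttemptId = 12345L;
    // Keying the temp folder on (partitionId, taskAttemptId) keeps concurrent or
    // retried attempts from clobbering each other's files before the final move.
    String workerOutputFolder =
        String.format("/tmp/work/p%s_a%s", partitionId, taskAttemptId);
    System.out.println(workerOutputFolder + "/flags/DEU"); // temporary path
    System.out.println("/data/out/flags/DEU");             // target path
  }
}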