static synchronized String getOutputName(TaskAttemptContext context) {
  return context.getConfiguration().get("mapreduce.output.basename", "part") + "-"
      + NUMBER_FORMAT.format(context.getTaskAttemptID().getTaskID().getId());
}
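// Hedged usage sketch (not from the source): shows how the helper above resolves an output
// file name for a given attempt. The job identifier "sketch" and task numbers are made up,
// and this assumes NUMBER_FORMAT pads the task id to five digits, as FileOutputFormat does.
Configuration conf = new Configuration();
TaskAttemptID attempt =
    new TaskAttemptID(new TaskID(new JobID("sketch", 1), TaskType.REDUCE, 3), 0);
TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, attempt);
String name = getOutputName(ctx); // e.g. "part-00003" with the default basename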
private void mockTaskAttemptContext(String indexType) {
  TaskAttemptID fakeTaskId =
      new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2), 2);
  when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
  when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}
/**
 * @param ctx Context for IO operations.
 */
public HadoopV2Context(HadoopV2TaskContext ctx) {
  super(ctx.jobConf(), ctx.jobContext().getJobID());

  taskAttemptID = ctx.attemptId();

  conf.set(MRJobConfig.ID, taskAttemptID.getJobID().toString());
  conf.set(MRJobConfig.TASK_ID, taskAttemptID.getTaskID().toString());
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptID.toString());

  output = ctx.output();
  input = ctx.input();

  this.ctx = ctx;
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());

  Configuration conf = context.getConfiguration();
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

  String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
  CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
  cubeConfig = cube.getConfig();

  baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
  baseCuboidRowCountInMappers = Lists.newLinkedList();
  output = conf.get(BatchConstants.CFG_OUTPUT_PATH);
  samplingPercentage =
      Integer.parseInt(conf.get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
  taskId = context.getTaskAttemptID().getTaskID().getId();
  cuboidHLLMap = Maps.newHashMap();
}
private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(
    boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer = null;

  LOG.info("OutputCommitter set in config " + conf.get("mapred.output.committer.class"));

  if (newApiCommitter) {
    org.apache.hadoop.mapreduce.TaskID taskId =
        new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0);
    org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
        new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
        new TaskAttemptContextImpl(conf, taskAttemptID);
    OutputFormat outputFormat =
        ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
    committer = outputFormat.getOutputCommitter(taskContext);
  } else {
    committer = ReflectionUtils.newInstance(
        conf.getClass("mapred.output.committer.class", FileOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class),
        conf);
  }

  LOG.info("OutputCommitter is " + committer.getClass().getName());
  return committer;
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.deduplicate = conf.getBoolean("dedup", true);
  if (deduplicate) {
    this.threshold = conf.getFloat("threshold", 1E-5f);
  }
  this.columnBoundaries = SpatialSite.getReduceSpace(conf);
  Path outputPath = DelaunayTriangulationOutputFormat.getOutputPath(context);
  Path finalPath = new Path(outputPath,
      String.format("m-%05d.final", context.getTaskAttemptID().getTaskID().getId()));
  FileSystem fs = finalPath.getFileSystem(conf);
  writer = new TriangulationRecordWriter(fs, null, finalPath, context);
}
private void futureGet(Context context) throws IOException {
  try {
    future.get();
  } catch (Exception e) {
    throw new IOException(
        "Failed to build cube in mapper " + context.getTaskAttemptID().getTaskID().getId(), e);
  }
}
/**
 * Write random values to the writer assuming a table created using
 * {@link #FAMILIES} as column family descriptors.
 */
private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
    TaskAttemptContext context, Set<byte[]> families, int numRows)
    throws IOException, InterruptedException {
  byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
  int valLength = 10;
  byte[] valBytes = new byte[valLength];

  int taskId = context.getTaskAttemptID().getTaskID().getId();
  assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";

  final byte[] qualifier = Bytes.toBytes("data");
  Random random = new Random();
  for (int i = 0; i < numRows; i++) {
    Bytes.putInt(keyBytes, 0, i);
    random.nextBytes(valBytes);
    ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
    for (byte[] family : families) {
      Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
      writer.write(key, kv);
    }
  }
}
private static TaskAttemptContext getTaskContext(TaskAttemptContext baseContext, Job job) {
  org.apache.hadoop.mapreduce.TaskAttemptID baseTaskId = baseContext.getTaskAttemptID();

  // Create a task ID context with our specialized job ID.
  org.apache.hadoop.mapreduce.TaskAttemptID taskId =
      new org.apache.hadoop.mapreduce.TaskAttemptID(job.getJobID().getJtIdentifier(),
          job.getJobID().getId(), baseTaskId.isMap(), baseTaskId.getTaskID().getId(),
          baseTaskId.getId());

  return new TaskAttemptContextWrapper(baseContext, job.getConfiguration(), taskId);
}
/**
 * Downgrade a new TaskID to an old one.
 *
 * @param old a new or old TaskID
 * @return either old or a new TaskID built to match old
 */
public static TaskID downgrade(org.apache.hadoop.mapreduce.TaskID old) {
  if (old instanceof TaskID) {
    return (TaskID) old;
  } else {
    return new TaskID(JobID.downgrade(old.getJobID()), old.getTaskType(), old.getId());
  }
}
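// Hedged usage sketch (not from the source): downgrading a new-API task id into the old
// org.apache.hadoop.mapred namespace. The job identifier and numbers below are made up.
org.apache.hadoop.mapreduce.TaskID newStyle = new org.apache.hadoop.mapreduce.TaskID(
    new org.apache.hadoop.mapreduce.JobID("sketch", 42), TaskType.MAP, 7);
TaskID oldStyle = TaskID.downgrade(newStyle); // an already-old id would be returned unchanged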
private Random createRandom(Context context) {
  long taskId = 0;
  if (context.getTaskAttemptID() != null) { // MRUnit returns null
    LOGGER.debug("context.getTaskAttemptID().getId(): {}", context.getTaskAttemptID().getId());
    LOGGER.debug("context.getTaskAttemptID().getTaskID().getId(): {}",
        context.getTaskAttemptID().getTaskID().getId());
    taskId = context.getTaskAttemptID().getTaskID().getId(); // taskId = 0, 1, ..., N
  }
  // Create a good random seed, yet ensure a deterministic PRNG sequence for easy reproducibility.
  return new Random(421439783L * (taskId + 1));
}
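// Minimal sketch (assumption, not from the source): the fixed multiplier ties the seed to the
// task id, so re-running the same task id reproduces exactly the same pseudo-random sequence.
Random first = new Random(421439783L * (3 + 1));
Random second = new Random(421439783L * (3 + 1));
assert first.nextLong() == second.nextLong(); // deterministic per task id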
private RecordWriter<KeyT, ValueT> initRecordWriter(OutputFormat<KeyT, ValueT> outputFormatObj,
    TaskAttemptContext taskAttemptContext) throws IllegalStateException {
  try {
    LOGGER.info("Creating new RecordWriter for task {} of Job with id {}.",
        taskAttemptContext.getTaskAttemptID().getTaskID().getId(),
        taskAttemptContext.getJobID().getJtIdentifier());
    return outputFormatObj.getRecordWriter(taskAttemptContext);
  } catch (InterruptedException | IOException e) {
    throw new IllegalStateException("Unable to create RecordWriter object: ", e);
  }
}
public static int getTaskID(TaskAttemptContext context) {
  TaskAttemptID taskAttemptID = context.getTaskAttemptID();
  return taskAttemptID.getTaskID().getId();
}
@Test
public void testCloneMapContext() throws Exception {
  TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
  TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);
  MapContext<IntWritable, IntWritable, IntWritable, IntWritable> mapContext =
      new MapContextImpl<IntWritable, IntWritable, IntWritable, IntWritable>(
          conf, taskAttemptid, null, null, null, null, null);
  Mapper<IntWritable, IntWritable, IntWritable, IntWritable>.Context mapperContext =
      new WrappedMapper<IntWritable, IntWritable, IntWritable, IntWritable>()
          .getMapContext(mapContext);
  ContextFactory.cloneMapContext(mapperContext, conf, null, null);
}
public void setDatum(Object oDatum) {
  this.datum = (TaskFinished) oDatum;
  this.taskid = TaskID.forName(datum.taskid.toString());
  if (datum.successfulAttemptId != null) {
    this.successfulAttemptId = TaskAttemptID.forName(datum.successfulAttemptId.toString());
  }
  this.finishTime = datum.finishTime;
  this.taskType = TaskType.valueOf(datum.taskType.toString());
  this.status = datum.status.toString();
  this.counters = EventReader.fromAvro(datum.counters);
}
/**
 * Mask the job ID part in a {@link TaskID}.
 *
 * @param taskId raw {@link TaskID} read from trace
 * @return masked {@link TaskID} with empty {@link JobID}.
 */
private TaskID maskTaskID(TaskID taskId) {
  JobID jobId = new JobID();
  TaskType taskType = taskId.getTaskType();
  return new TaskID(jobId, taskType, taskId.getId());
}
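// Hedged sketch (not from the source): masking keeps the task type and index but drops the
// job identity, so task ids taken from different traces become comparable. The literal task
// id string below is made up for illustration.
TaskID raw = TaskID.forName("task_201402121733_0001_m_000004");
TaskID masked = maskTaskID(raw); // TaskType.MAP and id 4 are preserved, the JobID is emptied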
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Path attemptDir = FileOutputFormat.getWorkOutputPath(context);
  String filename = context.getTaskAttemptID().getTaskID().toString();
  Path sideEffectFile = new Path(attemptDir, filename);
  sideEffectStream = FileSystem.get(context.getConfiguration()).create(sideEffectFile);
  log.info("Opened file = {}", sideEffectFile);
}
private void processTaskAttemptStartedEvent(TaskAttemptStartedEvent event) {
  ParsedTaskAttempt attempt = getOrMakeTaskAttempt(event.getTaskType(),
      event.getTaskId().toString(), event.getTaskAttemptId().toString());
  if (attempt == null) {
    return;
  }
  attempt.setStartTime(event.getStartTime());
  attempt.putTrackerName(event.getTrackerName());
  attempt.putHttpPort(event.getHttpPort());
  attempt.putShufflePort(event.getShufflePort());
}
/**
 * Test of readFields method, of class TaskID.
 */
@Test
public void testReadFields() throws Exception {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(baos);

  out.writeInt(0);
  out.writeInt(1);
  WritableUtils.writeVInt(out, 4);
  out.write(new byte[] {0x31, 0x32, 0x33, 0x34});
  WritableUtils.writeEnum(out, TaskType.REDUCE);

  DataInputByteBuffer in = new DataInputByteBuffer();
  in.reset(ByteBuffer.wrap(baos.toByteArray()));

  TaskID instance = new TaskID();
  instance.readFields(in);

  assertEquals("The readFields() method did not produce the expected task ID",
      "task_1234_0001_r_000000", instance.toString());
}
/**
 * Downgrade a new TaskID to an old one.
 *
 * @param old a new or old TaskID
 * @return either old or a new TaskID built to match old
 */
public static TaskID downgrade(org.apache.hadoop.mapreduce.TaskID old) {
  if (old instanceof TaskID) {
    return (TaskID) old;
  } else {
    // Older variant: relies on the deprecated isMap()-based TaskID constructor.
    return new TaskID(JobID.downgrade(old.getJobID()), old.isMap(), old.getId());
  }
}