/**
 * Get the output committer for this output format. This is responsible
 * for ensuring the output is committed correctly.
 * @param context the task context
 * @return an output committer
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    return new DefaultOutputCommitterContainer(context,
        new JobConf(context.getConfiguration()).getOutputCommitter());
}
/**
 * Setup task.
 *
 * @throws IOException If failed.
 */
public void setup() throws IOException {
    if (writer != null)
        jobConf.getOutputCommitter().setupTask(new TaskAttemptContextImpl(jobConf, attempt));
}
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    // this needs to be manually set, under normal circumstances MR Task does this
    setWorkOutputPath(context);
    return new FileOutputCommitterContainer(context,
        HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed() ?
            null :
            new JobConf(context.getConfiguration()).getOutputCommitter());
}
/**
 * Abort task.
 */
public void abort() {
    try {
        if (writer != null)
            jobConf.getOutputCommitter().abortTask(new TaskAttemptContextImpl(jobConf, attempt));
    }
    catch (IOException ignore) {
        // No-op.
    }
}
}
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    try {
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
@Test
public void testOpen() throws Exception {
    OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
    DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
    JobConf jobConf = Mockito.spy(new JobConf());
    when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

    HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
    outputFormat.open(1, 1);

    verify(jobConf, times(2)).getOutputCommitter();
    verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
    verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
@Test
public void testFinalizeGlobal() throws Exception {
    OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
    DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
    JobConf jobConf = Mockito.spy(new JobConf());
    when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

    HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
    outputFormat.finalizeGlobal(1);

    verify(outputCommitter, times(1)).commitJob(any(JobContext.class));
}
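The two Mockito-based tests above mock DummyOutputFormat and DummyOutputCommitter, which are not shown in these snippets. A minimal sketch of such test doubles, assuming the old org.apache.hadoop.mapred API that the surrounding code uses (class names and bodies here are assumptions for illustration):

// Sketch only: mockable stand-ins for the mapred OutputFormat/OutputCommitter contracts.
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.util.Progressable;

class DummyOutputCommitter extends OutputCommitter {
    @Override public void setupJob(JobContext jobContext) throws IOException { }
    @Override public void setupTask(TaskAttemptContext taskContext) throws IOException { }
    @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { return false; }
    @Override public void commitTask(TaskAttemptContext taskContext) throws IOException { }
    @Override public void abortTask(TaskAttemptContext taskContext) throws IOException { }
}

class DummyOutputFormat implements OutputFormat<String, Long> {
    @Override public RecordWriter<String, Long> getRecordWriter(FileSystem ignored, JobConf conf,
            String name, Progressable progress) throws IOException {
        // The tests only verify that this is called; no real writer is needed.
        return null;
    }
    @Override public void checkOutputSpecs(FileSystem ignored, JobConf conf) throws IOException { }
}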
/**
 * Commit task.
 *
 * @throws IOException If failed.
 */
public void commit() throws IOException {
    if (writer != null) {
        OutputCommitter outputCommitter = jobConf.getOutputCommitter();

        TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, attempt);

        if (outputCommitter.needsTaskCommit(taskCtx))
            outputCommitter.commitTask(taskCtx);
    }
}
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1)
            + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
.getOutputCommitter().commitJob(context);
try {
    new JobConf(currContext.getConfiguration())
        .getOutputCommitter().abortJob(currContext, state);
} catch (Exception e) {
/** {@inheritDoc} */
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;

    JobContext jobCtx = ctx.jobContext();

    try {
        OutputCommitter committer = jobCtx.getJobConf().getOutputCommitter();

        if (abort)
            committer.abortJob(jobCtx, JobStatus.State.FAILED);
        else
            committer.commitJob(jobCtx);
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
}
currTaskContext.getJobConf().getOutputCommitter();
/** {@inheritDoc} */
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;

    try {
        ctx.jobConf().getOutputFormat().checkOutputSpecs(null, ctx.jobConf());

        OutputCommitter committer = ctx.jobConf().getOutputCommitter();

        if (committer != null)
            committer.setupJob(ctx.jobContext());
    }
    catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
}
protected void runJobSetupTask(TaskUmbilicalProtocol umbilical) throws IOException {
    // do the setup
    getProgress().setStatus("setup");

    conf.getOutputCommitter().setupJob(jobContext);

    done(umbilical);
}
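Taken together, the snippets above exercise the individual phases of the old-API OutputCommitter lifecycle: setupJob, setupTask, needsTaskCommit/commitTask, and commitJob or abortJob. A minimal sketch tying the happy path together, reusing only the constructors already shown above and assuming a fully configured JobConf (CommitterLifecycleSketch and runLifecycle are hypothetical names, not part of any framework quoted here):

// Sketch of the commit sequence the snippets above implement piecewise.
import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.hadoop.mapred.TaskAttemptID;

class CommitterLifecycleSketch {
    static void runLifecycle(JobConf jobConf, TaskAttemptID attempt) throws IOException {
        OutputCommitter committer = jobConf.getOutputCommitter();

        JobContextImpl jobCtx = new JobContextImpl(jobConf, new JobID());
        TaskAttemptContextImpl taskCtx = new TaskAttemptContextImpl(jobConf, attempt);

        committer.setupJob(jobCtx);        // once per job (see open()/runJobSetupTask above)
        committer.setupTask(taskCtx);      // once per task attempt (see setup() above)

        // ... records are written by the RecordWriter here ...

        if (committer.needsTaskCommit(taskCtx))
            committer.commitTask(taskCtx); // see commit() above
        committer.commitJob(jobCtx);       // see finalizeGlobal() and the Ignite run() above
    }
}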