/**
 * {@inheritDoc}
 */
@Override
public RecordReader<NullWritable, GenericRecord> createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException {
  Schema readerSchema = AvroJob.getInputValueSchema(context.getConfiguration());
  if (readerSchema == null) {
    String schemaFilePath = context.getConfiguration().get(CONF_INPUT_VALUE_SCHEMA_PATH);
    if (StringUtils.isNotBlank(schemaFilePath)) {
      log.info("Using file: %s as reader schema.", schemaFilePath);
      try (FSDataInputStream inputStream =
          FileSystem.get(context.getConfiguration()).open(new Path(schemaFilePath))) {
        readerSchema = new Schema.Parser().parse(inputStream);
      }
    }
  }
  if (null == readerSchema) {
    log.warn("Reader schema was not set. Use AvroJob.setInputValueSchema() if desired.");
    log.info("Using a reader schema equal to the writer schema.");
  }
  return new AvroValueRecordReader(readerSchema);
}

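A minimal driver-side sketch of how the reader schema above could be supplied; AvroJob.setInputValueSchema is the standard Avro mapreduce API, while the job name, class name, and record schema here are placeholders.

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AvroReaderSchemaDriverSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "avro-value-read");
    // Placeholder record schema; a real job would parse or generate its own Avro schema.
    Schema readerSchema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Example\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
    // Makes AvroJob.getInputValueSchema(...) in the record-reader snippet return a non-null schema.
    AvroJob.setInputValueSchema(job, readerSchema);
  }
}
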
final String finalSegmentString = taskContext.getConfiguration().get(PUBLISHED_SEGMENT_KEY);
if (finalSegmentString == null) {
  throw new IOException("Could not read final segment");
}
// Assumption: the segment was stored as JSON under PUBLISHED_SEGMENT_KEY; the type is inferred
// from the jsonMapper write below.
final DataSegment newSegment =
    HadoopDruidConverterConfig.jsonMapper.readValue(finalSegmentString, DataSegment.class);
log.info("Committing new segment [%s]", newSegment);
taskContext.progress();
final FileSystem fs = taskContext.getWorkingDirectory().getFileSystem(taskContext.getConfiguration());
final Path taskAttemptDir = getTaskPath(
    taskContext.getJobID(),
    taskContext.getTaskAttemptID(),
    taskContext.getWorkingDirectory()
);
final Path taskAttemptFile = new Path(taskAttemptDir, DATA_FILE_KEY);
final Path taskAttemptSuccess = new Path(taskAttemptDir, DATA_SUCCESS_KEY);
try (final OutputStream outputStream = fs.create(taskAttemptFile, false, 1 << 10, commitProgressable)) {
  outputStream.write(HadoopDruidConverterConfig.jsonMapper.writeValueAsBytes(newSegment));
  fs.create(taskAttemptSuccess, false).close();
  taskContext.progress();
  taskContext.setStatus("Committed");
}

private String generateKey(TaskAttemptContext context) throws IOException {
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  if (StringUtils.isBlank(jobInfoString)) {
    // Avoid the NPE.
    throw new IOException("Could not retrieve OutputJobInfo for TaskAttempt " + context.getTaskAttemptID());
  }
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  return context.getTaskAttemptID().toString() + "@" + jobInfo.getLocation();
}

protected DataOutputStream getDataOutputStream(final TaskAttemptContext job) throws IOException, InterruptedException {
  final Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}

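A small, hedged sketch of the driver-side settings that make getCompressOutput(job) return true in a snippet like the one above; the codec and job name are arbitrary examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressedOutputDriverSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "compressed-output");
    // Enable output compression and pick a codec; getOutputCompressorClass in the
    // snippet above falls back to DefaultCodec when no codec is configured.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
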
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fSplit = (FileSplit) split;
  Path path = fSplit.getPath();
  Configuration conf = context.getConfiguration();
  this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
  this.end = fSplit.getStart() + fSplit.getLength();
  if (fSplit.getStart() > in.getPosition()) {
    in.sync(fSplit.getStart());
  }
  this.start = in.getPosition();
  more = start < end;
  key = new LongWritable();
  value = new BytesRefArrayWritable();
}

FileSystem fs = FileSystem.get(context.getConfiguration());
if (EtlMultiOutputFormat.isRunMoveData(context)) {
  Path workPath = super.getWorkPath();
  Path baseOutDir = EtlMultiOutputFormat.getDestinationPath(context);
  log.info("Destination base path: " + baseOutDir);
  for (FileStatus f : fs.listStatus(workPath)) {
    String file = f.getPath().getName();
    log.info("work file: " + file);
    if (file.startsWith("data")) {
      String partitionedFile =
          getPartitionedPath(context, file, count.getEventCount(), count.getLastKey().getOffset());
      Path dest = new Path(baseOutDir, partitionedFile);
      if (!fs.exists(dest.getParent())) {
        mkdirs(fs, dest.getParent());
      }
      Path tempPath = new Path(workPath, "counts." + context.getConfiguration().get("mapred.task.id"));
      OutputStream outputStream = new BufferedOutputStream(fs.create(tempPath));
      ObjectMapper mapper = new ObjectMapper();
      SequenceFile.Writer offsetWriter = SequenceFile.createWriter(fs, context.getConfiguration(),
          new Path(super.getWorkPath(), EtlMultiOutputFormat.getUniqueFile(context, EtlMultiOutputFormat.OFFSET_PREFIX, "")),

context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName()); String jobInfoString = context.getConfiguration().get( HCatConstants.HCAT_KEY_OUTPUT_INFO); OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler( context.getConfiguration(), storeInfo); Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass(); AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration()); context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName()); if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()){ (org.apache.hadoop.mapred.RecordWriter)null, context); } else { Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), context.getConfiguration().get("mapreduce.output.basename", "part"))); parentDir.getFileSystem(context.getConfiguration()), new JobConf(context.getConfiguration()), childPath.toString(), InternalUtil.createReporter(context)),
static void setWorkOutputPath(TaskAttemptContext context) throws IOException {
  String outputPath = context.getConfiguration().get("mapred.output.dir");
  // We need to do this to get the task path and set it for the mapred implementation,
  // since it can't be done automatically because of the mapreduce -> mapred abstraction.
  if (outputPath != null) {
    context.getConfiguration().set("mapred.work.output.dir",
        new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString());
  }
}

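For context, a hedged sketch of how old-API (mapred) output code typically resolves the property being published above; FileOutputFormat.getWorkOutputPath is the standard mapred accessor, and the surrounding class is illustrative only.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class WorkOutputPathSketch {
  // Illustrative helper: old-API code looks up the task's side-effect/work directory through
  // this accessor, which is the path the wrapper above sets under "mapred.work.output.dir".
  static Path resolveTaskWorkDir(JobConf conf) {
    return FileOutputFormat.getWorkOutputPath(conf);
  }
}
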
final Configuration conf = context.getConfiguration();
final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false);
final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
if (writeTableNames == null || writeTableNames.isEmpty()) {
  throw new IllegalArgumentException("Configuration parameter " + OUTPUT_TABLE_NAME_CONF_KEY
      + " cannot be empty");
}
final FileSystem fs = outputDir.getFileSystem(conf);
final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
Path writerPath = null;
if (writeMultipleTables) {
  writerPath = new Path(outputDir, new Path(tableRelPath, Bytes.toString(family)));

    throws FileNotFoundException, IllegalArgumentException, IOException {
  Configuration conf = new Configuration(false);
  conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1);
  TaskAttemptID tid0 = new TaskAttemptID("1363718006656", 1, TaskType.REDUCE, 14, 3);
  Path p = spy(new Path("/user/hadoop/out"));
  Path a = new Path("hdfs://user/hadoop/out");
  Path p0 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_0");
  Path p1 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000014_1");
  Path p2 = new Path(a, "_temporary/1/attempt_1363718006656_0001_r_000013_0");
  when(fs.exists(eq(p0))).thenReturn(true);
  when(fs.exists(eq(p1))).thenReturn(true);
  when(fs.exists(eq(p2))).thenReturn(true);
  when(fs.exists(eq(p3))).thenReturn(false);
  when(fs.delete(eq(p0), eq(true))).thenReturn(true);
  when(context.getTaskAttemptID()).thenReturn(tid0);
  when(context.getConfiguration()).thenReturn(conf);

/**
 * Gets the target output stream where the Avro container file should be written.
 *
 * @param context The task attempt context.
 * @return The target output stream.
 */
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
  Path path = new Path(((FileOutputCommitter) getOutputCommitter(context)).getWorkPath(),
      getUniqueFile(context, context.getConfiguration().get("avro.mo.config.namedOutput", "part"),
          org.apache.avro.mapred.AvroOutputFormat.EXT));
  return path.getFileSystem(context.getConfiguration()).create(path);
}

public MultiRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
  String[] aliases = getOutputFormatAliases(context);
  for (String alias : aliases) {
    LOGGER.info("Creating record writer for alias: " + alias);
    TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context);
    Configuration aliasConf = aliasContext.getConfiguration();
    // Create output directory if not already created.
    String outDir = aliasConf.get("mapred.output.dir");
    if (outDir != null) {
      Path outputDir = new Path(outDir);
      FileSystem fs = outputDir.getFileSystem(aliasConf);
      if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
      }
    }
    OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
    baseRecordWriters.put(alias,
        new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), aliasContext));
  }
}

public void commitTask(TaskAttemptContext context) throws IOException {
  Path workPath = getWorkPath();
  FileSystem fs = workPath.getFileSystem(context.getConfiguration());
  if (fs.exists(workPath)) {
    long recordCount = getRecordCountFromCounter(context, AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
    String fileNamePrefix;
    for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        // ...
      }
    })) {
      Path newPath = new Path(status.getPath().getParent(), fileName);
      LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
      fs.rename(status.getPath(), newPath);

Configuration conf = new Configuration();
expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
expect(inputSplit.getStart()).andReturn(0L).anyTimes();
expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();
expect(context.getConfiguration()).andReturn(conf).anyTimes();

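Where a full mock is not needed, a real context can be constructed instead; a small hedged sketch (the fixture class and helper are illustrative, TaskAttemptContextImpl is Hadoop's concrete implementation).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextFixture {
  // Builds a concrete TaskAttemptContext backed by the given Configuration, so
  // record readers/writers under test see real getConfiguration() values.
  static TaskAttemptContext newContext(Configuration conf) {
    return new TaskAttemptContextImpl(conf, new TaskAttemptID());
  }
}
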
/**
 * Constructor.
 * @param context The TaskAttemptContext to supply the writer with information from the job configuration
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {
  schema = initSchema(context);
  meta = filterMetadata(context.getConfiguration());
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());
  Path outputPath = FileOutputFormat.getOutputPath(context);
  String dir = FileOutputFormat.getUniqueFile(context, "part", "");
  dirPath = new Path(outputPath.toString() + "/" + dir);
  fs = dirPath.getFileSystem(context.getConfiguration());
  fs.mkdirs(dirPath);
  blockSize = fs.getDefaultBlockSize();
}

@Override
public void write(Object dummy, Canvas r) throws IOException {
  String suffix = String.format("-%05d.png", canvasesWritten++);
  Path p = new Path(outPath.getParent(), outPath.getName() + suffix);
  FSDataOutputStream outFile = outFS.create(p);
  // Write the merged canvas
  plotter.writeImage(r, outFile, this.vflip);
  outFile.close();
  task.progress();
  java.awt.Point imageLocation = mergedCanvas.projectToImageSpace(r.inputMBR.x1, r.inputMBR.y2);
  masterFile.printf("%d,%d,%s\n", imageLocation.x, imageHeight - imageLocation.y, p.getName());
}

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  Path filePath = fileSplit.getPath();
  FileSystem fileSys = filePath.getFileSystem(context.getConfiguration());
  shpInputStream = fileSys.open(filePath);
  // Hand the input stream to the parser and parse the file header to initialize.
  parser = new ShpFileParser(shpInputStream);
  parser.parseShapeFileHead();
}

Path outputPath = committer.getWorkPath();
FileSystem fs = outputPath.getFileSystem(task.getConfiguration());
if (!fs.exists(outputPath)) {
  fs.mkdirs(outputPath);
}
CompressionCodec codec = null;
if (getCompressOutput(task)) {
  Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
  codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
}
final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;
  if (codec != null) {
    in = new LineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
