/**
 * Gets the target output stream where the Avro container file should be written.
 *
 * @param context The task attempt context.
 * @return The target output stream.
 */
protected OutputStream getAvroFileOutputStream(TaskAttemptContext context) throws IOException {
  Path path = new Path(((FileOutputCommitter) getOutputCommitter(context)).getWorkPath(),
      getUniqueFile(context, context.getConfiguration().get("avro.mo.config.namedOutput", "part"),
          org.apache.avro.mapred.AvroOutputFormat.EXT));
  return path.getFileSystem(context.getConfiguration()).create(path);
}
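The stream returned above is typically handed straight to an Avro container-file writer. A minimal sketch of that hand-off, with a tiny "Example" record schema invented purely for illustration (not part of the original snippet):

Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Example\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
DataFileWriter<GenericRecord> fileWriter =
    new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
fileWriter.create(schema, getAvroFileOutputStream(context)); // writes the container header to the stream
GenericRecord record = new GenericData.Record(schema);       // the "Example" schema is hypothetical
record.put("id", 1L);
fileWriter.append(record);
fileWriter.close();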
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx) throws IOException {
  Schema schema = AvroJob.getInputKeySchema(cx.getConfiguration());
  if (schema != null) {
    return schema;
  }
  Path path = split.getPath(idx);
  FileSystem fs = path.getFileSystem(cx.getConfiguration());
  return AvroUtils.getSchemaFromDataFile(path, fs);
}
public MultiRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
  String[] aliases = getOutputFormatAliases(context);
  for (String alias : aliases) {
    LOGGER.info("Creating record writer for alias: " + alias);
    TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context);
    Configuration aliasConf = aliasContext.getConfiguration();
    // Create output directory if not already created.
    String outDir = aliasConf.get("mapred.output.dir");
    if (outDir != null) {
      Path outputDir = new Path(outDir);
      FileSystem fs = outputDir.getFileSystem(aliasConf);
      if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
      }
    }
    OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
    baseRecordWriters.put(alias,
        new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), aliasContext));
  }
}
context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName()); String jobInfoString = context.getConfiguration().get( HCatConstants.HCAT_KEY_OUTPUT_INFO); OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); HiveStorageHandler storageHandler = HCatUtil.getStorageHandler( context.getConfiguration(), storeInfo); Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass(); AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration()); context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName()); if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()){ (org.apache.hadoop.mapred.RecordWriter)null, context); } else { Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), context.getConfiguration().get("mapreduce.output.basename", "part"))); parentDir.getFileSystem(context.getConfiguration()), new JobConf(context.getConfiguration()), childPath.toString(), InternalUtil.createReporter(context)),
final String finalSegmentString = taskContext.getConfiguration().get(PUBLISHED_SEGMENT_KEY);
if (finalSegmentString == null) {
  throw new IOException("Could not read final segment");
}
taskContext.progress();
final FileSystem fs = taskContext.getWorkingDirectory().getFileSystem(taskContext.getConfiguration());
final Path taskAttemptDir = getTaskPath(
    context.getJobID(),
    context.getTaskAttemptID(),          // the remaining getTaskPath arguments are assumed;
    taskContext.getWorkingDirectory());  // the original snippet is truncated after getJobID()
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) split;
  conf = context.getConfiguration();
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(conf);
  LOG.info("Initialize HFileRecordReader for {}", path);
  this.in = HFile.createReader(fs, path, conf);

  // The file info must be loaded before the scanner can be used.
  // This seems like a bug in HBase, but it's easily worked around.
  this.in.loadFileInfo();
  this.scanner = in.getScanner(false, false);
}
/**
 * Constructor.
 * @param context The TaskAttemptContext to supply the writer with information from the job configuration
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {
  schema = initSchema(context);
  meta = filterMetadata(context.getConfiguration());
  writer = new AvroColumnWriter<>(schema, meta, ReflectData.get());

  Path outputPath = FileOutputFormat.getOutputPath(context);
  String dir = FileOutputFormat.getUniqueFile(context, "part", "");
  dirPath = new Path(outputPath.toString() + "/" + dir);
  fs = dirPath.getFileSystem(context.getConfiguration());
  fs.mkdirs(dirPath);

  blockSize = fs.getDefaultBlockSize();
}
Path outputPath = committer.getWorkPath();
FileSystem fs = outputPath.getFileSystem(task.getConfiguration());

// The destination is assumed to be the task's default work file; the original snippet
// references `file` without showing its declaration.
Path file = getDefaultWorkFile(task, "");

CompressionCodec codec = null;
if (getCompressOutput(task)) {
  Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
  codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
}

final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);
public void commitTask(TaskAttemptContext context) throws IOException {
  Path workPath = getWorkPath();
  FileSystem fs = workPath.getFileSystem(context.getConfiguration());
final Configuration conf = context.getConfiguration();
final boolean writeMultipleTables = conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false);
final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
if (writeTableNames == null || writeTableNames.isEmpty()) {
  // Guard clause reconstructed; the original snippet only retained the tail of the error message.
  throw new IllegalArgumentException(OUTPUT_TABLE_NAME_CONF_KEY + " cannot be empty");
}
final FileSystem fs = outputDir.getFileSystem(conf);
final Configuration conf = context.getConfiguration();
LOG.debug("Task output path: " + outputdir);
final FileSystem fs = outputdir.getFileSystem(conf);
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  FileSplit fSplit = (FileSplit) split;
  Path path = fSplit.getPath();
  Configuration conf = context.getConfiguration();
  this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
  this.end = fSplit.getStart() + fSplit.getLength();

  if (fSplit.getStart() > in.getPosition()) {
    in.sync(fSplit.getStart()); // skip forward to the first sync marker at or after the split start
  }

  this.start = in.getPosition();
  more = start < end;

  key = new LongWritable();
  value = new BytesRefArrayWritable();
}
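For context, a matching nextKeyValue() might look like the following sketch. It assumes the fields set up in initialize() (in, key, value, more, end) and is not part of the original snippet:

@Override
public boolean nextKeyValue() throws IOException {
  if (!more) {
    return false;
  }
  more = in.next(key);                       // advance to the next row; false at end of file
  if (more && in.lastSeenSyncPos() >= end) { // stop once we have read past the end of this split
    more = false;
  }
  if (more) {
    in.getCurrentRow(value);                 // materialize the current row's columns
  }
  return more;
}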
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); FileSystem fs = outputFile.getFileSystem(conf);
public CustomRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException {
  path = split.getPath(index);
  fs = path.getFileSystem(context.getConfiguration());
  startOffset = split.getOffset(index);
  endOffset = startOffset + split.getLength(index);

  fileIn = fs.open(path);
  reader = new LineReader(fileIn);
  pos = startOffset;
}
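A plausible nextKeyValue() for this reader, sketched under the assumption that key is a LongWritable, value is a Text, and pos/endOffset/reader are the fields initialized above (none of this is in the original snippet):

@Override
public boolean nextKeyValue() throws IOException {
  if (key == null) {
    key = new LongWritable();
  }
  if (value == null) {
    value = new Text();
  }
  if (pos >= endOffset) {
    return false;                    // past the end of this chunk of the combined split
  }
  key.set(pos);                      // byte offset of the line within the file
  int newSize = reader.readLine(value);
  if (newSize == 0) {
    return false;                    // end of file
  }
  pos += newSize;                    // advance by the number of bytes consumed
  return true;
}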
protected DataOutputStream getDataOutputStream(final TaskAttemptContext job) throws IOException, InterruptedException {
  final Configuration conf = job.getConfiguration();
  boolean isCompressed = getCompressOutput(job);
  CompressionCodec codec = null;
  String extension = "";
  if (isCompressed) {
    final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, conf);
    extension = codec.getDefaultExtension();
  }
  final Path file = super.getDefaultWorkFile(job, extension);
  final FileSystem fs = file.getFileSystem(conf);
  if (!isCompressed) {
    return new DataOutputStream(fs.create(file, false));
  } else {
    return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
  }
}
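One way a subclass could consume this helper is sketched below; the anonymous RecordWriter and its line-per-value handling are assumptions for illustration, not part of the original format (java.nio.charset.StandardCharsets is assumed to be imported):

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
  final DataOutputStream out = getDataOutputStream(job);
  return new RecordWriter<K, V>() {
    @Override
    public void write(K key, V value) throws IOException {
      out.write(value.toString().getBytes(StandardCharsets.UTF_8)); // write the value as one UTF-8 text line
      out.write('\n');
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
      out.close(); // finishes the plain or compression-wrapped stream
    }
  };
}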
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  Path file = split.getPath(); // declaration added: the original snippet uses `file` without defining it
  FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputdir.getFileSystem(conf);
when(fs.delete(eq(p0), eq(true))).thenReturn(true);
when(fs.delete(eq(p1), eq(true))).thenReturn(true);
doReturn(fs).when(p).getFileSystem(any(Configuration.class));
when(fs.makeQualified(eq(p))).thenReturn(a);
when(context.getConfiguration()).thenReturn(conf);
@Override
public void initialize(final InputSplit genericSplit, final TaskAttemptContext context) throws IOException {
  final FileSplit split = (FileSplit) genericSplit;
  final Configuration configuration = context.getConfiguration();
  if (configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
    this.graphFilter = VertexProgramHelper.deserialize(
        ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
  this.gryoReader = GryoReader.build().mapper(
      GryoMapper.build().addRegistries(
          IoRegistryHelper.createRegistries(ConfUtil.makeApacheConfiguration(configuration))).create()).create();
  long start = split.getStart();
  final Path file = split.getPath();
  if (null != new CompressionCodecFactory(configuration).getCodec(file)) {
    throw new IllegalStateException("Compression is not supported for the (binary) Gryo format");
  }
  // open the file and seek to the start of the split
  this.inputStream = file.getFileSystem(configuration).open(split.getPath());
  this.splitLength = split.getLength();
  if (this.splitLength > 0)
    this.splitLength -= (seekToHeader(this.inputStream, start) - start);
}
/**
 * Initialize the ORC file reader and batch record reader.
 * Note that `initBatch` must be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Configuration conf = taskAttemptContext.getConfiguration();
  Reader reader = OrcFile.createReader(
      fileSplit.getPath(),
      OrcFile.readerOptions(conf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
          .filesystem(fileSplit.getPath().getFileSystem(conf)));
  Reader.Options options =
      OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
  recordReader = reader.rows(options);
}
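After initialize() (and initBatch() in the original class), rows are pulled batch by batch. A hypothetical consumption loop, assuming the Reader and RecordReader created above are still in scope (not part of the original class):

TypeDescription schema = reader.getSchema();
VectorizedRowBatch batch = schema.createRowBatch(); // default batch size of 1024 rows
while (recordReader.nextBatch(batch)) {
  for (int r = 0; r < batch.size; r++) {
    // read column vectors for row r, e.g. ((LongColumnVector) batch.cols[0]).vector[r]
  }
}
recordReader.close();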