org.apache.hadoop.mapreduce.lib.input.FileInputFormat.createRecordReader java code examples

/**
 * Creates an extractor that reads the {@link FileSplit} serialized in the given work unit state.
 *
 * <p>The split is read from the {@code FILE_SPLIT_BYTES_STRING_KEY} property, a record reader is
 * created for it from the configured {@link FileInputFormat} and initialized, and the reader is
 * then handed to the four-argument {@code getExtractor} overload.
 *
 * @param workUnitState state holding the serialized split and the read-keys flag
 * @return the extractor produced by the four-argument {@code getExtractor} overload
 * @throws IOException if no serialized split is present, if creating or initializing the record
 *     reader fails, or if the calling thread is interrupted while doing so (in which case the
 *     thread's interrupt status is restored before the exception is thrown)
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
 if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
  throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
 }
 Configuration configuration = new Configuration();
 FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);
 String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY);
 FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
 TaskAttemptContext taskAttemptContext =
   getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());
 RecordReader<K, V> recordReader = null;
 boolean handedOff = false;
 try {
  recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext);
  recordReader.initialize(fileSplit, taskAttemptContext);
  boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS);
  Extractor<S, D> extractor = getExtractor(workUnitState, recordReader, fileSplit, readKeys);
  // From here on the returned extractor holds the reader, so it must stay open.
  handedOff = true;
  return extractor;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers further up can still observe the interruption.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 } finally {
  // The reader was created but never handed to an extractor: close it so it is not leaked.
  if (!handedOff && recordReader != null) {
   try {
    recordReader.close();
   } catch (IOException ignored) {
    // Best-effort cleanup; the exception already propagating is the one that matters.
   }
  }
 }
}

RecordReader<K, V> reader = (RecordReader<K, V>) format.createRecordReader(nextSplit, attemptContext);
if (currentReader != null) {
  currentReader.close();

RecordReader<K, V> reader = (RecordReader<K, V>) format.createRecordReader(nextSplit, attemptContext);
if (currentReader != null) {
  currentReader.close();

protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat,
 CombineFileSplit split, TaskAttemptContext context, Integer idx)
 throws IOException, InterruptedException {
 fileSplit = new FileSplit(split.getPath(idx),
  split.getOffset(idx),
  split.getLength(idx),
  split.getLocations());
 delegate = inputFormat.createRecordReader(fileSplit, context);
}

protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat,
 CombineFileSplit split, TaskAttemptContext context, Integer idx)
 throws IOException, InterruptedException {
 fileSplit = new FileSplit(split.getPath(idx),
  split.getOffset(idx),
  split.getLength(idx),
  split.getLocations());
 delegate = inputFormat.createRecordReader(fileSplit, context);
}

@SuppressWarnings("unchecked")
RecordReader<K, V> reader =
  (RecordReader<K, V>) format.createRecordReader(nextSplit, attemptContext);
if (currentReader != null) {
 currentReader.close();

protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat,
 CombineFileSplit split, TaskAttemptContext context, Integer idx)
 throws IOException, InterruptedException {
 fileSplit = new FileSplit(split.getPath(idx),
  split.getOffset(idx),
  split.getLength(idx),
  split.getLocations());
 delegate = inputFormat.createRecordReader(fileSplit, context);
}

protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat,
 CombineFileSplit split, TaskAttemptContext context, Integer idx)
 throws IOException, InterruptedException {
 fileSplit = new FileSplit(split.getPath(idx),
  split.getOffset(idx),
  split.getLength(idx),
  split.getLocations());
 delegate = inputFormat.createRecordReader(fileSplit, context);
}

/**
 * Points this reader at the chunk selected by {@code idx} within a combined split.
 *
 * <p>Any existing delegate reader is closed first. The chunk is then turned into a standalone
 * {@link FileSplit}, and a new delegate reader is created for it and initialized.
 *
 * @param split the split to read; must be a {@link CombineFileSplit}
 * @param context task attempt context passed to the delegate reader
 * @throws IOException if closing the old delegate, or creating or initializing the new one, fails
 * @throws InterruptedException if creating or initializing the delegate is interrupted
 * @throws DatasetOperationException if {@code split} is not a {@link CombineFileSplit}
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
 // Release the reader left over from a previous initialization, if any.
 if (delegate != null) {
  delegate.close();
 }

 // Only combined splits carry the per-chunk path/offset/length that are read below.
 if (!(split instanceof CombineFileSplit)) {
  throw new DatasetOperationException(
    "Split is not a CombineFileSplit: %s:%s",
    split.getClass().getCanonicalName(), split);
 }

 final CombineFileSplit combined = (CombineFileSplit) split;
 final FileSplit chunk = new FileSplit(
   combined.getPath(idx), combined.getOffset(idx), combined.getLength(idx), combined.getLocations());
 delegate = getInputFormat().createRecordReader(chunk, context);
 delegate.initialize(chunk, context);
}

/**
 * Creates an extractor that reads the {@link FileSplit} serialized in the given work unit state.
 *
 * <p>The split is read from the {@code FILE_SPLIT_BYTES_STRING_KEY} property, a record reader is
 * created for it from the configured {@link FileInputFormat} and initialized, and the reader is
 * then handed to the four-argument {@code getExtractor} overload.
 *
 * @param workUnitState state holding the serialized split and the read-keys flag
 * @return the extractor produced by the four-argument {@code getExtractor} overload
 * @throws IOException if no serialized split is present, if creating or initializing the record
 *     reader fails, or if the calling thread is interrupted while doing so (in which case the
 *     thread's interrupt status is restored before the exception is thrown)
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
 if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
  throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
 }
 Configuration configuration = new Configuration();
 FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);
 String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY);
 FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
 TaskAttemptContext taskAttemptContext =
   getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());
 RecordReader<K, V> recordReader = null;
 boolean handedOff = false;
 try {
  recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext);
  recordReader.initialize(fileSplit, taskAttemptContext);
  boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS);
  Extractor<S, D> extractor = getExtractor(workUnitState, recordReader, fileSplit, readKeys);
  // From here on the returned extractor holds the reader, so it must stay open.
  handedOff = true;
  return extractor;
 } catch (InterruptedException ie) {
  // Restore the interrupt flag so callers further up can still observe the interruption.
  Thread.currentThread().interrupt();
  throw new IOException(ie);
 } finally {
  // The reader was created but never handed to an extractor: close it so it is not leaked.
  if (!handedOff && recordReader != null) {
   try {
    recordReader.close();
   } catch (IOException ignored) {
    // Best-effort cleanup; the exception already propagating is the one that matters.
   }
  }
 }
}

Popular methods of FileInputFormat

setInputPaths
Set the array of Paths as the list of inputs for the map-reduce job.
addInputPath
Add a Path to the list of inputs for the map-reduce job.
getInputPaths
Get the list of input Paths for the map-reduce job.
getSplits
Generate the list of files and make them into FileSplits.
listStatus
List input directories. Subclasses may override to, e.g., select only files matching a regular expre
addInputPaths
Add the given comma separated paths to the list of inputs for the map-reduce job.
setMaxInputSplitSize
Set the maximum split size
isSplitable
Is the given filename splitable? Usually, true, but if the file is stream compressed, it will not be
setMinInputSplitSize
Set the minimum input split size
setInputDirRecursive
computeSplitSize
getBlockIndex

Popular in Java

Creating JSON documents from java classes using gson
setRequestProperty (URLConnection)
scheduleAtFixedRate (ScheduledExecutorService)
scheduleAtFixedRate (Timer)
Pointer (com.sun.jna)
An abstraction for a native pointer data type. A Pointer instance represents, on the Java side, a na
Enumeration (java.util)
A legacy iteration interface. New code should use Iterator instead. Iterator replaces the enumeration
Properties (java.util)
A Properties object is a Hashtable where the keys and values must be Strings. Each property can have
StringTokenizer (java.util)
Breaks a string into tokens; new code should probably use String#split.> // Legacy code: StringTo
HttpServletRequest (javax.servlet.http)
Extends the javax.servlet.ServletRequest interface to provide request information for HTTP servlets.
Logger (org.slf4j)
The org.slf4j.Logger interface is the main user entry point of SLF4J API. It is expected that loggin
Best plugins for Eclipse

How to use createRecordReader method in org.apache.hadoop.mapreduce.lib.input.FileInputFormat

Best Java code snippets using org.apache.hadoop.mapreduce.lib.input.FileInputFormat.createRecordReader (Showing top 10 results out of 315)

How to use
createRecordReader
method
in
org.apache.hadoop.mapreduce.lib.input.FileInputFormat