/**
 * Builds an {@link Extractor} for the {@link FileSplit} serialized into the given work unit.
 *
 * <p>The split is read from the {@code FILE_SPLIT_BYTES_STRING_KEY} property, a
 * {@link RecordReader} is created and initialized for it, and the reader is then handed to
 * the four-argument {@code getExtractor} overload together with the value of
 * {@code FILE_INPUT_READ_KEYS_KEY} (defaulting to {@code DEFAULT_FILE_INPUT_READ_KEYS}).
 *
 * @param workUnitState state of the work unit carrying the serialized split
 * @return the extractor produced by the four-argument overload
 * @throws IOException if the work unit holds no serialized split, if the reader cannot be
 *     created or initialized, or if the calling thread is interrupted while doing so
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  Configuration configuration = new Configuration();
  FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);

  String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
  TaskAttemptContext taskAttemptContext =
      getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());

  RecordReader<K, V> recordReader = null;
  boolean handedOff = false;
  try {
    recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS);
    Extractor<S, D> extractor = getExtractor(workUnitState, recordReader, fileSplit, readKeys);
    // From here on the extractor holds the reader, so it must not be closed by this method.
    handedOff = true;
    return extractor;
  } catch (InterruptedException ie) {
    // Restore the interrupt flag so callers further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw new IOException(ie);
  } finally {
    if (!handedOff && recordReader != null) {
      try {
        // Nobody else holds the reader on the failure path; release it here to avoid a leak.
        recordReader.close();
      } catch (IOException | RuntimeException ignored) {
        // Best-effort cleanup: the failure already propagating is the one worth reporting.
      }
    }
  }
}
RecordReader<K, V> reader = (RecordReader<K, V>) format.createRecordReader(nextSplit, attemptContext); if (currentReader != null) { currentReader.close();
RecordReader<K, V> reader = (RecordReader<K, V>) format.createRecordReader(nextSplit, attemptContext); if (currentReader != null) { currentReader.close();
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, TaskAttemptContext context, Integer idx) throws IOException, InterruptedException { fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.createRecordReader(fileSplit, context); }
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, TaskAttemptContext context, Integer idx) throws IOException, InterruptedException { fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.createRecordReader(fileSplit, context); }
@SuppressWarnings("unchecked") RecordReader<K, V> reader = (RecordReader<K, V>) format.createRecordReader(nextSplit, attemptContext); if (currentReader != null) { currentReader.close();
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, TaskAttemptContext context, Integer idx) throws IOException, InterruptedException { fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.createRecordReader(fileSplit, context); }
protected CombineFileRecordReaderWrapper(FileInputFormat<K,V> inputFormat, CombineFileSplit split, TaskAttemptContext context, Integer idx) throws IOException, InterruptedException { fileSplit = new FileSplit(split.getPath(idx), split.getOffset(idx), split.getLength(idx), split.getLocations()); delegate = inputFormat.createRecordReader(fileSplit, context); }
/**
 * (Re)initializes this reader for the file at position {@code idx} of a combined split.
 *
 * <p>Any existing delegate is closed first. The given split must be a
 * {@link CombineFileSplit}; a {@link FileSplit} is derived from its entry at {@code idx},
 * and a new delegate reader is created from {@code getInputFormat()} and initialized
 * with that split.
 *
 * @param split the split to read; expected to be a {@link CombineFileSplit}
 * @param context task attempt context passed to the delegate reader
 * @throws DatasetOperationException if {@code split} is not a {@link CombineFileSplit}
 * @throws IOException declared by the close, split and delegate reader calls used here
 * @throws InterruptedException declared by the delegate reader calls used here
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // Release the reader from any previous initialization before replacing it.
  if (delegate != null) {
    delegate.close();
  }

  if (!(split instanceof CombineFileSplit)) {
    throw new DatasetOperationException(
        "Split is not a CombineFileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }

  CombineFileSplit combined = (CombineFileSplit) split;
  FileSplit singleFile = new FileSplit(
      combined.getPath(idx),
      combined.getOffset(idx),
      combined.getLength(idx),
      combined.getLocations());

  delegate = getInputFormat().createRecordReader(singleFile, context);
  delegate.initialize(singleFile, context);
}
/**
 * Builds an {@link Extractor} for the {@link FileSplit} serialized into the given work unit.
 *
 * <p>The split is read from the {@code FILE_SPLIT_BYTES_STRING_KEY} property, a
 * {@link RecordReader} is created and initialized for it, and the reader is then handed to
 * the four-argument {@code getExtractor} overload together with the value of
 * {@code FILE_INPUT_READ_KEYS_KEY} (defaulting to {@code DEFAULT_FILE_INPUT_READ_KEYS}).
 *
 * @param workUnitState state of the work unit carrying the serialized split
 * @return the extractor produced by the four-argument overload
 * @throws IOException if the work unit holds no serialized split, if the reader cannot be
 *     created or initialized, or if the calling thread is interrupted while doing so
 */
@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
  if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
    throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
  }

  Configuration configuration = new Configuration();
  FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);

  String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY);
  FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
  TaskAttemptContext taskAttemptContext =
      getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());

  RecordReader<K, V> recordReader = null;
  boolean handedOff = false;
  try {
    recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS);
    Extractor<S, D> extractor = getExtractor(workUnitState, recordReader, fileSplit, readKeys);
    // From here on the extractor holds the reader, so it must not be closed by this method.
    handedOff = true;
    return extractor;
  } catch (InterruptedException ie) {
    // Restore the interrupt flag so callers further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw new IOException(ie);
  } finally {
    if (!handedOff && recordReader != null) {
      try {
        // Nobody else holds the reader on the failure path; release it here to avoid a leak.
        recordReader.close();
      } catch (IOException | RuntimeException ignored) {
        // Best-effort cleanup: the failure already propagating is the one worth reporting.
      }
    }
  }
}