/**
 * Decides what to do with a checksum failure.
 *
 * <p>If the boolean setting {@code IO_SKIP_CHECKSUM_ERRORS_KEY} (default
 * {@code IO_SKIP_CHECKSUM_ERRORS_DEFAULT}) is on, a warning is logged and
 * {@code sync} is called with the current position advanced by the
 * {@code io.bytes.per.checksum} setting (512 when unset). Otherwise the
 * exception is rethrown as-is.
 *
 * @param e the checksum failure to skip past or propagate
 * @throws IOException {@code e} itself when skipping is disabled, or whatever
 *         the re-sync raises
 */
private void handleChecksumException(ChecksumException e) throws IOException {
  final boolean skipErrors =
      this.conf.getBoolean(IO_SKIP_CHECKSUM_ERRORS_KEY, IO_SKIP_CHECKSUM_ERRORS_DEFAULT);
  if (!skipErrors) {
    throw e;
  }
  LOG.warn("Bad checksum at "+getPosition()+". Skipping entries.");
  sync(getPosition()+this.conf.getInt("io.bytes.per.checksum", 512));
}
/**
 * Reads the next key/value pair from {@code reader}.
 *
 * @return a two-element list holding {@code key} followed by {@code value},
 *         or {@code null} when {@code reader.next(key, value)} returns false
 */
public List<Object> next() throws IOException, ParseException {
  if (!reader.next(key, value)) {
    return null;
  }
  ArrayList<Object> pair = new ArrayList<Object>(2);
  pair.add(key);
  pair.add(value);
  // Feed the reader's sync flag and its position after this record into offset.
  offset.increment(reader.syncSeen(), reader.getPosition());
  return pair;
}
// Bump the considered-segment counter, then hand the current position of
// stream i's reader to updateProgress.
segmentsConsidered++; updateProgress(mStream[i].in.getPosition());
/**
 * Reports the fraction of the {@code start}..{@code end} range that
 * {@code reader} has moved through.
 *
 * @return 0.0 when {@code end == start}; otherwise the reader's distance from
 *         {@code start} divided by {@code end - start}, capped at 1.0
 */
@Override
public float getProgress() throws IOException, InterruptedException {
  if (end == start) {
    return 0.0f;
  }
  final float fraction = (reader.getPosition() - start) / (float) (end - start);
  return Math.min(1.0f, fraction);
}
} // closes the enclosing scope whose opening is outside this snippet
/**
 * Reports how far reading has advanced within the input split.
 *
 * @return 0.0 to 1.0 of the input byte range; 0.0 when {@code end == start}
 */
public float getProgress() throws IOException {
  return (end == start)
      ? 0.0f
      : Math.min(1.0f, (in.getPosition() - start) / (float)(end - start));
}
/**
 * Attempts to read the next key into {@code key}.
 *
 * <p>Once {@code more} is false, no further reads are attempted. Otherwise a
 * key is read, and {@code more} becomes false if the position sampled
 * <em>before</em> the read was already at or beyond {@code end} and
 * {@code in.syncSeen()} is true after it; in every other case {@code more}
 * reflects whether {@code in.next(key)} returned non-null.
 *
 * @param key the key object passed to {@code in.next}
 * @return the updated value of {@code more}
 */
protected synchronized boolean next(K key) throws IOException {
  if (!more) {
    return false;
  }
  long before = in.getPosition();
  boolean gotKey = in.next(key) != null;
  // syncSeen() is consulted only when the pre-read position reached end.
  boolean pastEnd = before >= end && in.syncSeen();
  more = !pastEnd && gotKey;
  return more;
}
/**
 * Either skips past a checksum failure or propagates it, depending on the
 * boolean setting {@code IO_SKIP_CHECKSUM_ERRORS_KEY} (default
 * {@code IO_SKIP_CHECKSUM_ERRORS_DEFAULT}).
 *
 * @param e the checksum failure being handled
 * @throws IOException {@code e} when skipping is disabled, or whatever the
 *         re-sync raises
 */
private void handleChecksumException(ChecksumException e) throws IOException {
  if (!this.conf.getBoolean(IO_SKIP_CHECKSUM_ERRORS_KEY, IO_SKIP_CHECKSUM_ERRORS_DEFAULT)) {
    // Skipping is off: surface the original failure untouched.
    throw e;
  }

  LOG.warn("Bad checksum at "+getPosition()+". Skipping entries.");
  // Re-sync from the current position plus the configured checksum chunk
  // size (512 when "io.bytes.per.checksum" is unset).
  sync(getPosition()+this.conf.getInt("io.bytes.per.checksum", 512));
}
/**
 * Calls {@code reader.sync(position)} when {@code position} lies ahead of the
 * reader's current position, and clears the {@code ready} flag in all cases.
 *
 * @param position the position passed to {@code reader.sync}
 */
@Override
public void sync(long position) throws IOException {
  final boolean ahead = reader.getPosition() < position;
  if (ahead) {
    reader.sync(position);
  }
  ready = false;
}
// Start of a scan over dataReader: remember the starting position in pos and
// nextBlock, then read records one at a time. When blockCompressed is set and
// the reader's position has moved beyond nextBlock, pos is refreshed to the
// reader's current position. (The remainder of the loop is outside this
// snippet.)
long pos = dataReader.getPosition(); LongWritable position = new LongWritable(); long nextBlock = pos; while(dataReader.next(key, value)) { if (blockCompressed) { long curPos = dataReader.getPosition(); if (curPos > nextBlock) { pos = dataReader.getPosition(); // next record position
// Capture the data reader's current position in curPosition.
curPosition = data.getPosition();
/**
 * Initializes the IO context for a file split.
 *
 * <p>Determines a block start offset and whether it is a block pointer:
 * <ul>
 *   <li>input format class name contains {@code "SequenceFile"}: a temporary
 *       {@code SequenceFile.Reader} is synced to the split start; the block
 *       pointer flag is the reader's {@code isBlockCompressed()} value;</li>
 *   <li>{@code recordReader} is an {@code RCFileRecordReader}: its
 *       {@code getStart()} is used and the flag is set;</li>
 *   <li>input format class name contains {@code "RCFile"}: a temporary
 *       {@code RCFile.Reader} is synced to the split start and the flag is
 *       set;</li>
 *   <li>otherwise the offset stays at -1 and the flag stays false.</li>
 * </ul>
 * Temporary readers are closed in a {@code finally} block so they are released
 * even when {@code sync} or {@code getPosition} throws.
 *
 * @param split the file split being read
 * @param job the job configuration; also stored in {@code this.jobConf}
 * @param inputFormatClass the input format class, matched by name
 * @param recordReader the record reader serving this split
 * @throws IOException if the file system or a temporary reader fails
 */
public void initIOContext(FileSplit split, JobConf job,
    Class inputFormatClass, RecordReader recordReader) throws IOException {
  boolean blockPointer = false;
  long blockStart = -1;

  FileSplit fileSplit = split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(job);

  if (inputFormatClass.getName().contains("SequenceFile")) {
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
    try {
      blockPointer = in.isBlockCompressed();
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
    } finally {
      // Always release the probe reader, including on failure.
      in.close();
    }
  } else if (recordReader instanceof RCFileRecordReader) {
    blockPointer = true;
    blockStart = ((RCFileRecordReader) recordReader).getStart();
  } else if (inputFormatClass.getName().contains("RCFile")) {
    blockPointer = true;
    RCFile.Reader in = new RCFile.Reader(fs, path, job);
    try {
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
    } finally {
      // Always release the probe reader, including on failure.
      in.close();
    }
  }

  this.jobConf = job;
  this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
  this.initIOContextSortedProps(split, recordReader, job);
}
@Override public boolean next() throws IOException { if (size() == 0) return false; if (minSegment != null) { //minSegment is non-null for all invocations of next except the first //one. For the first invocation, the priority queue is ready for use //but for the subsequent invocations, first adjust the queue adjustPriorityQueue(minSegment); if (size() == 0) { minSegment = null; return false; } } minSegment = (SegmentDescriptor)top(); long startPos = minSegment.in.getPosition(); // Current position in stream //save the raw key reference rawKey = minSegment.getKey(); //load the raw value. Re-use the existing rawValue buffer if (rawValue == null) { rawValue = minSegment.in.createValueBytes(); } minSegment.nextRawValue(rawValue); long endPos = minSegment.in.getPosition(); // End position after reading value updateProgress(endPos - startPos); return true; }
protected synchronized void open(Path dir, WritableComparator comparator, Configuration conf, SequenceFile.Reader.Option... options ) throws IOException { Path dataFile = new Path(dir, DATA_FILE_NAME); Path indexFile = new Path(dir, INDEX_FILE_NAME); // open the data this.data = createDataFileReader(dataFile, conf, options); this.firstPosition = data.getPosition(); if (comparator == null) { Class<? extends WritableComparable> cls; cls = data.getKeyClass().asSubclass(WritableComparable.class); this.comparator = WritableComparator.get(cls, conf); } else { this.comparator = comparator; } // open the index SequenceFile.Reader.Option[] indexOptions = Options.prependOptions(options, SequenceFile.Reader.file(indexFile)); this.index = new SequenceFile.Reader(conf, indexOptions); }
/**
 * Sets up the IO context for the given file split.
 *
 * <p>The block start offset and block-pointer flag are chosen as follows:
 * when the input format class name contains {@code "SequenceFile"}, a
 * temporary {@code SequenceFile.Reader} is synced to the split start and the
 * flag mirrors {@code isBlockCompressed()}; when {@code recordReader} is an
 * {@code RCFileRecordReader}, its {@code getStart()} is used with the flag
 * set; when the class name contains {@code "RCFile"}, a temporary
 * {@code RCFile.Reader} is synced to the split start with the flag set;
 * otherwise the offset remains -1 and the flag remains false.
 *
 * <p>Each temporary reader is closed in a {@code finally} block, so it is not
 * leaked when {@code sync} or {@code getPosition} throws.
 *
 * @param split the file split being read
 * @param job the job configuration; also stored in {@code this.jobConf}
 * @param inputFormatClass the input format class, matched by name
 * @param recordReader the record reader serving this split
 * @throws IOException if the file system or a temporary reader fails
 */
public void initIOContext(FileSplit split, JobConf job,
    Class inputFormatClass, RecordReader recordReader) throws IOException {
  boolean blockPointer = false;
  long blockStart = -1;

  FileSplit fileSplit = split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(job);

  if (inputFormatClass.getName().contains("SequenceFile")) {
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
    try {
      blockPointer = in.isBlockCompressed();
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
    } finally {
      // Close on every path so a failing sync cannot leak the reader.
      in.close();
    }
  } else if (recordReader instanceof RCFileRecordReader) {
    blockPointer = true;
    blockStart = ((RCFileRecordReader) recordReader).getStart();
  } else if (inputFormatClass.getName().contains("RCFile")) {
    blockPointer = true;
    RCFile.Reader in = new RCFile.Reader(fs, path, job);
    try {
      in.sync(fileSplit.getStart());
      blockStart = in.getPosition();
    } finally {
      // Close on every path so a failing sync cannot leak the reader.
      in.close();
    }
  }

  this.jobConf = job;
  this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
  this.initIOContextSortedProps(split, recordReader, job);
}
/** Reads the final key from the file. * * @param key key to read into */ public synchronized void finalKey(WritableComparable key) throws IOException { long originalPosition = data.getPosition(); // save position try { readIndex(); // make sure index is valid if (count > 0) { data.seek(positions[count-1]); // skip to last indexed entry } else { reset(); // start at the beginning } while (data.next(key)) {} // scan to eof } finally { data.seek(originalPosition); // restore position } }
/**
 * Reports the underlying reader's current position.
 *
 * @return the value of {@code reader.getPosition()}
 */
@Override
public long tell() throws IOException {
  final long current = reader.getPosition();
  return current;
}
/**
 * Tells whether the reader is at or beyond {@code position} and reports
 * {@code syncSeen()}.
 *
 * @param position the position to compare the reader's position against
 * @return {@code true} only when both conditions hold
 */
@Override
public boolean pastSync(long position) throws IOException {
  if (reader.getPosition() < position) {
    return false;
  }
  return reader.syncSeen();
}
/**
 * Syncs the reader to {@code position} if that position is beyond where the
 * reader currently is; the {@code ready} flag is reset either way.
 *
 * @param position the position passed to {@code reader.sync}
 */
@Override
public void sync(long position) throws IOException {
  final long current = reader.getPosition();
  if (position > current) {
    reader.sync(position);
  }
  ready = false;
}
private void adjustPriorityQueue(SegmentDescriptor ms) throws IOException{ long startPos = ms.in.getPosition(); // Current position in stream boolean hasNext = ms.nextRawKey(); long endPos = ms.in.getPosition(); // End position after reading key updateProgress(endPos - startPos); if (hasNext) { adjustTop(); } else { pop(); ms.cleanup(); } }
/**
 * Reports the current offset, taken from the underlying reader's position.
 *
 * @return the value of {@code reader.getPosition()}
 */
public long getOffset() throws IOException {
  final long offsetNow = reader.getPosition();
  return offsetNow;
}