/**
 * Supplies the record reader for this input format.
 *
 * <p>A fresh {@code ForestReader<VALUE>} is constructed on every call. Neither
 * argument is consulted by this method.
 *
 * @param split the input split (not read by this method)
 * @param context the task attempt context (not read by this method)
 * @return a newly constructed {@code ForestReader<VALUE>}
 * @throws IOException declared by the signature; this body contains no explicit throw
 * @throws InterruptedException declared by the signature; this body contains no explicit throw
 */
@Override
public RecordReader<DocumentURIWithSourceInfo, VALUE> createRecordReader(
        InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final ForestReader<VALUE> reader = new ForestReader<VALUE>();
    return reader;
}
public boolean nextKeyValue() throws IOException, InterruptedException { while (bytesRead < split.getLength() && !done) { ExpandedTree tree = getNextTree(); if (tree == null) { continue; setSkipKey(uri, 0, 0, "fragment or link"); value = null; return true; if (!applyFilter(uri, tree)) { continue; largeForestDir, tree, uri); if (value == null) { // send to DocumentMapper for book keeping setSkipKey(uri, 0, 0, "unsupported node type"); return true; setKey(uri, uri, 0, 0); return true;