private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory.get( paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory .get(paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory.get( paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory .get(paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
/**
 * Opens the file behind the given split and prepares a record iterator over it.
 *
 * <p>Records the split's start and end offsets, opens the split's file through
 * its own {@link FileSystem}, and asks {@code ArchiveReaderFactory} for a
 * reader. {@code format} and {@code iter} are set only when the reader is an
 * {@code ARCReader} or a {@code WARCReader}; for any other reader type both
 * are left untouched.
 *
 * @param genericSplit the split to read; must be a {@link FileSplit}
 * @param context      task context supplying the job {@link Configuration}
 * @throws IOException if the file cannot be opened or no archive reader can
 *         be created for it
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();

    start = split.getStart();
    end = start + split.getLength();

    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);

    boolean opened = false;
    try {
        reader = ArchiveReaderFactory.get(file.toString(),
                new BufferedInputStream(fileIn), true);
        opened = true;
    } finally {
        if (!opened) {
            // Reader creation failed: release the stream we just opened so the
            // handle is not leaked, without masking the original failure.
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // The exception already propagating is the one worth reporting.
            }
        }
    }

    // Remember which archive flavour we are reading.
    // NOTE(review): assumes ARCReader and WARCReader are unrelated types, so at
    // most one branch can ever apply -- confirm.
    if (reader instanceof ARCReader) {
        format = ArchiveFormat.ARC;
        iter = reader.iterator();
    } else if (reader instanceof WARCReader) {
        format = ArchiveFormat.WARC;
        iter = reader.iterator();
    }

    this.pos = start;
}
for (Iterator<ArchiveRecord> ii = iterator(); ii.hasNext();) { ArchiveRecord r = ii.next(); if (toFile) {
for (Iterator<ArchiveRecord> ii = iterator(); ii.hasNext();) { ArchiveRecord r = ii.next(); if (toFile) {
for (Iterator<ArchiveRecord> ii = iterator(); ii.hasNext();) { ArchiveRecord r = ii.next(); if (toFile) {
int recordCount = 0; setStrict(true); for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) { recordCount++; ArchiveRecord r = i.next();
int recordCount = 0; setStrict(true); for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) { recordCount++; ArchiveRecord r = i.next();
int recordCount = 0; setStrict(true); for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) { recordCount++; ArchiveRecord r = i.next();
Iterator<ArchiveRecord> ir = reader.iterator(); int recordCount = 1; int lastFailedRecord = 0;
Iterator<ArchiveRecord> ir = reader.iterator(); int recordCount = 1; int lastFailedRecord = 0;
Iterator<ArchiveRecord> it = ar.iterator();
Iterator<ArchiveRecord> it = ar.iterator();