private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory .get(paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory.get( paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory.get( paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
private boolean nextFile() throws IOException { currentPath++; if (currentPath >= paths.length) { return false; } // Output the archive filename, to help with debugging: log.info("Opening nextFile: " + paths[currentPath]); // Set up the ArchiveReader: this.status = this.filesystem.getFileStatus(paths[currentPath]); datainputstream = this.filesystem.open(paths[currentPath]); arcreader = (ArchiveReader) ArchiveReaderFactory .get(paths[currentPath].getName(), datainputstream, true); // Set to strict reading, in order to cope with malformed archive files // which cause an infinite loop otherwise. arcreader.setStrict(true); // Get the iterator: iterator = arcreader.iterator(); this.archiveName = paths[currentPath].getName(); return true; }
arcreader = ArchiveReaderFactory.get(path.getName(), datainputstream, true); arcreader.setStrict(false); if (path.getName().matches("^.+\\.warc(\\.gz)?$")) { archiveIterator = warcIndexer
arcreader = ArchiveReaderFactory.get(path.getName(), datainputstream, true); arcreader.setStrict(false); if (path.getName().matches("^.+\\.warc(\\.gz)?$")) { archiveIterator = warcIndexer
List<ArchiveRecordHeader> hdrList = new ArrayList<ArchiveRecordHeader>(); int recordCount = 0; setStrict(true); for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) { recordCount++;
List<ArchiveRecordHeader> hdrList = new ArrayList<ArchiveRecordHeader>(); int recordCount = 0; setStrict(true); for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) { recordCount++;
List<ArchiveRecordHeader> hdrList = new ArrayList<ArchiveRecordHeader>(); int recordCount = 0; setStrict(true); for (Iterator<ArchiveRecord> i = iterator(); i.hasNext();) { recordCount++;
arcreader.setStrict(false); WarcIndexer warcIndexer = new WarcIndexer(); ArcIndexer arcIndexer = new ArcIndexer();
arcreader.setStrict(false); WarcIndexer warcIndexer = new WarcIndexer(); ArcIndexer arcIndexer = new ArcIndexer();