@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
log.info(String.format("First pass traversal to count number of reads ended, found %d total reads.", firstPassProgress.getCount())); final long totalReads = TOTAL_READS_IN_INPUT == 0 ? firstPassProgress.getCount() : TOTAL_READS_IN_INPUT; CloserUtil.close(reader); if (progress.getCount() != totalReads) { log.warn(String.format("The totalReads (%d) provided does not match the reads found in the " + "input file (%d). Files may not be split evenly or number of files may not " + "match what was requested. There were %d files generated each with around %d " + "reads except the last file which contained %d reads.", totalReads, progress.getCount(), fileIndex - 1, readsPerFile, readsWritten) );
log.info(String.format("First pass traversal to count number of reads ended, found %d total reads.", firstPassProgress.getCount())); final long totalReads = TOTAL_READS_IN_INPUT == 0 ? firstPassProgress.getCount() : TOTAL_READS_IN_INPUT; CloserUtil.close(reader); if (progress.getCount() != totalReads) { log.warn(String.format("The totalReads (%d) provided does not match the reads found in the " + "input file (%d). Files may not be split evenly or number of files may not " + "match what was requested. There were %d files generated each with around %d " + "reads except the last file which contained %d reads.", totalReads, progress.getCount(), fileIndex - 1, readsPerFile, readsWritten) );
private void filterReads(final FilteringSamIterator filteringIterator) { // get OUTPUT header from INPUT and overwrite it if necessary final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT); final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder(); if (SORT_ORDER != null) { fileHeader.setSortOrder(SORT_ORDER); } if (FILTER == Filter.includePairedIntervals && fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new UnsupportedOperationException("Input must be coordinate sorted to use includePairedIntervals"); } final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder()); log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> OUTPUT=" + OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]"); // create OUTPUT file final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, presorted, OUTPUT); final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written"); while (filteringIterator.hasNext()) { final SAMRecord rec = filteringIterator.next(); outputWriter.addAlignment(rec); progress.record(rec); } filteringIterator.close(); outputWriter.close(); log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName()); }
private void filterReads(final FilteringSamIterator filteringIterator) { // get OUTPUT header from INPUT and overwrite it if necessary final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT); final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder(); if (SORT_ORDER != null) { fileHeader.setSortOrder(SORT_ORDER); } if (FILTER == Filter.includePairedIntervals && fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new UnsupportedOperationException("Input must be coordinate sorted to use includePairedIntervals"); } final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder()); log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> OUTPUT=" + OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]"); // create OUTPUT file final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, presorted, OUTPUT); final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written"); while (filteringIterator.hasNext()) { final SAMRecord rec = filteringIterator.next(); outputWriter.addAlignment(rec); progress.record(rec); } filteringIterator.close(); outputWriter.close(); log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName()); }
if (0 < CHECK_FIRST_N_RECORDS && CHECK_FIRST_N_RECORDS <= progress.getCount()) break;
if (stopAfter > 0 && progress.getCount() >= stopAfter) { break;
if (0 < CHECK_FIRST_N_RECORDS && CHECK_FIRST_N_RECORDS <= progress.getCount()) break;
final long recordNumber = progress.getCount() + 1; final Collection<SAMValidationError> errors = record.isValid(); if (errors != null) { if (progress.getCount() > 0) { // Avoid exception being thrown as a result of no qualities being read final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM, FastqQualityFormat.Standard); if (format != FastqQualityFormat.Standard) { final String msg = "SAMFormatException on record " + progress.getCount() + 1; out.println(msg); throw new SAMException(msg, e);
final long recordNumber = progress.getCount() + 1; final Collection<SAMValidationError> errors = record.isValid(); if (errors != null) { if (progress.getCount() > 0) { // Avoid exception being thrown as a result of no qualities being read final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM, FastqQualityFormat.Standard); if (format != FastqQualityFormat.Standard) { final String msg = "SAMFormatException on record " + progress.getCount() + 1; out.println(msg); throw new SAMException(msg, e);
final long recordNumber = progress.getCount() + 1; final Collection<SAMValidationError> errors = record.isValid(); if (errors != null) { if (progress.getCount() > 0) { // Avoid exception being thrown as a result of no qualities being read final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM, FastqQualityFormat.Standard); if (format != FastqQualityFormat.Standard) { final String msg = "SAMFormatException on record " + progress.getCount() + 1; out.println(msg); throw new SAMException(msg, e);
if (stopAfter > 0 && progress.getCount() >= stopAfter) { break;
log.info("Processed " + progress.getCount() + " records"); log.info("Marking " + numDuplicates + " records as duplicates.");
log.info("Processed " + progress.getCount() + " records"); log.info("Marking " + numDuplicates + " records as duplicates.");
if (stopAfter > 0 && progress.getCount() >= stopAfter) { break;
log.info("Processed " + progress.getCount() + " records"); log.info("Found " + iterator.getNumRecordsWithNoMateCigar() + " records with no mate cigar optional tag."); log.info("Marking " + iterator.getNumDuplicates() + " records as duplicates.");
if (stopAfter > 0 && progress.getCount() >= stopAfter) { break;
log.info("Processed " + progress.getCount() + " records"); log.info("Found " + iterator.getNumRecordsWithNoMateCigar() + " records with no mate cigar optional tag."); log.info("Marking " + iterator.getNumDuplicates() + " records as duplicates.");