/**
 * Streams records from {@code rawit} through optional tag-recalculation stages and writes
 * every record to {@code writer}, logging progress as it goes.
 *
 * Pipeline (built conditionally from the requested {@code tags}):
 *   raw -> [async buffer] -> [NM/SA recalculation against the reference]
 *       -> [template tag recalculation / hard-clip softening] -> writer
 *
 * @param rawit                    source of records to process
 * @param writer                   destination for all processed records
 * @param reference                reference lookup used for NM/SA recalculation
 * @param tags                     SAM tags to (re)compute
 * @param softenHardClips          if true, a TemplateTagsIterator stage is added even when no template tags are requested
 * @param fixMates                 passed through to TemplateTagsIterator
 * @param fixDuplicates            passed through to TemplateTagsIterator
 * @param recalculateSupplementary passed through to TemplateTagsIterator
 * @param threadprefix             prefix for the background-buffer thread names
 * @throws IOException if an I/O error occurs
 */
public static void compute(Iterator<SAMRecord> rawit, SAMFileWriter writer, ReferenceLookup reference, Set<String> tags, boolean softenHardClips, boolean fixMates, boolean fixDuplicates, boolean recalculateSupplementary, String threadprefix) throws IOException {
    ProgressLogger progress = new ProgressLogger(log);
    // NOTE(review): "aysncit" is a typo for "asyncit"; rename when convenient.
    try (CloseableIterator<SAMRecord> aysncit = new AsyncBufferedIterator<SAMRecord>(rawit, threadprefix + "raw")) {
        Iterator<SAMRecord> it = aysncit;
        // NM and SA recalculation need the reference; buffer asynchronously ahead of it.
        if (tags.contains(SAMTag.NM.name()) || tags.contains(SAMTag.SA.name())) {
            it = new AsyncBufferedIterator<SAMRecord>(it, threadprefix + "nm");
            it = new NmTagIterator(it, reference);
        }
        // Template-spanning tags (and hard-clip softening) require pairing records by template.
        if (!Sets.intersection(tags, SAMRecordUtil.TEMPLATE_TAGS).isEmpty() || softenHardClips) {
            it = new TemplateTagsIterator(it, softenHardClips, fixMates, fixDuplicates, recalculateSupplementary, tags);
            it = new AsyncBufferedIterator<SAMRecord>(it, threadprefix + "tags");
        }
        // Drain the pipeline into the writer.
        while (it.hasNext()) {
            SAMRecord r = it.next();
            writer.addAlignment(r);
            progress.record(r);
        }
    }
}
// NOTE(review): the following definition is truncated in this view; body not visible.
private boolean isReferenceRequired() {
// NOTE(review): this span is a corrupted/incomplete extraction of a doWork()-style
// method (apparently from a split-SAM-by-number-of-reads tool): the log.info( call
// preceding the floating string fragment `"is known, use TOTAL_READS_IN_INPUT..."`
// is missing, and several closing braces are absent. Restore the original source
// before editing; no code changes are safe against this fragment.
final ProgressLogger firstPassProgress = new ProgressLogger(log, 1000000, "Counted"); if (TOTAL_READS_IN_INPUT == 0) { final SamReader firstPassReader = readerFactory.referenceSequence(REFERENCE_SEQUENCE).open(INPUT); "is known, use TOTAL_READS_IN_INPUT to skip first traversal."); for (SAMRecord rec : firstPassReader) { firstPassProgress.record(rec); log.info(String.format("First pass traversal to count number of reads ended, found %d total reads.", firstPassProgress.getCount())); final long totalReads = TOTAL_READS_IN_INPUT == 0 ? firstPassProgress.getCount() : TOTAL_READS_IN_INPUT; final ProgressLogger progress = new ProgressLogger(log); for (SAMRecord currentRecord : reader) { if (readsWritten >= readsPerFile && !lastReadName.equals(currentRecord.getReadName())) { lastReadName = currentRecord.getReadName(); readsWritten++; progress.record(currentRecord); if (progress.getCount() != totalReads) { log.warn(String.format("The totalReads (%d) provided does not match the reads found in the " + "input file (%d). Files may not be split evenly or number of files may not " + "match what was requested. There were %d files generated each with around %d " + "reads except the last file which contained %d reads.", totalReads, progress.getCount(), fileIndex - 1, readsPerFile, readsWritten) );
// NOTE(review): mid-method fragment — the enclosing method signature and the code
// between reader construction and the elapsed-time log are not visible here.
// Progress is reported every 100,000 records with the noun "Validated Read".
ProgressLogger progress = new ProgressLogger(log, 100000, "Validated Read");
// Validator writes its report to stdout; presumably the second argument caps
// reported errors per type — TODO confirm against SamFileValidator's constructor.
SamFileValidator v = new SamFileValidator(new PrintWriter(System.out), 1);
final SamReader reader = SamReaderFactory.make().referenceSequence(params.reference).open(params.cramFile);
log.info("Elapsed seconds: " + progress.getElapsedSeconds());
/**
 * Records a single processed item at the given reference position and, once every
 * {@code n} records, emits a progress line with counts, elapsed time, time for the
 * last period, and the last position seen.
 *
 * @param chrom reference name of the last record, or null for an unmapped position
 * @param pos   1-based position of the last record (ignored when chrom is null)
 * @return true if a progress line was logged by this call, false otherwise
 */
public synchronized boolean record(final String chrom, final int pos) {
    if (this.lastStartTime == -1) {
        this.lastStartTime = System.currentTimeMillis();
    }
    // Only every n-th record triggers output.
    if (++this.processed % this.n != 0) {
        return false;
    }
    final long now = System.currentTimeMillis();
    final long periodSeconds = (now - this.lastStartTime) / 1000;
    this.lastStartTime = now;
    final String elapsed = formatElapseTime((System.currentTimeMillis() - startTime) / 1000);
    final String countColumn = pad(fmt.format(this.processed), 13);
    final String periodColumn = pad(fmt.format(periodSeconds), 4);
    final String position = (chrom == null) ? "*/*" : chrom + ":" + fmt.format(pos);
    log.info(this.verb, " ", countColumn, " " + noun + ". Elapsed time: ", elapsed,
            "s. Time for last ", fmt.format(this.n), ": ", periodColumn,
            "s. Last read position: ", position);
    return true;
}
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
/**
 * Builds a single-threaded assembly record stream: one positional assembler per
 * breakend direction, each fed by its own merged, throttled, progress-logged
 * evidence iterator, concatenated into one iterator.
 *
 * @return iterator over all assembled SAMRecords for both breakend directions
 */
@SuppressWarnings("unused")
private Iterator<SAMRecord> getAllAssemblies_single_threaded() {
    final ProgressLogger assemblyProgress = new ProgressLogger(log);
    final List<Iterator<SAMRecord>> perDirection = new ArrayList<>();
    for (final BreakendDirection dir : BreakendDirection.values()) {
        final CloseableIterator<DirectedEvidence> merged = mergedIterator(source, false);
        final Iterator<DirectedEvidence> logged =
                new ProgressLoggingDirectedEvidenceIterator<>(getContext(), throttled(merged), assemblyProgress);
        perDirection.add(new PositionalAssembler(getContext(), this, new SequentialIdGenerator("asm"), logged, dir));
    }
    return Iterators.concat(perDirection.iterator());
}
/**
// NOTE(review): corrupted/incomplete extraction — the log/format call that should
// precede the floating string fragment `"Computation took %d seconds."` is missing,
// as are the loop and the code that updates nTotalLoci/nSkippedLoci. Restore the
// original source before editing; no code changes are safe against this fragment.
final ProgressLogger progressLogger = new ProgressLogger(log, 100000); long nTotalLoci = 0; long nSkippedLoci = 0; progressLogger.record(info.getLocus().getSequenceName(), info.getLocus().getPosition()); "Computation took %d seconds.", nTotalLoci, nProcessedLoci, nSkippedLoci, progressLogger.getElapsedSeconds()));
/**
 * Records each of the given SAMRecords in order.
 *
 * @param recs records to log progress for
 * @return true if logging was triggered by at least one of the records
 */
public boolean record(final SAMRecord... recs) {
    boolean anyTriggered = false;
    for (final SAMRecord rec : recs) {
        // record() must run for every element regardless of prior triggers.
        if (record(rec)) {
            anyTriggered = true;
        }
    }
    return anyTriggered;
}
// NOTE(review): lone statement from a larger method not visible here — wraps the
// raw record iterator so that iteration progress is logged.
ProgressLoggingSAMRecordIterator logit = new ProgressLoggingSAMRecordIterator(rawit, new ProgressLogger(log));
// NOTE(review): duplicate of an earlier corrupted fragment — the call preceding the
// floating string `"Computation took %d seconds."` is missing, as are the loop and
// the updates to nTotalLoci/nSkippedLoci. Restore the original source before editing.
final ProgressLogger progressLogger = new ProgressLogger(log, 100000); long nTotalLoci = 0; long nSkippedLoci = 0; progressLogger.record(info.getLocus().getSequenceName(), info.getLocus().getPosition()); "Computation took %d seconds.", nTotalLoci, nProcessedLoci, nSkippedLoci, progressLogger.getElapsedSeconds()));
/**
 * Records that the given record has been processed and triggers logging if necessary.
 * Unaligned records are reported with a null reference at position 0.
 *
 * @param rec the record just processed
 * @return true if logging was triggered, false otherwise
 */
public synchronized boolean record(final SAMRecord rec) {
    final boolean unaligned = rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    return unaligned
            ? record(null, 0)
            : record(rec.getReferenceName(), rec.getAlignmentStart());
}
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
/**
 * Writes the given variant calls to a VCF file, logging write progress.
 * When OUTPUT_TO_TEMP_FILE is enabled, writes to a working file first and then
 * moves it into place so a partially-written output never occupies the final path.
 *
 * @param file  destination VCF file
 * @param calls variant records to write, in the order they should appear
 * @throws IOException if the temporary file cannot be moved into place
 */
protected void saveVcf(File file, Iterator<IdsvVariantContext> calls) throws IOException {
    File tmp = gridss.Defaults.OUTPUT_TO_TEMP_FILE ? FileSystemContext.getWorkingFileFor(file) : file;
    final ProgressLogger writeProgress = new ProgressLogger(log);
    try (VariantContextWriter vcfWriter = getContext().getVariantContextWriter(tmp, getOutputHeader(), true)) {
        while (calls.hasNext()) {
            IdsvVariantContext record = calls.next();
            vcfWriter.add(record);
            writeProgress.record(record.getContig(), record.getStart());
        }
    }
    // Atomically promote the finished working file to the requested location.
    if (tmp != file) {
        FileHelper.move(tmp, file, true);
    }
}
// NOTE(review): the following definition is truncated in this view; body not visible.
protected VCFHeader getInputHeader() {
// NOTE(review): duplicate of an earlier corrupted fragment — the log.info( call
// preceding the floating string `"is known, use TOTAL_READS_IN_INPUT..."` is
// missing and several closing braces are absent. Restore the original source
// before editing; no code changes are safe against this fragment.
final ProgressLogger firstPassProgress = new ProgressLogger(log, 1000000, "Counted"); if (TOTAL_READS_IN_INPUT == 0) { final SamReader firstPassReader = readerFactory.referenceSequence(REFERENCE_SEQUENCE).open(INPUT); "is known, use TOTAL_READS_IN_INPUT to skip first traversal."); for (SAMRecord rec : firstPassReader) { firstPassProgress.record(rec); log.info(String.format("First pass traversal to count number of reads ended, found %d total reads.", firstPassProgress.getCount())); final long totalReads = TOTAL_READS_IN_INPUT == 0 ? firstPassProgress.getCount() : TOTAL_READS_IN_INPUT; final ProgressLogger progress = new ProgressLogger(log); for (SAMRecord currentRecord : reader) { if (readsWritten >= readsPerFile && !lastReadName.equals(currentRecord.getReadName())) { lastReadName = currentRecord.getReadName(); readsWritten++; progress.record(currentRecord); if (progress.getCount() != totalReads) { log.warn(String.format("The totalReads (%d) provided does not match the reads found in the " + "input file (%d). Files may not be split evenly or number of files may not " + "match what was requested. There were %d files generated each with around %d " + "reads except the last file which contained %d reads.", totalReads, progress.getCount(), fileIndex - 1, readsPerFile, readsWritten) );
// Progress loggers for the two comparison paths (presumably duplicate marking):
// keeper comparisons are reported every 10,000 records, comparisons against the
// remaining ReadEnds every 1,000 — NOTE(review): confirm the intent of the two
// different intervals against the enclosing class.
progressLoggerForKeeper = new ProgressLogger(log, 10000, "compared", "ReadEnds to keeper");
progressLoggerForRest = new ProgressLogger(log, 1000, "compared", "ReadEnds to others");
/**
 * Adds the record to the sink if it is a primary alignment, or a secondary
 * alignment when secondary alignments are included. Records progress for each
 * record written and, on logging ticks, reports how many reads were unmapped as
 * suspected cross-species contamination.
 */
private void addIfNotFiltered(final Sink out, final SAMRecord rec) {
    final boolean keep = includeSecondaryAlignments || !rec.getNotPrimaryAlignmentFlag();
    if (!keep) {
        return;
    }
    out.add(rec);
    final boolean loggedThisTick = this.progress.record(rec);
    if (loggedThisTick && crossSpeciesReads > 0) {
        log.info(String.format("%d Reads have been unmapped due to being suspected of being Cross-species contamination.", crossSpeciesReads));
    }
}
public static void main(String[] args) throws IOException { if (args.length < 2) { System.out.println("Usage: " + PrintReadsExample.class.getCanonicalName() + " inFile eagerDecode [outFile]"); System.exit(1); } final File inputFile = new File(args[0]); final boolean eagerDecode = Boolean.parseBoolean(args[1]); //useful to test (realistic) scenarios in which every record is always fully decoded. final File outputFile = args.length >= 3 ? new File(args[2]) : null; final long start = System.currentTimeMillis(); log.info("Start with args:" + Arrays.toString(args)); printConfigurationInfo(); SamReaderFactory readerFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); if (eagerDecode) { readerFactory = readerFactory.enable(SamReaderFactory.Option.EAGERLY_DECODE); } try (final SamReader reader = readerFactory.open(inputFile)) { final SAMFileHeader header = reader.getFileHeader(); try (final SAMFileWriter writer = outputFile != null ? new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile) : null) { final ProgressLogger pl = new ProgressLogger(log, 1000000); for (final SAMRecord record : reader) { if (writer != null) { writer.addAlignment(record); } pl.record(record); } } } final long end = System.currentTimeMillis(); log.info(String.format("Done. Elapsed time %.3f seconds", (end - start) / 1000.0)); }
private void filterReads(final FilteringSamIterator filteringIterator) { // get OUTPUT header from INPUT and overwrite it if necessary final SAMFileHeader fileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT); final SAMFileHeader.SortOrder inputSortOrder = fileHeader.getSortOrder(); if (SORT_ORDER != null) { fileHeader.setSortOrder(SORT_ORDER); } if (FILTER == Filter.includePairedIntervals && fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new UnsupportedOperationException("Input must be coordinate sorted to use includePairedIntervals"); } final boolean presorted = inputSortOrder.equals(fileHeader.getSortOrder()); log.info("Filtering [presorted=" + presorted + "] " + INPUT.getName() + " -> OUTPUT=" + OUTPUT.getName() + " [sortorder=" + fileHeader.getSortOrder().name() + "]"); // create OUTPUT file final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, presorted, OUTPUT); final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written"); while (filteringIterator.hasNext()) { final SAMRecord rec = filteringIterator.next(); outputWriter.addAlignment(rec); progress.record(rec); } filteringIterator.close(); outputWriter.close(); log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName()); }
// Progress loggers for the two comparison paths (presumably duplicate marking):
// keeper comparisons are reported every 10,000 records, comparisons against the
// remaining ReadEnds every 1,000 — NOTE(review): confirm the intent of the two
// different intervals against the enclosing class.
progressLoggerForKeeper = new ProgressLogger(log, 10000, "compared", "ReadEnds to keeper");
progressLoggerForRest = new ProgressLogger(log, 1000, "compared", "ReadEnds to others");
/**
 * Adds the record to the sink if it is a primary alignment, or a secondary
 * alignment when secondary alignments are included. Records progress for each
 * record written and, on logging ticks, reports how many reads were unmapped as
 * suspected cross-species contamination.
 */
private void addIfNotFiltered(final Sink out, final SAMRecord rec) {
    final boolean keep = includeSecondaryAlignments || !rec.getNotPrimaryAlignmentFlag();
    if (!keep) {
        return;
    }
    out.add(rec);
    final boolean loggedThisTick = this.progress.record(rec);
    if (loggedThisTick && crossSpeciesReads > 0) {
        log.info(String.format("%d Reads have been unmapped due to being suspected of being Cross-species contamination.", crossSpeciesReads));
    }
}