@Test(expectedExceptions = {SAMException.class}, dataProvider = "TestFailReference") public void testFailGet(final String fileName, final int index1, final int index2) throws SAMException { final Path refPath = Paths.get(fileName); final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refPath); try { refWalker.get(index1); refWalker.get(index2); } finally { CloserUtil.close(refWalker); } }
ref = null; } else { ref = walker.get(rec.getReferenceIndex());
@Test(dataProvider = "TestReference") public void testGetFile(final String fileName, final int index1, final int index2) throws SAMException { final File refFile = new File(fileName); final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refFile); ReferenceSequence sequence = refWalker.get(index1); Assert.assertEquals(sequence.getContigIndex(), index1); sequence = refWalker.get(index2); Assert.assertEquals(sequence.getContigIndex(), index2); CloserUtil.close(refWalker); }
@Test(dataProvider = "TestReference") public void testGet(final String fileName, final int index1, final int index2) throws SAMException { final Path refPath = Paths.get(fileName); final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refPath); ReferenceSequence sequence = refWalker.get(index1); Assert.assertEquals(sequence.getContigIndex(), index1); sequence = refWalker.get(index2); Assert.assertEquals(sequence.getContigIndex(), index2); CloserUtil.close(refWalker); }
/** Calculates and sets UQ tag from the record and the reference * * @param record the record to be fixed * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing. * * No return value, modifies the provided record. */ public static void fixUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) { if (record.getBaseQualities() != SAMRecord.NULL_QUALS) { final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases(); record.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(record, referenceBases, 0, isBisulfiteSequence)); } }
/** Calculates and sets UQ tag from the record and the reference * * @param record the record to be fixed * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing. * * No return value, modifies the provided record. */ public static void fixUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) { if (record.getBaseQualities() != SAMRecord.NULL_QUALS) { final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases(); record.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(record, referenceBases, 0, isBisulfiteSequence)); } }
/** * Ensure that the requested sequence is loaded. Throws an exception if out-of-order * request is made, or if there is a mismatch between the requested name and the name * found in the ReferenceSequenceFile */ public ReferenceSequence get(final int sequenceIndex, final String sequenceName, final int length) { // Has the side-effect of setting referenceSequence member get(sequenceIndex); if (!referenceSequence.getName().equals(sequenceName)) { // Sanity check the sequence names against the sequence dictionary while scanning through. throw new SAMException("Sequence name mismatch at sequence index (" + referenceSequence.getContigIndex() + ", " + referenceSequence.getName() + ") != " + sequenceName); } if (referenceSequence.getBases().length != length) { throw new SAMException("Sequence length mismatch for (" + sequenceIndex + ", " + sequenceName + "). expected " + length + " but found " + referenceSequence.getBases().length); } return referenceSequence; }
/** * Ensure that the requested sequence is loaded. Throws an exception if out-of-order * request is made, or if there is a mismatch between the requested name and the name * found in the ReferenceSequenceFile */ public ReferenceSequence get(final int sequenceIndex, final String sequenceName, final int length) { // Has the side-effect of setting referenceSequence member get(sequenceIndex); if (!referenceSequence.getName().equals(sequenceName)) { // Sanity check the sequence names against the sequence dictionary while scanning through. throw new SAMException("Sequence name mismatch at sequence index (" + referenceSequence.getContigIndex() + ", " + referenceSequence.getName() + ") != " + sequenceName); } if (referenceSequence.getBases().length != length) { throw new SAMException("Sequence length mismatch for (" + sequenceIndex + ", " + sequenceName + "). expected " + length + " but found " + referenceSequence.getBases().length); } return referenceSequence; }
/** * Ensure that the requested sequence is loaded. Throws an exception if out-of-order * request is made, or if there is a mismatch between the requested name and the name * found in the ReferenceSequenceFile */ public ReferenceSequence get(final int sequenceIndex, final String sequenceName, final int length) { // Has the side-effect of setting referenceSequence member get(sequenceIndex); if (!referenceSequence.getName().equals(sequenceName)) { // Sanity check the sequence names against the sequence dictionary while scanning through. throw new SAMException("Sequence name mismatch at sequence index (" + referenceSequence.getContigIndex() + ", " + referenceSequence.getName() + ") != " + sequenceName); } if (referenceSequence.getBases().length != length) { throw new SAMException("Sequence length mismatch for (" + sequenceIndex + ", " + sequenceName + "). expected " + length + " but found " + referenceSequence.getBases().length); } return referenceSequence; }
@Override public SAMLocusAndReference next() { final LocusInfo locus = locusIterator.next(); final ReferenceSequence referenceSequence = referenceSequenceFileWalker.get(locus.getSequenceIndex(), locus.getSequenceName(), locus.getSequenceLength()); //position is 1-based...arrays are 0-based! return new SAMLocusAndReference(locus, referenceSequence.getBases()[locus.getPosition() - 1]); }
@Override public SAMLocusAndReference next() { final LocusInfo locus = locusIterator.next(); final ReferenceSequence referenceSequence = referenceSequenceFileWalker.get(locus.getSequenceIndex(), locus.getSequenceName(), locus.getSequenceLength()); //position is 1-based...arrays are 0-based! return new SAMLocusAndReference(locus, referenceSequence.getBases()[locus.getPosition() - 1]); }
/** Calculates and sets the NM, MD, and and UQ tags from the record and the reference * * @param record the record to be fixed * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing which would imply a different * calculation of the NM tag. * * No return value, modifies the provided record. */ public static void fixNmMdAndUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) { final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases(); // only recalculate NM if it isn't bisulfite, since it needs to be treated specially below SequenceUtil.calculateMdAndNmTags(record, referenceBases, true, !isBisulfiteSequence); if (isBisulfiteSequence) { // recalculate the NM tag for bisulfite data record.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(record, referenceBases, 0, isBisulfiteSequence)); } fixUq(record, refSeqWalker, isBisulfiteSequence); }
/** Calculates and sets the NM, MD, and and UQ tags from the record and the reference * * @param record the record to be fixed * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing which would imply a different * calculation of the NM tag. * * No return value, modifies the provided record. */ public static void fixNmMdAndUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) { final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases(); // only recalculate NM if it isn't bisulfite, since it needs to be treated specially below SequenceUtil.calculateMdAndNmTags(record, referenceBases, true, !isBisulfiteSequence); if (isBisulfiteSequence) { // recalculate the NM tag for bisulfite data record.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(record, referenceBases, 0, isBisulfiteSequence)); } fixUq(record, refSeqWalker, isBisulfiteSequence); }
/** * Method gets the data from iterator for each locus and processes it with the help of collector. */ @Override public void processFile() { long counter = 0; while (iterator.hasNext()) { final AbstractLocusInfo<T> info = iterator.next(); final ReferenceSequence ref = refWalker.get(info.getSequenceIndex()); boolean referenceBaseN = collector.isReferenceBaseN(info.getPosition(), ref); collector.addInfo(info, ref, referenceBaseN); if (referenceBaseN) { continue; } progress.record(info.getSequenceName(), info.getPosition()); if (collector.isTimeToStop(++counter)) { break; } collector.setCounter(counter); } // check that we added the same number of bases to the raw coverage histogram and the base quality histograms final long sumBaseQ = Arrays.stream(collector.unfilteredBaseQHistogramArray).sum(); final long sumDepthHisto = LongStream.rangeClosed(0, collector.coverageCap).map(i -> (i * collector.unfilteredDepthHistogramArray[(int) i])).sum(); if (sumBaseQ != sumDepthHisto) { log.error("Coverage and baseQ distributions contain different amount of bases!"); } }
/** * Method gets the data from iterator for each locus and processes it with the help of collector. */ @Override public void processFile() { long counter = 0; while (iterator.hasNext()) { final AbstractLocusInfo<T> info = iterator.next(); final ReferenceSequence ref = refWalker.get(info.getSequenceIndex()); boolean referenceBaseN = collector.isReferenceBaseN(info.getPosition(), ref); collector.addInfo(info, ref, referenceBaseN); if (referenceBaseN) { continue; } progress.record(info.getSequenceName(), info.getPosition()); if (collector.isTimeToStop(++counter)) { break; } collector.setCounter(counter); } // check that we added the same number of bases to the raw coverage histogram and the base quality histograms final long sumBaseQ = Arrays.stream(collector.unfilteredBaseQHistogramArray).sum(); final long sumDepthHisto = LongStream.rangeClosed(0, collector.coverageCap).map(i -> (i * collector.unfilteredDepthHistogramArray[(int) i])).sum(); if (sumBaseQ != sumDepthHisto) { log.error("Coverage and baseQ distributions contain different amount of bases!"); } }
private void validateNmTag(final SAMRecord record, final long recordNumber) { if (!record.getReadUnmappedFlag()) { final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM); if (tagNucleotideDiffs == null) { addError(new SAMValidationError( Type.MISSING_TAG_NM, "NM tag (nucleotide differences) is missing", record.getReadName(), recordNumber)); } else if (refFileWalker != null) { final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex()); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced()); if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) { addError(new SAMValidationError( Type.INVALID_TAG_NM, "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs + "] does not match reality [" + actualNucleotideDiffs + "]", record.getReadName(), recordNumber)); } } } }
private void validateNmTag(final SAMRecord record, final long recordNumber) { if (!record.getReadUnmappedFlag()) { final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM); if (tagNucleotideDiffs == null) { addError(new SAMValidationError( Type.MISSING_TAG_NM, "NM tag (nucleotide differences) is missing", record.getReadName(), recordNumber)); } else if (refFileWalker != null) { final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex()); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced()); if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) { addError(new SAMValidationError( Type.INVALID_TAG_NM, "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs + "] does not match reality [" + actualNucleotideDiffs + "]", record.getReadName(), recordNumber)); } } } }
private void validateNmTag(final SAMRecord record, final long recordNumber) { if (!record.getReadUnmappedFlag()) { final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM); if (tagNucleotideDiffs == null) { addError(new SAMValidationError( Type.MISSING_TAG_NM, "NM tag (nucleotide differences) is missing", record.getReadName(), recordNumber)); } else if (refFileWalker != null) { final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex()); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced()); if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) { addError(new SAMValidationError( Type.INVALID_TAG_NM, "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs + "] does not match reality [" + actualNucleotideDiffs + "]", record.getReadName(), recordNumber)); } } } }
progressLogger.record(samRecord); if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) { final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex()); metricsCollector.acceptRecord(samRecord, referenceSequence);
progressLogger.record(samRecord); if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) { final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex()); metricsCollector.acceptRecord(samRecord, referenceSequence);