/** * Convenience method for retrieving int attribute */ public static int getIntAttribute(SAMRecord read, String attribute) { Integer num = read.getIntegerAttribute(attribute); if (num == null) { return 0; } else { return num; } }
public int getAssemblyMaxReadLength() { return record.getIntegerAttribute(SamTags.ASSEMBLY_MAX_READ_LENGTH); } public BreakendDirection getAssemblyDirection() {
private static Integer reportedAlignments(SAMRecord r) { if (r.getIntegerAttribute(SAMTag.NH.name()) != null) { // NH i Number of reported alignments that contains the query in the current record return r.getIntegerAttribute(SAMTag.NH.name()); } if (r.getIntegerAttribute(SAMTag.IH.name()) != null) { // IH i Number of stored alignments in SAM that contains the query in the current record return r.getIntegerAttribute(SAMTag.IH.name()); } return null; } /**
public int compare(final SAMRecord rec1, final SAMRecord rec2) { final Integer hi1 = rec1.getIntegerAttribute(SAMTag.HI.name()); final Integer hi2 = rec2.getIntegerAttribute(SAMTag.HI.name()); if (hi1 == null) { if (hi2 == null) return 0; else return 1; } else if (hi2 == null) { return -1; } else { return hi1.compareTo(hi2); } } }
public int compare(final SAMRecord rec1, final SAMRecord rec2) { final Integer hi1 = rec1.getIntegerAttribute(SAMTag.HI.name()); final Integer hi2 = rec2.getIntegerAttribute(SAMTag.HI.name()); if (hi1 == null) { if (hi2 == null) return 0; else return 1; } else if (hi2 == null) { return -1; } else { return hi1.compareTo(hi2); } } }
/** * Encodes mapping information from a record into a string according to the format sepcified in the * Sam-Spec under the SA tag. No protection against missing values (for cigar, and NM tag). * (Might make sense to move this to htsJDK.) * * @param rec SAMRecord whose alignment information will be encoded * @return String encoding rec's alignment information according to SA tag in the SAM spec */ static private String encodeMappingInformation(SAMRecord rec) { return String.join(",", rec.getContig(), ((Integer) rec.getAlignmentStart()).toString(), rec.getCigarString(), ((Integer) rec.getMappingQuality()).toString(), getStringOfNullable(rec.getIntegerAttribute(SAMTag.NM.name()))) + ";"; }
/** * Encodes mapping information from a record into a string according to the format sepcified in the * Sam-Spec under the SA tag. No protection against missing values (for cigar, and NM tag). * (Might make sense to move this to htsJDK.) * * @param rec SAMRecord whose alignment information will be encoded * @return String encoding rec's alignment information according to SA tag in the SAM spec */ static private String encodeMappingInformation(SAMRecord rec) { return String.join(",", rec.getContig(), ((Integer) rec.getAlignmentStart()).toString(), rec.getCigarString(), ((Integer) rec.getMappingQuality()).toString(), getStringOfNullable(rec.getIntegerAttribute(SAMTag.NM.name()))) + ";"; }
/** * Returns original edit distance as set in YX tag. */ public static int getOrigEditDistance(SAMRecord read) { Integer distance = null; if (read.getReadUnmappedFlag()) { distance = read.getReadLength(); } else { distance = read.getIntegerAttribute("YX"); if (distance == null) { distance = read.getReadLength(); } } return distance; }
@Test public void testGetTypedAttributeMethods() throws Exception { final SAMRecord rec = writeAndReadSamRecord("bam"); Assert.assertEquals(rec.getByteAttribute(INTEGER_TAG).intValue(), 1); Assert.assertEquals(rec.getShortAttribute(INTEGER_TAG).intValue(), 1); Assert.assertEquals(rec.getIntegerAttribute(INTEGER_TAG).intValue(), 1); }
@Override public Integer stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { Integer numberMismatches = recordAndOffset.getRecord().getIntegerAttribute(SAMTag.NM.name()); // Record may not contain an NM tag in which case we cannot stratify over it. if (numberMismatches == null) { return null; } // We are only interested in the number of errors on the read // not including the current base. if (recordAndOffset.getReadBase() != locusInfo.getReferenceBase()) { return numberMismatches - 1; } return numberMismatches; }
@Override public Integer stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { Integer numberMismatches = recordAndOffset.getRecord().getIntegerAttribute(SAMTag.NM.name()); // Record may not contain an NM tag in which case we cannot stratify over it. if (numberMismatches == null) { return null; } // We are only interested in the number of errors on the read // not including the current base. if (recordAndOffset.getReadBase() != locusInfo.getReferenceBase()) { return numberMismatches - 1; } return numberMismatches; }
/** * 0-1 scaled percentage identity of mapped read bases. * * @return portion of reference-aligned bases that match reference */ public static float getAlignedIdentity(SAMRecord record) { Integer nm = record.getIntegerAttribute(SAMTag.NM.name()); if (nm != null) { int refBasesToConsider = record.getReadLength() - SAMRecordUtil.getStartSoftClipLength(record) - SAMRecordUtil.getEndSoftClipLength(record); int refBaseMatches = refBasesToConsider - nm + SequenceUtil.countInsertedBases(record) + SequenceUtil.countDeletedBases(record); return refBaseMatches / (float) refBasesToConsider; } String md = record.getStringAttribute(SAMTag.MD.name()); if (StringUtils.isNotEmpty(md)) { // Socrates handles this: should we? Which aligners write MD but not // NM? throw new RuntimeException("Not Yet Implemented: calculation from reads with MD tag but not NM tag"); } throw new IllegalStateException(String.format("Read %s missing NM tag", record.getReadName())); }
/** * Should be an exception if a typed attribute call is made for the wrong type. */ @Test(expectedExceptions = RuntimeException.class) public void testGetTypedAttributeForWrongType() throws Exception { final SAMRecord rec = createSamRecord(); rec.setAttribute(STRING_TAG, "Hello, World!"); writeAndReadSamRecord("bam", rec); rec.getIntegerAttribute(STRING_TAG); Assert.fail("Exception should have been thrown."); }
/** * The index of segment in the template * * @param record * SAMRecord * @return The index of segment in the template */ public static final int getSegmentIndex(SAMRecord record) { Integer hiTag = record.getIntegerAttribute(SAMTag.FI.name()); if (hiTag != null) return hiTag; if (record.getReadPairedFlag()) { if (record.getFirstOfPairFlag()) return 0; if (record.getSecondOfPairFlag()) return 1; } return 0; // default to the first segment as per sam specs }
public ChimericAlignment(SAMRecord r) { this.rname = r.getReferenceName(); this.pos = r.getAlignmentStart(); this.isNegativeStrand = r.getReadNegativeStrandFlag(); this.cigar = r.getCigar(); this.mapq = r.getMappingQuality(); this.nm = r.getIntegerAttribute(SAMTag.NM.name()); } public ChimericAlignment(String str) {
/** * Converts a record with mapq below the given mapq threshold to unmapped * reads * * @param record * SAMRecord to convert * @param minMapq * minimum MAPQ to avoid unmapping */ public static SAMRecord lowMapqToUnmapped(SAMRecord record, double minMapq) { boolean primaryUnmapped = lowMapqToUnmapped_SA(record, minMapq); if (record.getMappingQuality() < minMapq || primaryUnmapped) { record.setReadUnmappedFlag(true); } if (record.getReadPairedFlag()) { Integer mateMapq = record.getIntegerAttribute(SAMTag.MQ.name()); if (mateMapq != null && mateMapq < minMapq) { record.setMateUnmappedFlag(true); } } return record; }
@Test public void testCalculateNmTag() { final File TEST_DIR = new File("src/test/resources/htsjdk/samtools/SequenceUtil"); final File referenceFile = new File(TEST_DIR, "reference_with_lower_and_uppercase.fasta"); final File samFile = new File(TEST_DIR, "upper_and_lowercase_read.sam"); SamReader reader = SamReaderFactory.makeDefault().open(samFile); ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile); reader.iterator().stream().forEach(r -> { Integer nm = SequenceUtil.calculateSamNmTag(r, ref.getSequence(r.getContig()).getBases()); String md = r.getStringAttribute(SAMTag.MD.name()); Assert.assertEquals(r.getIntegerAttribute(SAMTag.NM.name()), nm, "problem with NM in read \'" + r.getReadName() + "\':"); SequenceUtil.calculateMdAndNmTags(r, ref.getSequence(r.getContig()).getBases(), true, true); Assert.assertEquals(r.getIntegerAttribute(SAMTag.NM.name()), nm, "problem with NM in read \'" + r.getReadName() + "\':"); if (md != null) { Assert.assertEquals(r.getStringAttribute(SAMTag.MD.name()), md, "problem with MD in read \'" + r.getReadName() + "\':"); } }); }
protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SAMRecord alignment) { // If the read was trimmed or not all the bases were sent for alignment, clip it final int alignmentReadLength = alignment.getReadLength(); final int originalReadLength = rec.getReadLength(); final int trimmed = (!rec.getReadPairedFlag()) || rec.getFirstOfPairFlag() ? this.read1BasesTrimmed != null ? this.read1BasesTrimmed : 0 : this.read2BasesTrimmed != null ? this.read2BasesTrimmed : 0; final int notWritten = originalReadLength - (alignmentReadLength + trimmed); // Update cigar if the mate maps off the reference createNewCigarsIfMapsOffEndOfReference(rec); rec.setCigar(CigarUtil.addSoftClippedBasesToEndsOfCigar( rec.getCigar(), rec.getReadNegativeStrandFlag(), notWritten, trimmed)); // If the adapter sequence is marked and clipAdapter is true, clip it if (this.clipAdapters && rec.getAttribute(ReservedTagConstants.XT) != null) { CigarUtil.softClip3PrimeEndOfRead(rec, rec.getIntegerAttribute(ReservedTagConstants.XT)); removeNmMdAndUqTags(rec); // these tags are now invalid! } }
protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SAMRecord alignment) { // If the read was trimmed or not all the bases were sent for alignment, clip it final int alignmentReadLength = alignment.getReadLength(); final int originalReadLength = rec.getReadLength(); final int trimmed = (!rec.getReadPairedFlag()) || rec.getFirstOfPairFlag() ? this.read1BasesTrimmed != null ? this.read1BasesTrimmed : 0 : this.read2BasesTrimmed != null ? this.read2BasesTrimmed : 0; final int notWritten = originalReadLength - (alignmentReadLength + trimmed); // Update cigar if the mate maps off the reference createNewCigarsIfMapsOffEndOfReference(rec); rec.setCigar(CigarUtil.addSoftClippedBasesToEndsOfCigar( rec.getCigar(), rec.getReadNegativeStrandFlag(), notWritten, trimmed)); // If the adapter sequence is marked and clipAdapter is true, clip it if (this.clipAdapters && rec.getAttribute(ReservedTagConstants.XT) != null) { CigarUtil.softClip3PrimeEndOfRead(rec, rec.getIntegerAttribute(ReservedTagConstants.XT)); removeNmMdAndUqTags(rec); // these tags are now invalid! } }
public static SAMRecord ensureNmTag(ReferenceSequenceFile ref, SAMRecord record) { if (record == null) return record; if (record.getReadBases() == null) return record; if (record.getReadBases() == SAMRecord.NULL_SEQUENCE) return record; if (record.getIntegerAttribute(SAMTag.NM.name()) != null) return record; if (record.getReadUnmappedFlag()) return record; byte[] refSeq = ref .getSubsequenceAt(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()) .getBases(); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSeq, record.getAlignmentStart() - 1); record.setAttribute(SAMTag.NM.name(), actualNucleotideDiffs); return record; }