/**
 * Sets the mate alignment start and records that the field has been explicitly populated.
 *
 * <p>{@link LazyField#MATE_ALIGNMENT_START} is added to {@code initializedFields} when it is not
 * already present; the value is then handed to the superclass setter.
 *
 * @param value the mate alignment start to store
 */
@Override
public void setMateAlignmentStart(final int value) {
    final boolean alreadyMarked = initializedFields.contains(LazyField.MATE_ALIGNMENT_START);
    if (!alreadyMarked) {
        initializedFields.add(LazyField.MATE_ALIGNMENT_START);
    }
    super.setMateAlignmentStart(value);
}
/**
 * Stores the mate alignment start via the superclass after flagging the field as initialized.
 *
 * <p>If {@code initializedFields} does not yet contain {@link LazyField#MATE_ALIGNMENT_START},
 * it is added before delegating.
 *
 * @param value the mate alignment start to store
 */
@Override
public void setMateAlignmentStart(final int value) {
    final boolean fieldKnown = initializedFields.contains(LazyField.MATE_ALIGNMENT_START);
    if (!fieldKnown) {
        initializedFields.add(LazyField.MATE_ALIGNMENT_START);
    }
    super.setMateAlignmentStart(value);
}
@Test public void testAllDuplicatesAllPairs() { List<SAMRecord> list = new ArrayList<SAMRecord>(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ"+ x, 0, 1, 100); read.setDuplicateReadFlag(true); read.setMateAlignmentStart(100); read.setMateReferenceIndex(0); read.setReadPairedFlag(true); list.add(read); } Set<List<SAMRecord>> myPairing = obj.uniqueReadSets(list); Assert.assertEquals(myPairing.size(), 1); // unique }
@Test public void testNoDuplicatesAllPairs() { List<SAMRecord> list = new ArrayList<SAMRecord>(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ"+ x, 0, 1, 100); if (x == 0) read.setDuplicateReadFlag(true); // one is a dup but (next line) read.setMateAlignmentStart(100); // they all have a shared start and mate start so they're dup's read.setMateReferenceIndex(0); read.setReadPairedFlag(true); list.add(read); } Set<List<SAMRecord>> myPairing = obj.uniqueReadSets(list); Assert.assertEquals(myPairing.size(), 1); // unique }
@Test public void testAllDuplicatesAllPairsDifferentPairedEnd() { List<SAMRecord> list = new ArrayList<SAMRecord>(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); if (x == 0) read.setDuplicateReadFlag(true); // one is a dup read.setMateAlignmentStart(100 + x); read.setMateReferenceIndex(0); read.setReadPairedFlag(true); list.add(read); } Set<List<SAMRecord>> myPairing = obj.uniqueReadSets(list); Assert.assertEquals(myPairing.size(), 10); // unique } }
/**
 * Corrupts the mate fields of the ten records produced by five pairs, runs validation through
 * {@code executeValidationWithErrorIgnoring} with an empty ignore list and the final flag set to
 * {@code true}, and checks that none of the four mate-mismatch error types appear in the results.
 *
 * @throws IOException if validation fails to read the records
 */
@Test
public void testSkipMateValidation() throws IOException {
    final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
    for (int pair = 0; pair < 5; pair++) {
        samBuilder.addPair(String.valueOf(pair), pair, pair, pair + 100);
    }

    final Iterator<SAMRecord> records = samBuilder.iterator();
    records.next().setMateReferenceName("*");
    records.next().setMateAlignmentStart(Integer.MAX_VALUE);

    // Each remaining corruption consumes two records: the one being modified, then the one the
    // new value is derived from (receiver is evaluated before the argument).
    final SAMRecord startTarget = records.next();
    final SAMRecord startSource = records.next();
    startTarget.setMateAlignmentStart(startSource.getAlignmentStart() + 1);

    final SAMRecord strandTarget = records.next();
    final SAMRecord strandSource = records.next();
    strandTarget.setMateNegativeStrandFlag(!strandSource.getReadNegativeStrandFlag());

    final SAMRecord refIndexTarget = records.next();
    final SAMRecord refIndexSource = records.next();
    refIndexTarget.setMateReferenceIndex(refIndexSource.getReferenceIndex() + 1);

    final SAMRecord unmappedTarget = records.next();
    final SAMRecord unmappedSource = records.next();
    unmappedTarget.setMateUnmappedFlag(!unmappedSource.getReadUnmappedFlag());

    final Histogram<String> results = executeValidationWithErrorIgnoring(
            samBuilder.getSamReader(),
            null,
            IndexValidationStringency.EXHAUSTIVE,
            Collections.EMPTY_LIST,
            true);

    final SAMValidationError.Type[] absentTypes = {
            SAMValidationError.Type.MISMATCH_FLAG_MATE_NEG_STRAND,
            SAMValidationError.Type.MISMATCH_FLAG_MATE_UNMAPPED,
            SAMValidationError.Type.MISMATCH_MATE_ALIGNMENT_START,
            SAMValidationError.Type.MISMATCH_MATE_REF_INDEX
    };
    for (final SAMValidationError.Type type : absentTypes) {
        Assert.assertNull(results.get(type.getHistogramString()));
    }
}
/**
 * Populates the mate fields of a supplemental SAMRecord (e.g. from a split alignment) from the
 * primary alignment of the read's mate.
 *
 * <p>The mate reference index, mate alignment start, mate strand flag and mate unmapped flag are
 * copied from the corresponding read-level values of {@code matePrimary}; the inferred insert
 * size is set to the negation of {@code matePrimary}'s.
 *
 * @param supplemental a supplemental alignment for the mate pair of the primary supplied
 * @param matePrimary  the primary alignment of the mate pair of the supplemental
 */
public static void setMateInformationOnSupplementalAlignment(final SAMRecord supplemental,
                                                             final SAMRecord matePrimary) {
    // Mate position.
    supplemental.setMateReferenceIndex(matePrimary.getReferenceIndex());
    supplemental.setMateAlignmentStart(matePrimary.getAlignmentStart());

    // Mate flags.
    supplemental.setMateNegativeStrandFlag(matePrimary.getReadNegativeStrandFlag());
    supplemental.setMateUnmappedFlag(matePrimary.getReadUnmappedFlag());

    // Insert size is the mate's value with the sign flipped.
    supplemental.setInferredInsertSize(-matePrimary.getInferredInsertSize());
}
/**
 * Corrupts the mate fields of the ten records produced by five pairs, runs exhaustive
 * validation, and checks the exact count reported for each expected error type.
 *
 * @throws IOException if validation fails to read the records
 */
@Test
public void testPairedRecords() throws IOException {
    final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
    for (int pair = 0; pair < 5; pair++) {
        samBuilder.addPair(String.valueOf(pair), pair, pair, pair + 100);
    }

    final Iterator<SAMRecord> records = samBuilder.iterator();
    records.next().setMateReferenceName("*");
    records.next().setMateAlignmentStart(Integer.MAX_VALUE);

    // Each remaining corruption consumes two records: the one being modified, then the one the
    // new value is derived from (receiver is evaluated before the argument).
    final SAMRecord startTarget = records.next();
    final SAMRecord startSource = records.next();
    startTarget.setMateAlignmentStart(startSource.getAlignmentStart() + 1);

    final SAMRecord strandTarget = records.next();
    final SAMRecord strandSource = records.next();
    strandTarget.setMateNegativeStrandFlag(!strandSource.getReadNegativeStrandFlag());

    final SAMRecord refIndexTarget = records.next();
    final SAMRecord refIndexSource = records.next();
    refIndexTarget.setMateReferenceIndex(refIndexSource.getReferenceIndex() + 1);

    final SAMRecord unmappedTarget = records.next();
    final SAMRecord unmappedSource = records.next();
    unmappedTarget.setMateUnmappedFlag(!unmappedSource.getReadUnmappedFlag());

    final Histogram<String> results =
            executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);

    Assert.assertEquals(
            results.get(SAMValidationError.Type.INVALID_ALIGNMENT_START.getHistogramString()).getValue(), 3.0);
    Assert.assertEquals(
            results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
    Assert.assertEquals(
            results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_NEG_STRAND.getHistogramString()).getValue(), 1.0);
    Assert.assertEquals(
            results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
    Assert.assertEquals(
            results.get(SAMValidationError.Type.MISMATCH_MATE_ALIGNMENT_START.getHistogramString()).getValue(), 2.0);
    Assert.assertEquals(
            results.get(SAMValidationError.Type.MISMATCH_MATE_REF_INDEX.getHistogramString()).getValue(), 2.0);
    Assert.assertEquals(
            results.get(SAMValidationError.Type.INVALID_UNALIGNED_MATE_START.getHistogramString()).getValue(), 1.0);
}
@Test public void testSimpleClippingOfRecord() { // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("10M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(6); // should overlap 5M record.setReadBases("AAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 5); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("5M5S"))); }
@Test public void testClippingOfRecordWithSoftClipBasesAtTheEnd() { /** * Tests that if we need to clip a read with soft-clipping at the end, it does the right thing. */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("5M5S")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(5); // should overlap 1M5S record.setReadBases("AAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 1); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S"))); }
@Test public void testClippingOfRecordWithInsertion() { /** * Tests that if we need to clip a read with an insertion that overlaps */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("5M1I5M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(5); // should overlap the 1M1I5M record.setReadBases("AAAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 7); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M7S"))); }
@Test public void testClippingOfRecordWithDeletion() { /** * Tests that if we need to clip a read with an deletion that overlaps */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("5M1D5M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(5); // should overlap the 1M1D5M record.setReadBases("AAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 6); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S"))); }
@Test public void testClippingOfRecordWithMateAtSamePosition() { /** * Tests that we clip the first end of a pair if we have perfect overlap of a pair */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setFirstOfPairFlag(true); record.setCigar(TextCigarCodec.decode("10M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(1); record.setReadBases("AAAAAAAAAA".getBytes()); Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 0); // now make it the second end record.setFirstOfPairFlag(false); record.setSecondOfPairFlag(true); Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 10); }
/**
 * Populates the mate fields of a supplemental SAMRecord (e.g. from a split alignment) from the
 * primary alignment of the read's mate.
 *
 * <p>The mate reference index, mate alignment start, mate strand flag and mate unmapped flag are
 * copied from the corresponding read-level values of {@code matePrimary}; the inferred insert
 * size is set to the negation of {@code matePrimary}'s. The Mate CIGAR (MC) tag is set to the
 * mate's cigar string only when {@code setMateCigar} is true and the mate is mapped; in every
 * other case the tag is set to {@code null}.
 *
 * @param supplemental a supplemental alignment for the mate pair of the primary supplied
 * @param matePrimary  the primary alignment of the mate pair of the supplemental
 * @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if
 *                     we are to clear any mate cigar tag that is present
 */
public static void setMateInformationOnSupplementalAlignment(final SAMRecord supplemental,
                                                             final SAMRecord matePrimary,
                                                             final boolean setMateCigar) {
    // Mate position.
    supplemental.setMateReferenceIndex(matePrimary.getReferenceIndex());
    supplemental.setMateAlignmentStart(matePrimary.getAlignmentStart());

    // Mate flags.
    supplemental.setMateNegativeStrandFlag(matePrimary.getReadNegativeStrandFlag());
    supplemental.setMateUnmappedFlag(matePrimary.getReadUnmappedFlag());

    // Insert size is the mate's value with the sign flipped.
    supplemental.setInferredInsertSize(-matePrimary.getInferredInsertSize());

    // A null value is passed whenever the caller opted out or the mate is unmapped.
    final boolean clearMateCigar = !setMateCigar || matePrimary.getReadUnmappedFlag();
    final String mateCigar = clearMateCigar ? null : matePrimary.getCigarString();
    supplemental.setAttribute(SAMTag.MC.name(), mateCigar);
}
/**
 * Copies mate-pair information onto a supplemental SAMRecord (e.g. from a split alignment) using
 * the primary alignment of the read's mate as the source.
 *
 * <p>Mate reference index, mate alignment start, mate strand flag and mate unmapped flag come
 * from the read-level values of {@code matePrimary}; the inferred insert size is the negation of
 * {@code matePrimary}'s. The Mate CIGAR (MC) tag receives the mate's cigar string only when
 * {@code setMateCigar} is true and the mate is mapped; otherwise it is set to {@code null}.
 *
 * @param supplemental a supplemental alignment for the mate pair of the primary supplied
 * @param matePrimary  the primary alignment of the mate pair of the supplemental
 * @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if
 *                     we are to clear any mate cigar tag that is present
 */
public static void setMateInformationOnSupplementalAlignment(final SAMRecord supplemental,
                                                             final SAMRecord matePrimary,
                                                             final boolean setMateCigar) {
    supplemental.setMateReferenceIndex(matePrimary.getReferenceIndex());
    supplemental.setMateAlignmentStart(matePrimary.getAlignmentStart());
    supplemental.setMateNegativeStrandFlag(matePrimary.getReadNegativeStrandFlag());
    supplemental.setMateUnmappedFlag(matePrimary.getReadUnmappedFlag());
    supplemental.setInferredInsertSize(-matePrimary.getInferredInsertSize());

    // Only a requested cigar from a mapped mate is written; anything else passes null.
    final boolean writeMateCigar = setMateCigar && !matePrimary.getReadUnmappedFlag();
    final String mateCigarValue = writeMateCigar ? matePrimary.getCigarString() : null;
    supplemental.setAttribute(SAMTag.MC.name(), mateCigarValue);
}
/**
 * Selects the record to expose as {@code next} and refills its slot from the same source.
 *
 * <p>The slot is chosen by {@code getIndexOfMinAlignment()}; a negative index sets {@code next}
 * to {@code null}. Otherwise the chosen record is re-bound to {@code header}: its reference
 * index is looked up by reference name, its read name is prefixed with the source id and
 * {@code delim}, and its mate reference index is looked up by mate reference name unless the
 * mate has no reference, in which case the mate alignment start is reset.
 *
 * NOTE(review): the results of {@code header.getSequence(...)} are dereferenced without a null
 * check; presumably every (mate) reference name is present in {@code header} - confirm.
 */
private void advance() {
    final int chosen = getIndexOfMinAlignment();
    if (chosen < 0) {
        next = null;
        return;
    }

    next = records[chosen];

    // Resolve the sequence by name before switching the record over to the new header.
    final SAMSequenceRecord sequence = header.getSequence(next.getReferenceName());
    next.setHeader(header);
    next.setReferenceIndex(sequence.getSequenceIndex());
    next.setReadName(sources[chosen].id + delim + next.getReadName());

    final boolean mateHasNoReference =
            next.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    if (mateHasNoReference) {
        next.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
    } else {
        final SAMSequenceRecord mateSequence = header.getSequence(next.getMateReferenceName());
        next.setMateReferenceIndex(mateSequence.getSequenceIndex());
    }

    // Refill the slot from its source, or mark it empty when the source has no more records.
    records[chosen] = sources[chosen].it.hasNext() ? sources[chosen].it.next() : null;
}
/**
 * Writes the same unmapped record twice (under two different read names) into one
 * {@code BAMRecordView} via {@code translate}, reads them back with {@code toSAMRecord}, and
 * checks that two records come out and that each compares equal to its source.
 */
@Test
public void test() {
    final SAMFileHeader header = new SAMFileHeader();

    // An unmapped, unplaced single-base record (flag value 4).
    final SAMRecord source = new SAMRecord(header);
    source.setReadName("readName");
    source.setFlags(4);
    source.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
    source.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
    source.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
    source.setCigar(new Cigar());
    source.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
    source.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
    source.setReadBases("A".getBytes());
    source.setBaseQualityString("!");

    // Write the record twice, changing only the read name in between.
    final BAMRecordView view = new BAMRecordView(new byte[1024]);
    translate(source, view);
    source.setReadName("2");
    translate(source, view);

    final List<SAMRecord> decoded = toSAMRecord(view, header);
    assertEquals(2, decoded.size());

    final Iterator<SAMRecord> decodedIterator = decoded.iterator();

    // First decoded record should match the source under its original name.
    SAMRecord actual = decodedIterator.next();
    source.setReadName("readName");
    compare(source, actual);

    // Second decoded record should match the source under the second name.
    source.setReadName("2");
    actual = decodedIterator.next();
    compare(source, actual);
}
/** * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way * according to the newOrder mapping from dictionary index -> index. Name is used for printing only. */ private void writeReads(final SAMFileWriter out, final SAMRecordIterator it, final Map<Integer, Integer> newOrder, final String name) { long counter = 0; log.info(" Processing " + name); while (it.hasNext()) { counter++; final SAMRecord read = it.next(); final int oldRefIndex = read.getReferenceIndex(); final int oldMateIndex = read.getMateReferenceIndex(); final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder); read.setHeader(out.getFileHeader()); read.setReferenceIndex(newRefIndex); final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder); if (oldMateIndex != -1 && newMateIndex == -1) { // becoming unmapped read.setMateAlignmentStart(0); read.setMateUnmappedFlag(true); read.setAttribute(SAMTag.MC.name(), null); // Set the Mate Cigar String to null } read.setMateReferenceIndex(newMateIndex); out.addAlignment(read); } it.close(); log.info("Wrote " + counter + " reads"); }
/**
 * Builds a paired 15M record (with a matching MC tag) on a two-contig dictionary from the
 * supplied position and flag values, and checks that {@code UmiUtil.getStrand} returns the
 * expected strand.
 *
 * @param referenceIndex         reference index to set on the record
 * @param alignmentStart         alignment start to set on the record
 * @param mateReferenceIndex     mate reference index to set on the record
 * @param mateAlignmentStart     mate alignment start to set on the record
 * @param firstOfPairFlag        value for the first-of-pair flag
 * @param negativeStrandFlag     value for the read negative-strand flag
 * @param mateNegativeStrandFlag value for the mate negative-strand flag
 * @param mapped                 whether the read is mapped (the unmapped flag is set to its negation)
 * @param mateMapped             whether the mate is mapped (the mate unmapped flag is set to its negation)
 * @param strand                 the strand {@code UmiUtil.getStrand} is expected to return
 */
@Test(dataProvider = "topStrandDataProvider")
public void testIsTopStrand(final int referenceIndex,
                            final int alignmentStart,
                            final int mateReferenceIndex,
                            final int mateAlignmentStart,
                            final boolean firstOfPairFlag,
                            final boolean negativeStrandFlag,
                            final boolean mateNegativeStrandFlag,
                            final boolean mapped,
                            final boolean mateMapped,
                            final UmiUtil.ReadStrand strand) {
    final int readLength = 15;
    final int contigLength = 500;

    // Header with two equally sized contigs.
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("chr1", contigLength));
    dictionary.addSequence(new SAMSequenceRecord("chr2", contigLength));
    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(dictionary);

    final SAMRecord rec = new SAMRecord(header);

    // Mapping state and pairing.
    rec.setReadUnmappedFlag(!mapped);
    rec.setMateUnmappedFlag(!mateMapped);
    rec.setReadPairedFlag(true);

    // Read and mate share the same all-match cigar.
    rec.setCigarString(readLength + "M");
    rec.setAttribute("MC", readLength + "M");

    // Positions.
    rec.setReferenceIndex(referenceIndex);
    rec.setAlignmentStart(alignmentStart);
    rec.setMateReferenceIndex(mateReferenceIndex);
    rec.setMateAlignmentStart(mateAlignmentStart);

    // Pair-order and strand flags.
    rec.setFirstOfPairFlag(firstOfPairFlag);
    rec.setReadNegativeStrandFlag(negativeStrandFlag);
    rec.setMateNegativeStrandFlag(mateNegativeStrandFlag);

    Assert.assertEquals(UmiUtil.getStrand(rec), strand);
}
/**
 * Makes {@code supplementary} a supplementary alignment of the read described by {@code primary}.
 *
 * <p>The read name, mate alignment start, mate reference index and paired flag are copied from
 * {@code primary}. When {@code primary} is paired, the proper-pair, first/second-of-pair, mate
 * unmapped and mate negative-strand flags are copied as well. The secondary-alignment,
 * duplicate and vendor-quality-check-failure flags are then copied, and the supplementary
 * alignment flag is set to {@code true}. Finally, every attribute of {@code primary} whose tag
 * has no value on {@code supplementary} is copied across; attributes already present on
 * {@code supplementary} are left untouched.
 *
 * @param primary       record supplying the read name, mate information, flags and attributes
 * @param supplementary record that is updated in place
 */
private static void convertToSupplementaryAlignmentRecord(SAMRecord primary, SAMRecord supplementary) { supplementary.setReadName(primary.getReadName()); supplementary.setMateAlignmentStart(primary.getMateAlignmentStart()); supplementary.setMateReferenceIndex(primary.getMateReferenceIndex()); supplementary.setReadPairedFlag(primary.getReadPairedFlag()); if (primary.getReadPairedFlag()) { supplementary.setProperPairFlag(primary.getProperPairFlag()); supplementary.setFirstOfPairFlag(primary.getFirstOfPairFlag()); supplementary.setSecondOfPairFlag(primary.getSecondOfPairFlag()); supplementary.setMateUnmappedFlag(primary.getMateUnmappedFlag()); supplementary.setMateNegativeStrandFlag(primary.getMateNegativeStrandFlag()); } supplementary.setSecondaryAlignment(primary.isSecondaryAlignment()); supplementary.setDuplicateReadFlag(primary.getDuplicateReadFlag()); supplementary.setReadFailsVendorQualityCheckFlag(primary.getReadFailsVendorQualityCheckFlag()); supplementary.setSupplementaryAlignmentFlag(true); // attributes not already set by the supplementary alignment for (SAMTagAndValue attr : primary.getAttributes()) { if (supplementary.getAttribute(attr.tag) == null) { supplementary.setAttribute(attr.tag, attr.value); } } } public static String getOriginatingAlignmentUniqueName(SAMRecord splitread) {