@Override public void setReferenceIndex(final int value) { if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { initializedFields.add(LazyField.REFERENCE_NAME); } super.setReferenceIndex(value); }
@Override public void setReferenceIndex(final int value) { if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { initializedFields.add(LazyField.REFERENCE_NAME); } super.setReferenceIndex(value); }
public void setReferenceName(final String value) { /* String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible */ if (NO_ALIGNMENT_REFERENCE_NAME.equals(value)) { mReferenceName = NO_ALIGNMENT_REFERENCE_NAME; mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX; return; } else if (mHeader != null) { final int referenceIndex = mHeader.getSequenceIndex(value); if (referenceIndex != -1) { setReferenceIndex(referenceIndex); return; } } // Drop through from above if nothing done. mReferenceName = value.intern(); mReferenceIndex = null; }
@Test(expectedExceptions=IllegalArgumentException.class) public void testInvalidReferenceIndex() { // unresolvable reference final SAMRecord sam = createTestRecordHelper(); sam.setReferenceIndex(9999); }
@Test(expectedExceptions=IllegalStateException.class) public void testNullHeaderSetReferenceIndex() { final SAMRecord sam = createTestRecordHelper(); sam.setHeader(null); // setReferenceIndex with null header throws sam.setReferenceIndex(3); }
@Test public void testReferenceIndex() { // NO_ALIGNMENT_REFERENCE SAMRecord sam = createTestRecordHelper(); sam.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)); Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)); // valid reference sam = createTestRecordHelper(); sam.setReferenceIndex(3); Assert.assertTrue(sam.getReferenceIndex().equals(3)); Assert.assertTrue(sam.getReferenceName().equals("chr4")); }
private static SAMRecord createSAMRecord(SAMFileHeader header, int recordIndex, int seqId, int start) { byte[] bases = "AAAAA".getBytes(); final SAMRecord record = new SAMRecord(header); record.setReferenceIndex(seqId); record.setAlignmentStart(start); record.setReadBases(bases); record.setBaseQualities(bases); record.setReadName(Integer.toString(recordIndex)); return record; }
@Test(expectedExceptions=UserException.MalformedBAM.class) public void testInvalidAlignment() { // Create an invalid alignment state. SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),1,10); SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),2,10); read1.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); List<SAMRecord> reads = Arrays.asList(read1,read2); VerifyingSamIterator iterator = new VerifyingSamIterator(GATKSAMIteratorAdapter.adapt(reads.iterator())); Assert.assertTrue(iterator.hasNext(),"Insufficient reads"); Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position"); Assert.assertTrue(iterator.hasNext(),"Insufficient reads"); // Should trigger MalformedBAM exception. iterator.next(); }
private SAMRecord makeSamRecord(final SAMFileHeader header, final int alignmentStart, final int readLength, final boolean reverse, final boolean firstOfPair) { final SAMRecord rec = new SAMRecord(header); rec.setReferenceIndex(0); final StringBuilder sb = new StringBuilder(); final byte[] quals = new byte[readLength]; for (int i = 0; i < readLength; ++i) { sb.append("A"); quals[i] = 20; } rec.setReadString(sb.toString()); rec.setBaseQualities(quals); rec.setAlignmentStart(alignmentStart); rec.setCigarString(readLength + "M"); rec.setReadPairedFlag(true); rec.setReadNegativeStrandFlag(reverse); if (firstOfPair) rec.setFirstOfPairFlag(true); else rec.setSecondOfPairFlag(true); return rec; }
public static void makeReadUnmapped(final SAMRecord rec) { if (rec.getReadNegativeStrandFlag()) { SAMRecordUtil.reverseComplement(rec); rec.setReadNegativeStrandFlag(false); } rec.setDuplicateReadFlag(false); rec.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); rec.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); rec.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); rec.setInferredInsertSize(0); rec.setNotPrimaryAlignmentFlag(false); rec.setProperPairFlag(false); rec.setReadUnmappedFlag(true); }
@Test public void testSimpleErrorCalculator() { final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr1", 200); final SAMFileHeader samFileHeader = new SAMFileHeader(); samFileHeader.addSequence(samSequenceRecord); final SAMRecord samRecord = new SAMRecord(samFileHeader); samRecord.setReadBases("CgTGtGGAcAAAgAAA".getBytes()); final byte[] refBases = "CATGGGGAAAAAAAAA".getBytes(); final int n = refBases.length; samRecord.setReadUnmappedFlag(false); samRecord.setReadNegativeStrandFlag(true); samRecord.setAlignmentStart(1); samRecord.setReferenceIndex(0); final SimpleErrorCalculator baseErrorCalculator = new SimpleErrorCalculator(); for (int i = 0; i < n; i++) { SamLocusIterator.LocusInfo locusInfo = new SamLocusIterator.LocusInfo(samSequenceRecord, i + 1); final SAMLocusAndReference locusAndReference = new SAMLocusAndReference(locusInfo, refBases[i]); SamLocusIterator.RecordAndOffset recordAndOffset = new SamLocusIterator.RecordAndOffset(samRecord, i); baseErrorCalculator.addBase(recordAndOffset, locusAndReference); } final BaseErrorMetric metric = baseErrorCalculator.getMetric(); metric.calculateDerivedFields(); Assert.assertEquals(metric.TOTAL_BASES, n); Assert.assertEquals(metric.ERROR_BASES, 4L); }
@Test public void testSimpleClippingOfRecord() { // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("10M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(6); // should overlap 5M record.setReadBases("AAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 5); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("5M5S"))); }
@Test public void testClippingOfRecordWithInsertion() { /** * Tests that if we need to clip a read with an insertion that overlaps */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("5M1I5M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(5); // should overlap the 1M1I5M record.setReadBases("AAAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 7); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M7S"))); }
@Test public void testClippingOfRecordWithSoftClipBasesAtTheEnd() { /** * Tests that if we need to clip a read with soft-clipping at the end, it does the right thing. */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("5M5S")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(5); // should overlap 1M5S record.setReadBases("AAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 1); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S"))); }
@Test public void testClippingOfRecordWithDeletion() { /** * Tests that if we need to clip a read with an deletion that overlaps */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setCigar(TextCigarCodec.decode("5M1D5M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(5); // should overlap the 1M1D5M record.setReadBases("AAAAAAAAAA".getBytes()); final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record); Assert.assertEquals(numToClip, 6); SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S"))); }
@Test public void testClippingOfRecordWithMateAtSamePosition() { /** * Tests that we clip the first end of a pair if we have perfect overlap of a pair */ // setup the record final SAMFileHeader header = new SAMFileHeader(); header.addSequence(new SAMSequenceRecord("1", 1000)); final SAMRecord record = new SAMRecord(header); record.setReadPairedFlag(true); record.setFirstOfPairFlag(true); record.setCigar(TextCigarCodec.decode("10M")); record.setReferenceIndex(0); record.setAlignmentStart(1); record.setMateReferenceIndex(0); record.setMateAlignmentStart(1); record.setReadBases("AAAAAAAAAA".getBytes()); Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 0); // now make it the second end record.setFirstOfPairFlag(false); record.setSecondOfPairFlag(true); Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 10); }
/** * Fills in bases for the given record to length. */ private SAMRecord getSAMRecord(final int refIndex, final int start, final int length) { final byte[] bases = new byte[length]; SAMFileHeader samHeader = new SAMFileHeader(); samHeader.setSequenceDictionary( new SAMSequenceDictionary( Arrays.asList( new SAMSequenceRecord("chr1", 200), new SAMSequenceRecord("chr2", 200)) ) ); SAMRecord samRec = new SAMRecord(samHeader); for (int i = 0; i < length; ++i) { bases[i] = BASES[random.nextInt(BASES.length)]; } samRec.setReadBases(bases); samRec.setReferenceIndex(refIndex); samRec.setAlignmentStart(start); samRec.setCigarString(length + "M"); return samRec; }
/** * Creates a read record. * * @param cigar the new record CIGAR. * @param group the new record group index that must be in the range \ * [1,{@link #getReadGroupCount()}] * @param reference the reference sequence index (0-based) * @param start the start position of the read alignment in the reference * (1-based) * @return never <code>null</code> */ protected SAMRecord createRead(final Cigar cigar, final int group, final int reference, final int start) { final SAMRecord record = ArtificialSAMUtils.createArtificialRead(cigar); record.setHeader(getHeader()); record.setAlignmentStart(start); record.setReferenceIndex(reference); record.setAttribute(SAMTag.RG.toString(), getReadGroupId(group)); return record; } }
@Override public void align(File fastq, File output, File reference, int threads) throws IOException { try (ReferenceSequenceFile ref = new IndexedFastaSequenceFile(reference)) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(ref.getSequenceDictionary()); byte[] bases = ref.getSequence(ref.getSequenceDictionary().getSequence(referenceIndex).getSequenceName()).getBases(); try (FastqReader reader = new FastqReader(fastq)) { try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) { for (FastqRecord fqr : reader) { Alignment aln = aligner.align_smith_waterman(fqr.getReadString().getBytes(), bases); SAMRecord r = new SAMRecord(header); r.setReadName(fqr.getReadName()); r.setReferenceIndex(referenceIndex); r.setAlignmentStart(aln.getStartPosition() + 1); r.setCigarString(aln.getCigar()); r.setReadBases(fqr.getReadString().getBytes()); r.setBaseQualities(SAMUtils.fastqToPhred(fqr.getBaseQualityString())); writer.addAlignment(r); } } } } } }
/** * Build a SAM record featuring the absolute minimum required dataset. * TODO: Blatantly copied from LocusViewTemplate. Refactor these into a set of tools. * @param contig Contig to populate. * @param alignmentStart start of alignment * @param alignmentEnd end of alignment * @return New SAM Record */ protected SAMRecord buildSAMRecord( String contig, int alignmentStart, int alignmentEnd ) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceFile.getSequenceDictionary()); SAMRecord record = new SAMRecord(header); record.setReferenceIndex(sequenceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); cigar.add(new CigarElement(alignmentEnd-alignmentStart+1, CigarOperator.M)); record.setCigar(cigar); return record; }