/**
 * Write the reads from this builder to output, creating an index as well.
 *
 * The writer is closed in a finally block, so it is released even when generating
 * the reads or adding one of them to the writer throws.
 *
 * @param output the output BAM file we want to use
 * @return the same {@code output} file that was passed in
 */
public File makeBAMFile(final File output) {
    // NOTE(review): the (true, 0) arguments look like htsjdk's (presorted, compressionLevel)
    // for SAMFileWriterFactory.makeBAMWriter -- confirm against the type of 'factory'.
    final SAMFileWriter writer = factory.makeBAMWriter(header, true, output, 0);
    try {
        for ( final GATKSAMRecord read : makeReads() ) {
            writer.addAlignment(read);
        }
    } finally {
        // always release the writer, even on failure
        writer.close();
    }
    return output;
}
@DataProvider(name = "ArtificialBAMBuilderUnitTestProvider") public Object[][] makeArtificialBAMBuilderUnitTestProvider() { final List<Object[]> tests = new LinkedList<Object[]>(); final List<Integer> starts = Arrays.asList( 1, // very start of the chromosome ArtificialBAMBuilder.BAM_SHARD_SIZE - 100, // right before the shard boundary ArtificialBAMBuilder.BAM_SHARD_SIZE + 100 // right after the shard boundary ); for ( final int readLength : Arrays.asList(10, 20) ) { for ( final int skips : Arrays.asList(0, 1, 10) ) { for ( final int start : starts ) { for ( final int nSamples : Arrays.asList(1, 2) ) { for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) { for ( final int nLoci : Arrays.asList(10, 100, 1000) ) { final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(nReadsPerLocus, nLoci); bamBuilder.setReadLength(readLength); bamBuilder.setSkipNLoci(skips); bamBuilder.setAlignmentStart(start); bamBuilder.createAndSetHeader(nSamples); tests.add(new Object[]{bamBuilder, readLength, skips, start, nSamples, nReadsPerLocus, nLoci}); } } } } } } return tests.toArray(new Object[][]{}); }
/**
 * Checks that a single read is consistent with the builder's settings.
 *
 * Verifies that the read length, the bases array and the base-quality array all have
 * the builder's read length, that the read does not start before the builder's
 * alignment start, and that the read has a read group.
 *
 * @param read the read to check
 * @param bamBuilder the builder whose settings the read is compared against
 */
private void assertGoodRead(final SAMRecord read, final ArtificialBAMBuilder bamBuilder) {
    final int expectedLength = bamBuilder.getReadLength();
    final int earliestStart = bamBuilder.getAlignmentStart();

    Assert.assertEquals(read.getReadLength(), expectedLength);
    Assert.assertEquals(read.getReadBases().length, expectedLength);
    Assert.assertEquals(read.getBaseQualities().length, expectedLength);

    final boolean startsAtOrAfterBuilderStart = read.getAlignmentStart() >= earliestStart;
    Assert.assertTrue(startsAtOrAfterBuilderStart);

    Assert.assertNotNull(read.getReadGroup());
}
}
final int nLoci = 3; final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci); bamBuilder.setReadLength(readLength); bamBuilder.setAlignmentStart(start); GATKSAMRecord allI = ArtificialSAMUtils.createArtificialRead(bamBuilder.getHeader(),"allI",0,start+1,readLength); allI.setCigarString(readLength + "I"); allI.setReadGroup(new GATKSAMReadGroupRecord(bamBuilder.getHeader().getReadGroups().get(0))); bamBuilder.addReads(allI); final GenomeLocSortedSet activeRegions = new GenomeLocSortedSet(bamBuilder.getGenomeLocParser()); activeRegions.add(bamBuilder.getGenomeLocParser().createGenomeLoc("1", 10, 30)); final List<GenomeLoc> intervals = Arrays.asList( genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd()) ); final Map<GenomeLoc, ActiveRegion> activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile()); for ( final GATKSAMRecord read : bamBuilder.makeReads() ) { Assert.assertTrue(readNamesInRegion.contains(read.getReadName()), "Region " + region + " should contain read " + read + " with cigar " + read.getCigarString() + " but it wasn't");
@Test(dataProvider = "ArtificialBAMBuilderUnitTestProvider") public void testBamProvider(final ArtificialBAMBuilder bamBuilder, int readLength, int skips, int start, int nSamples, int nReadsPerLocus, int nLoci) { Assert.assertEquals(bamBuilder.getReadLength(), readLength); Assert.assertEquals(bamBuilder.getSkipNLoci(), skips); Assert.assertEquals(bamBuilder.getAlignmentStart(), start); Assert.assertEquals(bamBuilder.getNSamples(), nSamples); Assert.assertEquals(bamBuilder.getnReadsPerLocus(), nReadsPerLocus); Assert.assertEquals(bamBuilder.getnLoci(), nLoci); final List<GATKSAMRecord> reads = bamBuilder.makeReads(); Assert.assertEquals(reads.size(), bamBuilder.expectedNumberOfReads()); for ( final GATKSAMRecord read : reads ) { assertGoodRead(read, bamBuilder); final File bam = bamBuilder.makeTemporarilyBAMFile(); final SamReader reader = SamReaderFactory.makeDefault().open(bam); Assert.assertTrue(reader.hasIndex()); throw new ReviewedGATKException("Unable to close " + bam , ex); Assert.assertEquals(nReadsFromBam, bamBuilder.expectedNumberOfReads());
for ( final int nReadsPerLocus : Arrays.asList(1, 2) ) { for ( final int nLoci : Arrays.asList(1, 1000) ) { final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci); bamBuilder.setReadLength(readLength); bamBuilder.setSkipNLoci(skips); bamBuilder.setAlignmentStart(start); for ( EnumSet<ActiveRegionReadState> readStates : allReadStates ) { for ( final GenomeLocSortedSet activeRegions : enumerateActiveRegions(bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())) { nTests++; if ( nTests < maxTests ) // && nTests == 1238 )
: new DownsamplingMethod(DownsampleType.NONE, null, null); final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci); bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1); final List<GATKSAMRecord> reads = bamBuilder.makeReads(); li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(reads.iterator()), downsampler, true, keepReads, genomeLocParser, bamBuilder.getSamples());
logger.warn("Running testARTReadsInActiveRegions id=" + id + " locs " + activeRegions + " against bam " + bamBuilder); final List<GenomeLoc> intervals = Arrays.asList( genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd()) ); walker.setStates(readStates); final Map<GenomeLoc, ActiveRegion> activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile()); final Set<String> readNamesInRegion = readNamesInRegion(region); int nReadsExpectedInRegion = 0; for ( final GATKSAMRecord read : bamBuilder.makeReads() ) { final GenomeLoc readLoc = genomeLocParser.createGenomeLoc(read);
/**
 * Fills a TAROrderedReadCache with reads from an ArtificialBAMBuilder and checks its bookkeeping.
 *
 * The test first checks the empty cache, then adds all reads (in bulk or one at a time),
 * checks the kept and discarded counts against the capacity, and finally pops the reads
 * and checks that the cache is emptied, its discard count is reset, and the popped reads
 * pass verifySortednessOfReads.
 *
 * @param nReadsPerLocus reads per locus handed to the builder
 * @param nLoci number of loci handed to the builder
 * @param max maximum capacity of the cache
 * @param addAllAtOnce if true use addAll, otherwise add reads one by one
 */
@Test(dataProvider = "ReadCacheTestData")
public void testReadCache(final int nReadsPerLocus, final int nLoci, final int max, final boolean addAllAtOnce) {
    final TAROrderedReadCache readCache = new TAROrderedReadCache(max);

    // a fresh cache reports its capacity and holds nothing
    Assert.assertEquals(readCache.getMaxCapacity(), max);
    Assert.assertEquals(readCache.getNumDiscarded(), 0);
    Assert.assertEquals(readCache.size(), 0);

    final ArtificialBAMBuilder builder = new ArtificialBAMBuilder(seq, nReadsPerLocus, nLoci);
    final List<GATKSAMRecord> inputReads = builder.makeReads();

    if ( ! addAllAtOnce ) {
        for ( final GATKSAMRecord inputRead : inputReads ) {
            readCache.add(inputRead);
        }
    } else {
        readCache.addAll(inputReads);
    }

    final int totalReads = inputReads.size();
    final int expectedKept = Math.min(totalReads, max);
    final int expectedDiscarded = totalReads - expectedKept;

    Assert.assertEquals(readCache.getNumDiscarded(), expectedDiscarded, "wrong number of reads discarded");
    Assert.assertEquals(readCache.size(), expectedKept, "wrong number of reads kept");

    // popping hands back the kept reads and resets the cache state
    final List<GATKSAMRecord> poppedReads = readCache.popCurrentReads();
    Assert.assertEquals(readCache.size(), 0, "Should be no reads left");
    Assert.assertEquals(readCache.getNumDiscarded(), 0, "should have reset stats");
    Assert.assertEquals(poppedReads.size(), expectedKept, "should have 1 read for every read we expected to keep");

    verifySortednessOfReads(poppedReads);
}
/** * A unit test that creates an artificial read for testing some code that uses reads */ @Test() public void testWithBAMFile() { // create a fake BAM file, and iterate through it final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(seq, 20, 10); final File bam = bamBuilder.makeTemporarilyBAMFile(); final SamReader reader = SamReaderFactory.makeDefault().open(bam); final Iterator<SAMRecord> bamIt = reader.iterator(); while ( bamIt.hasNext() ) { final SAMRecord read = bamIt.next(); // all reads are actually GATKSAMRecords // TODO -- add some tests that use reads from a BAM } try { reader.close(); } catch ( IOException ex ) { throw new ReviewedGATKException("Unable to close " + bam , ex); } }
/**
 * Creates a builder from a sequence dictionary, with no reference sequence file.
 *
 * The reference field is set to null, the GenomeLocParser is built from the dictionary,
 * and the header is initialized through createAndSetHeader(1).
 *
 * @param dict the sequence dictionary used to construct the GenomeLocParser
 * @param nReadsPerLocus stored as this builder's reads-per-locus setting
 * @param nLoci stored as this builder's locus-count setting
 */
public ArtificialBAMBuilder(final SAMSequenceDictionary dict, int nReadsPerLocus, int nLoci) {
    // no reference file is available for this variant
    this.reference = null;
    this.parser = new GenomeLocParser(dict);

    this.nLoci = nLoci;
    this.nReadsPerLocus = nReadsPerLocus;

    // NOTE(review): other call sites pass a sample count here, so 1 presumably means one sample
    createAndSetHeader(1);
}
/**
 * Creates a builder backed by a reference sequence file.
 *
 * The reference is stored, the GenomeLocParser is built from it, and the header is
 * initialized through createAndSetHeader(1).
 *
 * @param reference the reference sequence file, stored and used to construct the GenomeLocParser
 * @param nReadsPerLocus stored as this builder's reads-per-locus setting
 * @param nLoci stored as this builder's locus-count setting
 */
public ArtificialBAMBuilder(final ReferenceSequenceFile reference, int nReadsPerLocus, int nLoci) {
    this.reference = reference;
    this.parser = new GenomeLocParser(reference);

    this.nLoci = nLoci;
    this.nReadsPerLocus = nReadsPerLocus;

    // NOTE(review): other call sites pass a sample count here, so 1 presumably means one sample
    createAndSetHeader(1);
}