/**
 * Data provider for the "SmallCRAMTest" tests.
 *
 * Each row holds a CRAM file path, a reference FASTA path, the {@link QueryInterval} to query,
 * and a string (presumably the expected read name -- confirm against the consuming test).
 *
 * @return the table of test cases
 */
@DataProvider(name = "SmallCRAMTest")
public Object[][] CRAMIndexTestData() {
    return new Object[][]{
            {"cram/test.cram", "cram/auxf.fa", new QueryInterval(0, 12, 13), "Jim"},
            {"cram_with_bai_index.cram", "hg19mini.fasta", new QueryInterval(3, 700, 0), "k"},
            {"cram_with_crai_index.cram", "hg19mini.fasta", new QueryInterval(2, 350, 0), "i"},
    };
}
/**
 * Builds an iterator for the given sequence name and start position.
 *
 * The sequence name is resolved to an index through the file header, and a single
 * {@link QueryInterval} with an end of -1 is handed to a {@code CRAMIntervalIterator}
 * together with the flag {@code true}.
 *
 * @param sequence name of the reference sequence to query
 * @param start    start position placed into the query interval
 * @return iterator over the records selected by the interval iterator
 */
@Override
public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    final QueryInterval[] startOnlyInterval = {new QueryInterval(sequenceIndex, start, -1)};
    return new CRAMIntervalIterator(startOnlyInterval, true);
}
/**
 * Builds an iterator for the given sequence name and start position.
 *
 * The sequence name is resolved to an index through the file header, and a single
 * {@link QueryInterval} with an end of -1 is handed to a {@code CRAMIntervalIterator}
 * together with the flag {@code true}.
 *
 * @param sequence name of the reference sequence to query
 * @param start    start position placed into the query interval
 * @return iterator over the records selected by the interval iterator
 */
@Override
public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    final QueryInterval[] startOnlyInterval = {new QueryInterval(sequenceIndex, start, -1)};
    return new CRAMIntervalIterator(startOnlyInterval, true);
}
/**
 * Expands a query interval by {@code regionPaddingSize} bases on each side, clamped to the
 * bounds of its reference sequence.
 *
 * The start is clamped to a minimum of 0 and the end to the sequence length taken from
 * {@code dict}. An end that is already {@code <= 0} is returned unchanged, which is what the
 * clamp-only logic did before padding was applied.
 * NOTE(review): htsjdk documents a non-positive end as "to the end of the reference" -- confirm.
 *
 * @param qi                interval to expand
 * @param regionPaddingSize number of bases added before the start and after the end
 * @param dict              sequence dictionary used to look up the reference length
 * @return a new interval on the same reference sequence
 */
private static QueryInterval expandBy(QueryInterval qi, int regionPaddingSize, SAMSequenceDictionary dict) {
    final int sequenceLength = dict.getSequence(qi.referenceIndex).getSequenceLength();
    // Pad in long arithmetic so a large padding cannot overflow before the clamp is applied.
    final long paddedStart = (long) qi.start - regionPaddingSize;
    final long paddedEnd = (long) qi.end + regionPaddingSize;
    final int start = (int) Math.max(0L, paddedStart);
    // Padding a non-positive end would turn it into a small positive coordinate, so leave it alone.
    final int end = qi.end <= 0 ? qi.end : (int) Math.min((long) sequenceLength, paddedEnd);
    return new QueryInterval(qi.referenceIndex, start, end);
}
}
/**
 * Pads every interval by {@code padding} bases on both sides and merges the result.
 *
 * Starts are clamped to a minimum of 1 and ends to the length of the interval's reference
 * sequence as reported by {@code dictionary}. The padded intervals are then passed through
 * {@link QueryInterval#optimizeIntervals(QueryInterval[])}.
 *
 * @param dictionary sequence dictionary supplying reference lengths
 * @param intervals  intervals to pad; the input array is not modified by this method
 * @param padding    number of bases to add on each side
 * @return the optimised array of padded intervals
 */
public static QueryInterval[] padIntervals(SAMSequenceDictionary dictionary, QueryInterval[] intervals, int padding) {
    final QueryInterval[] widened = new QueryInterval[intervals.length];
    for (int i = 0; i < intervals.length; i++) {
        final QueryInterval source = intervals[i];
        final int paddedStart = Math.max(1, source.start - padding);
        final int contigLength = dictionary.getSequence(source.referenceIndex).getSequenceLength();
        final int paddedEnd = Math.min(source.end + padding, contigLength);
        widened[i] = new QueryInterval(source.referenceIndex, paddedStart, paddedEnd);
    }
    return QueryInterval.optimizeIntervals(widened);
}
public static boolean overlaps(QueryInterval[] intervals, int referenceIndex, int position) {
@DataProvider(name = "otherMultipleIntervals") public Object[][] otherMultipleIntervals() { return new Object[][]{ // accept an empty QueryIntervalArray {cramQueryWithBAI, cramQueryReference, new QueryInterval[]{}, new String[]{}}, // intervals overlapping - optimized to a single interval {cramQueryReadsWithBAI, cramQueryReadsReference, new QueryInterval[]{new QueryInterval(0, 1000, 1030), new QueryInterval(0, 1020, 1076)}, new String[]{"d"}}, {cramQueryReadsWithLocalCRAI, cramQueryReadsReference, new QueryInterval[]{new QueryInterval(0, 1000, 1030), new QueryInterval(0, 1020, 1076)}, new String[]{"d"}} }; }
/**
 * Creates a {@link QueryInterval} from a sequence name and a coordinate range.
 *
 * The sequence name is resolved against the file header first; the start position is
 * validated only after the sequence has been found.
 *
 * @param sequence sequence of interest, must exist in the sequence dictionary
 * @param start    1-based start position, must be >= 1
 * @param end      1-based end position
 * @return a query interval for the resolved sequence index and the given range
 * @throws java.lang.IllegalArgumentException if the sequence is not found in the sequence
 *                                            dictionary, or the start position is < 1
 */
public QueryInterval makeQueryInterval(final String sequence, int start, int end) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    final boolean sequenceKnown = sequenceIndex >= 0;
    if (!sequenceKnown) {
        final String message = String.format("Sequence '%s' not found in sequence dictionary", sequence);
        throw new IllegalArgumentException(message);
    }
    if (start <= 0) {
        throw new IllegalArgumentException("Start position must be >= 1");
    }
    return new QueryInterval(sequenceIndex, start, end);
}
/**
 * Generates {@code count} random query intervals.
 *
 * For every interval three values are drawn from {@code generator}, in this order: a reference
 * index in {@code [0, numReferences)} and two coordinates in {@code [0, 10000000]}. The smaller
 * coordinate becomes the start and the larger the end.
 *
 * @param numReferences exclusive upper bound for the random reference index
 * @param count         number of intervals to generate
 * @param generator     source of randomness
 * @return array of {@code count} intervals, in generation order
 */
private QueryInterval[] generateRandomIntervals(final int numReferences, final int count, final Random generator) {
    final int coordinateBound = 10000000 + 1;
    final QueryInterval[] result = new QueryInterval[count];
    int filled = 0;
    while (filled < count) {
        // Draw order (reference, first coordinate, second coordinate) determines the random sequence.
        final int contig = generator.nextInt(numReferences);
        final int first = generator.nextInt(coordinateBound);
        final int second = generator.nextInt(coordinateBound);
        result[filled] = first <= second
                ? new QueryInterval(contig, first, second)
                : new QueryInterval(contig, second, first);
        filled++;
    }
    return result;
}
/**
 * Queries a single region given by sequence name and coordinates.
 *
 * The sequence name is resolved to an index through the file header, wrapped into a
 * one-element {@link QueryInterval} array, and delegated to the interval-array overload.
 *
 * @param sequence  name of the reference sequence
 * @param start     start coordinate placed into the query interval
 * @param end       end coordinate placed into the query interval
 * @param contained forwarded unchanged to the interval-array overload
 * @return the iterator produced by the interval-array overload
 */
@Override
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    final QueryInterval[] singleInterval = {new QueryInterval(sequenceIndex, start, end)};
    return query(singleInterval, contained);
}
/**
 * Builds a {@link QueryInterval} from a record's reference index, start and end.
 *
 * @param samRecord record supplying the reference index and coordinates
 * @return interval built from {@code getReferenceIndex()}, {@code getStart()} and {@code getEnd()}
 */
private static QueryInterval queryIntervalFromSamRecord(final SAMRecord samRecord) {
    final int contig = samRecord.getReferenceIndex();
    final int first = samRecord.getStart();
    final int last = samRecord.getEnd();
    return new QueryInterval(contig, first, last);
}
/**
 * Queries a single region given by sequence name and coordinates.
 *
 * The sequence name is resolved to an index through the file header, wrapped into a
 * one-element {@link QueryInterval} array, and delegated to the interval-array overload.
 *
 * @param sequence  name of the reference sequence
 * @param start     start coordinate placed into the query interval
 * @param end       end coordinate placed into the query interval
 * @param contained forwarded unchanged to the interval-array overload
 * @return the iterator produced by the interval-array overload
 */
@Override
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    final QueryInterval[] singleInterval = {new QueryInterval(sequenceIndex, start, end)};
    return query(singleInterval, contained);
}
/**
 * Queries a single region given by sequence name and coordinates.
 *
 * The sequence name is resolved to an index through the file header, wrapped into a
 * one-element {@link QueryInterval} array, and delegated to the interval-array overload.
 *
 * @param sequence  name of the reference sequence
 * @param start     start coordinate placed into the query interval
 * @param end       end coordinate placed into the query interval
 * @param contained forwarded unchanged to the interval-array overload
 * @return the iterator produced by the interval-array overload
 */
@Override
public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
    final int sequenceIndex = getFileHeader().getSequenceIndex(sequence);
    final QueryInterval[] singleInterval = {new QueryInterval(sequenceIndex, start, end)};
    return query(singleInterval, contained);
}
/**
 * Builds a {@link QueryInterval} from a record's reference index, start and end.
 *
 * @param samRecord record supplying the reference index and coordinates
 * @return interval built from {@code getReferenceIndex()}, {@code getStart()} and {@code getEnd()}
 */
private static QueryInterval queryIntervalFromSamRecord(final SAMRecord samRecord) {
    final int contig = samRecord.getReferenceIndex();
    final int first = samRecord.getStart();
    final int last = samRecord.getEnd();
    return new QueryInterval(contig, first, last);
}
/**
 * Converts every range in {@code intervals} into a {@link QueryInterval}.
 *
 * For each range the lower endpoint and (upper endpoint - 1) are translated through
 * {@code linear} into a reference index and reference positions. A range whose two ends map
 * to different reference indexes is rejected.
 *
 * @return one query interval per range, in the iteration order of {@code intervals.asRanges()}
 * @throws RuntimeException if a range spans more than one reference sequence
 */
public QueryInterval[] asQueryInterval() {
    final QueryInterval[] converted = new QueryInterval[intervals.asRanges().size()];
    int next = 0;
    for (final Range<Long> range : intervals.asRanges()) {
        final long first = range.lowerEndpoint();
        final int startContig = linear.getReferenceIndex(first);
        final int startPosition = linear.getReferencePosition(first);
        // The upper endpoint is treated as exclusive, so the last covered coordinate is upper - 1.
        final long last = range.upperEndpoint() - 1;
        final int endPosition = linear.getReferencePosition(last);
        final QueryInterval current = new QueryInterval(startContig, startPosition, endPosition);
        converted[next] = current;
        next++;
        final boolean sameContig = linear.getReferenceIndex(last) == current.referenceIndex;
        if (!sameContig) {
            throw new RuntimeException("Not Yet Implemented: support for interval spaning chromosomes and unpadded LinearGenomicCoordinate lookups. This should not happen. Please raise an issue at https://github.com/PapenfussLab/gridss/issues");
        }
    }
    return converted;
}
public RangeSet<Long> asRangeSet() {
/**
 * Opens {@code cram_with_bai_index.cram} through a caller-supplied input resource and checks
 * the number of records read from it.
 *
 * The reader is closed when the method exits, including when the assertion fails.
 *
 * @param getInputResource builds the {@code SamInputResource} from the CRAM URL and the index URL
 * @param hasIndex         when true, records are counted through a query on interval (1, 10, 1000);
 *                         otherwise all records are counted
 * @param expectedCount    expected number of records
 * @throws IOException if a URL cannot be built or the reader fails to close
 */
private void getCRAMReaderFromInputResource(
        final BiFunction<URL, URL, SamInputResource> getInputResource,
        final boolean hasIndex,
        final int expectedCount) throws IOException {
    final String cramFilePath = new File(TEST_DATA_DIR, "cram_with_bai_index.cram").getAbsolutePath();
    final String cramIndexPath = new File(TEST_DATA_DIR, "cram_with_bai_index.cram.bai").getAbsolutePath();
    final URL cramURL = new URL("file://" + cramFilePath);
    final URL indexURL = new URL("file://" + cramIndexPath);
    final SamReaderFactory factory = SamReaderFactory.makeDefault()
            .referenceSource(new ReferenceSource(new File(TEST_DATA_DIR, "hg19mini.fasta")))
            .validationStringency(ValidationStringency.SILENT);
    // try-with-resources: the reader was previously never closed.
    try (final SamReader reader = factory.open(getInputResource.apply(cramURL, indexURL))) {
        final int count = hasIndex
                ? countRecordsInQueryInterval(reader, new QueryInterval(1, 10, 1000))
                : countRecords(reader);
        Assert.assertEquals(count, expectedCount);
    }
}
/**
 * Checks the number of records returned for a query interval on {@code issue76.bam}.
 *
 * The reader is closed even when the assertion fails.
 *
 * @param sequenceName  name of the sequence to query; resolved to an index via the file header
 * @param start         start of the query interval
 * @param end           end of the query interval
 * @param expectedCount expected number of records in the interval
 * @throws IOException if the reader fails to close
 */
@Test(dataProvider = "queryIntervalIssue76TestCases")
public void queryIntervalIssue76(final String sequenceName, final int start, final int end, final int expectedCount) throws IOException {
    final File input = new File(TEST_DATA_DIR, "issue76.bam");
    // try-with-resources: a failing assertion used to skip the explicit close().
    try (final SamReader reader = SamReaderFactory.makeDefault().open(input)) {
        final int sequenceIndex = reader.getFileHeader().getSequence(sequenceName).getSequenceIndex();
        final QueryInterval interval = new QueryInterval(sequenceIndex, start, end);
        Assert.assertEquals(countRecordsInQueryInterval(reader, interval), expectedCount);
    }
}
/**
 * Runs the same two-interval query against the BAI-indexed and the CSI-indexed reader and
 * checks that both return reads "3968040" and "140419", in that order.
 *
 * Both iterators are closed even when an assertion fails.
 */
@Test
public static void testQueryInterval() {
    final QueryInterval[] query = new QueryInterval[]{
            new QueryInterval(0, 1519, 1520),
            new QueryInterval(1, 470535, 470536)
    };

    // try-with-resources: a failing assertion used to skip the explicit close() calls.
    try (final CloseableIterator<SAMRecord> baiIterator = bamFileReaderBAI.query(query, false);
         final CloseableIterator<SAMRecord> csiIterator = bamFileReaderCSI.query(query, false)) {
        Assert.assertTrue(baiIterator.hasNext());
        Assert.assertTrue(csiIterator.hasNext());
        SAMRecord r1 = baiIterator.next();
        SAMRecord r2 = csiIterator.next();
        Assert.assertEquals(r1.getReadName(), "3968040");
        Assert.assertEquals(r2.getReadName(), "3968040");

        // Guard the second next() so a short result fails as an assertion, not an iterator exception.
        Assert.assertTrue(baiIterator.hasNext());
        Assert.assertTrue(csiIterator.hasNext());
        r1 = baiIterator.next();
        r2 = csiIterator.next();
        Assert.assertEquals(r1.getReadName(), "140419");
        Assert.assertEquals(r2.getReadName(), "140419");
    }
}
/**
 * Queries two intervals on an in-memory CRAM with a BAI index (SILENT validation) and checks
 * that exactly reads "3968040" and "140419" are returned, in that order.
 *
 * The iterator and the reader are closed even when an assertion fails.
 *
 * @throws IOException declared by the original test signature
 */
@Test
public void testQueryInterval() throws IOException {
    final CRAMFileReader reader = new CRAMFileReader(
            new ByteArraySeekableStream(cramBytes),
            new ByteArraySeekableStream(baiBytes),
            source,
            ValidationStringency.SILENT);
    try {
        final QueryInterval[] query = new QueryInterval[]{
                new QueryInterval(0, 1519, 1520),
                new QueryInterval(1, 470535, 470536)
        };
        // The iterator is closed before the reader, matching the original close order.
        try (final CloseableIterator<SAMRecord> iterator = reader.query(query, false)) {
            Assert.assertTrue(iterator.hasNext());
            final SAMRecord r1 = iterator.next();
            Assert.assertEquals(r1.getReadName(), "3968040");

            Assert.assertTrue(iterator.hasNext());
            final SAMRecord r2 = iterator.next();
            Assert.assertEquals(r2.getReadName(), "140419");

            Assert.assertFalse(iterator.hasNext());
        }
    } finally {
        reader.close();
    }
}
/**
 * Queries two intervals on an in-memory CRAM with a CRAI index (STRICT validation) and checks
 * that exactly reads "3968040" and "140419" are returned, in that order.
 *
 * The iterator and the reader are closed even when an assertion fails.
 *
 * @throws IOException declared by the original test signature
 */
@Test
public void testQueryInterval() throws IOException {
    final CRAMFileReader reader = new CRAMFileReader(
            new ByteArraySeekableStream(cramBytes),
            new ByteArraySeekableStream(craiBytes),
            source,
            ValidationStringency.STRICT);
    try {
        final QueryInterval[] query = new QueryInterval[]{
                new QueryInterval(0, 1519, 1520),
                new QueryInterval(1, 470535, 470536)
        };
        // The iterator is closed before the reader, matching the original close order.
        try (final CloseableIterator<SAMRecord> iterator = reader.query(query, false)) {
            Assert.assertTrue(iterator.hasNext());
            final SAMRecord r1 = iterator.next();
            Assert.assertEquals(r1.getReadName(), "3968040");

            Assert.assertTrue(iterator.hasNext());
            final SAMRecord r2 = iterator.next();
            Assert.assertEquals(r2.getReadName(), "140419");

            Assert.assertFalse(iterator.hasNext());
        }
    } finally {
        reader.close();
    }
}
/**
 * Queries two intervals on an in-memory CRAM with a CRAI index, using explicit file pointers.
 *
 * The file span for the query is computed with {@code BAMFileReader.getFileSpan} from the
 * reader's index and passed to {@code createIndexIterator} as a coordinate array. The test
 * expects exactly reads "3968040" and "140419", in that order.
 *
 * The iterator and the reader are closed even when an assertion fails.
 *
 * @throws IOException declared by the original test signature
 */
@Test
public void testQueryIntervalWithFilePointers() throws IOException {
    final CRAMFileReader reader = new CRAMFileReader(
            new ByteArraySeekableStream(cramBytes),
            new ByteArraySeekableStream(craiBytes),
            source,
            ValidationStringency.STRICT);
    try {
        final QueryInterval[] query = new QueryInterval[]{
                new QueryInterval(0, 1519, 1520),
                new QueryInterval(1, 470535, 470536)
        };
        final BAMFileSpan fileSpan = BAMFileReader.getFileSpan(query, reader.getIndex());
        // The iterator is closed before the reader, matching the original close order.
        try (final CloseableIterator<SAMRecord> iterator =
                     reader.createIndexIterator(query, false, fileSpan.toCoordinateArray())) {
            Assert.assertTrue(iterator.hasNext());
            final SAMRecord r1 = iterator.next();
            Assert.assertEquals(r1.getReadName(), "3968040");

            Assert.assertTrue(iterator.hasNext());
            final SAMRecord r2 = iterator.next();
            Assert.assertEquals(r2.getReadName(), "140419");

            Assert.assertFalse(iterator.hasNext());
        }
    } finally {
        reader.close();
    }
}