/**
 * Looks up the index span overlapping each query interval, merges those spans and
 * returns the merged span as a coordinate array.
 *
 * @param index   the BAM index to query
 * @param queries the intervals to look up; one span is requested per element
 * @return the result of {@code toCoordinateArray()} on the merged span
 */
private static long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterval[] queries) {
    // One span per query, in query order.
    final BAMFileSpan[] perQuerySpans = Arrays.stream(queries)
            .map(qi -> index.getSpanOverlapping(qi.referenceIndex, qi.start, qi.end))
            .toArray(BAMFileSpan[]::new);
    return BAMFileSpan.merge(perQuerySpans).toCoordinateArray();
}
/**
 * Resolves every query interval to its overlapping index span, merges the spans and
 * converts the merged span to a coordinate array.
 *
 * @param index   the BAM index to query
 * @param queries the intervals to look up; one span is requested per element
 * @return the result of {@code toCoordinateArray()} on the merged span
 */
private static long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterval[] queries) {
    final BAMFileSpan[] perQuerySpans = new BAMFileSpan[queries.length];
    for (int i = 0; i < queries.length; i++) {
        final QueryInterval query = queries[i];
        perQuerySpans[i] = index.getSpanOverlapping(query.referenceIndex, query.start, query.end);
    }
    return BAMFileSpan.merge(perQuerySpans).toCoordinateArray();
}
/**
 * Use the index to determine the chunk boundaries for the required intervals.
 * One span is requested from the index per interval and the spans are merged.
 *
 * @param intervals the intervals to restrict reads to
 * @param fileIndex the BAM index to use
 * @return the merged span covering all intervals, or {@code null} when {@code intervals} is empty
 */
public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
    if (intervals.length == 0) {
        return null;
    }
    final BAMFileSpan[] perIntervalSpans = new BAMFileSpan[intervals.length];
    int slot = 0;
    for (final QueryInterval interval : intervals) {
        perIntervalSpans[slot++] =
                fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
    }
    return BAMFileSpan.merge(perIntervalSpans);
}
/**
 * Use the index to determine the chunk boundaries for the required intervals.
 *
 * @param intervals the intervals to restrict reads to
 * @param fileIndex the BAM index to use
 * @return the merge of the spans overlapping each interval; {@code null} if no intervals were given
 */
public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
    final int count = intervals.length;
    final BAMFileSpan[] spans = new BAMFileSpan[count];
    for (int idx = 0; idx < count; idx++) {
        final QueryInterval current = intervals[idx];
        spans[idx] = fileIndex.getSpanOverlapping(current.referenceIndex, current.start, current.end);
    }
    // Nothing to merge for an empty interval list; callers get null in that case.
    return count > 0 ? BAMFileSpan.merge(spans) : null;
}
/**
 * Use the index to determine the chunk boundaries for the required intervals.
 * Each interval is looked up separately and the resulting spans are merged into one.
 *
 * @param intervals the intervals to restrict reads to
 * @param fileIndex the BAM index to use
 * @return the merged span, or {@code null} when {@code intervals} has no elements
 */
public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
    final boolean nothingRequested = intervals.length == 0;
    if (nothingRequested) {
        return null;
    }

    final BAMFileSpan[] collected = new BAMFileSpan[intervals.length];
    int next = 0;
    for (final QueryInterval query : intervals) {
        collected[next] = fileIndex.getSpanOverlapping(query.referenceIndex, query.start, query.end);
        next++;
    }
    return BAMFileSpan.merge(collected);
}
/**
 * Looks up the index span overlapping a region of a named sequence and returns it
 * as a coordinate array.
 *
 * @param indexFile      the index file handed to the caching index factory
 * @param dictionary     the sequence dictionary used to resolve {@code sequenceName}
 * @param sequenceName   name of the sequence to query
 * @param alignmentStart start of the queried region
 * @param alignmentEnd   end of the queried region
 * @return an empty array when the dictionary reports index {@code -1} for the sequence name;
 *         {@code null} when the index returns no span; otherwise the span's coordinate array
 */
public long[] getBAMIndexPointers(File indexFile, SAMSequenceDictionary dictionary, String sequenceName, int alignmentStart, int alignmentEnd) {
    final int referenceIndex = dictionary.getSequenceIndex(sequenceName);
    if (referenceIndex == -1) {
        // The index is not opened at all in this case.
        return new long[0];
    }

    final BAMIndex fileIndex = BAMIndexFactory.SHARED_INSTANCE.createCachingIndex(indexFile, dictionary);
    final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, alignmentStart, alignmentEnd);
    if (fileSpan == null) {
        return null;
    }
    return fileSpan.toCoordinateArray();
}
}
/** * Prepare to iterate through SAMRecords in the given reference that start exactly at the given start coordinate. * @param referenceIndex Desired reference sequence. * @param start 1-based alignment start. */ private CloseableIterator<SAMRecord> createStartingAtIndexIterator(final int referenceIndex, final int start) { // Hit the index to determine the chunk boundaries for the required data. final BAMIndex fileIndex = getIndex(); final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, 0); final long[] filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null; // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); // Add some preprocessing filters for edge-case reads that don't fit into this // query type. return new BAMQueryFilteringIterator(iterator,new BAMStartingAtIteratorFilter(referenceIndex,start)); }
/** * Prepare to iterate through SAMRecords in the given reference that start exactly at the given start coordinate. * @param referenceIndex Desired reference sequence. * @param start 1-based alignment start. */ private CloseableIterator<SAMRecord> createStartingAtIndexIterator(final int referenceIndex, final int start) { // Hit the index to determine the chunk boundaries for the required data. final BAMIndex fileIndex = getIndex(); final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, 0); final long[] filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null; // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); // Add some preprocessing filters for edge-case reads that don't fit into this // query type. return new BAMQueryFilteringIterator(iterator,new BAMStartingAtIteratorFilter(referenceIndex,start)); }
/** * Prepare to iterate through SAMRecords in the given reference that start exactly at the given start coordinate. * @param referenceIndex Desired reference sequence. * @param start 1-based alignment start. */ private CloseableIterator<SAMRecord> createStartingAtIndexIterator(final int referenceIndex, final int start) { // Hit the index to determine the chunk boundaries for the required data. final BAMIndex fileIndex = getIndex(); final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, 0); final long[] filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null; // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); // Add some preprocessing filters for edge-case reads that don't fit into this // query type. return new BAMQueryFilteringIterator(iterator,new BAMStartingAtIteratorFilter(referenceIndex,start)); }
/** * Prepare to iterate through SAMRecords in the given reference that start exactly at the given start coordinate. * @param referenceIndex Desired reference sequence. * @param start 1-based alignment start. */ private CloseableIterator<SAMRecord> createStartingAtIndexIterator(final int referenceIndex, final int start) { // Hit the index to determine the chunk boundaries for the required data. final BAMIndex fileIndex = getIndex(); final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, 0); final long[] filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null; // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); // Add some preprocessing filters for edge-case reads that don't fit into this // query type. return new BAMQueryFilteringIterator(iterator,new BAMStartingAtIteratorFilter(referenceIndex,start)); }
private static long seek(BAMIndex index, int seqId, int start, int end, SeekableStream cramStream) throws IOException { BAMFileSpan span = index.getSpanOverlapping(seqId, start, end); if (span == null) return -1;
private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals, final boolean contained) { assertIntervalsOptimized(intervals); // Hit the index to determine the chunk boundaries for the required data. final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length]; final BAMIndex fileIndex = getIndex(); for (int i = 0; i < intervals.length; ++i) { final QueryInterval interval = intervals[i]; final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); inputSpans[i] = span; } final long[] filePointers; if (inputSpans.length > 0) { filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray(); } else { filePointers = null; } // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); // Add some preprocessing filters for edge-case reads that don't fit into this // query type. return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); }
final SAMFileSpan spanOfSecondContainer = index.getSpanOverlapping(refId, alignmentSpan.getStart(), alignmentSpan.getStart()+ alignmentSpan.getSpan()); Assert.assertNotNull(spanOfSecondContainer); Assert.assertFalse(spanOfSecondContainer.isEmpty());
final SAMFileSpan spanOfSecondContainer = index.getSpanOverlapping(refId, alignmentSpan.getStart(), alignmentSpan.getStart()+ alignmentSpan.getSpan()); Assert.assertNotNull(spanOfSecondContainer); Assert.assertFalse(spanOfSecondContainer.isEmpty());
/** * This is to check that the indexing actually works and not just skips records. The approach is to forbid reading of the first * container and try accessing reads from the first and the second containers. The first attempt should fail but the second should succeed. * * @throws IOException */ @Test public void testUnnecessaryIO() throws IOException { final SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(new ByteArraySeekableStream(indexBytes), header.getSequenceDictionary()); BAMIndex index = new CachingBAMFileIndex(baiStream, header.getSequenceDictionary()); int refID = 0; long start = index.getSpanOverlapping(refID, 1, Integer.MAX_VALUE).getFirstOffset(); long end = index.getSpanOverlapping(refID + 1, 1, Integer.MAX_VALUE).getFirstOffset(); TabuRegionInputStream tabuIS = new TabuRegionInputStream(Arrays.asList(new Chunk[]{new Chunk(start, end)}), new ByteArraySeekableStream(cramBytes)); CRAMFileReader reader = new CRAMFileReader(tabuIS, new ByteArraySeekableStream(indexBytes), source, ValidationStringency.SILENT); try { // the attempt to read 1st container, which will happen when the iterator is initialized, must throw CloseableIterator<SAMRecord> it = reader.queryAlignmentStart(header.getSequence(refID).getSequenceName(), 1); Assert.fail(); } catch (TabuError e) { } // reading after the 1st container should be ok: refID = 2; final CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart(header.getSequence(refID).getSequenceName(), 1); Assert.assertNotNull(iterator); Assert.assertTrue(iterator.hasNext()); }