/**
 * Collects the entries of {@code list} that carry sequence id {@code seqId} and intersect the
 * requested region. When {@code start} or {@code span} is below 1, every entry with the given
 * sequence id is selected without an intersection check.
 *
 * @param list  index entries to search
 * @param seqId sequence id an entry must have to be selected
 * @param start alignment start of the query region; values below 1 are replaced by 1
 * @param span  alignment span of the query region; values below 1 are replaced by {@code Integer.MAX_VALUE}
 * @return a new list with the selected entries, sorted with {@code CRAIEntry.byStart}
 */
public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
    final int regionStart = start < 1 ? 1 : start;
    final int regionSpan = span < 1 ? Integer.MAX_VALUE : span;
    final CRAIEntry region = new CRAIEntry(
            seqId, regionStart, regionSpan, Long.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE);
    final boolean wholeSequence = start < 1 || span < 1;

    final List<CRAIEntry> matches = new ArrayList<>();
    for (final CRAIEntry candidate : list) {
        final boolean sameSequence = candidate.getSequenceId() == seqId;
        // The intersection test only runs for entries on the requested sequence.
        if (sameSequence && (wholeSequence || CRAIEntry.intersect(candidate, region))) {
            matches.add(candidate);
        }
    }
    matches.sort(CRAIEntry.byStart);
    return matches;
}
/**
 * Returns the entry with the smallest alignment start. If several entries share the smallest
 * start, the one occurring first in the list wins.
 *
 * @param list entries to scan; may be {@code null} or empty
 * @return the leftmost entry, or {@code null} when {@code list} is {@code null} or empty
 */
public static CRAIEntry getLeftmost(final List<CRAIEntry> list) {
    if (list == null || list.isEmpty()) {
        return null;
    }

    CRAIEntry leftmost = list.get(0);
    int smallestStart = leftmost.getAlignmentStart();
    for (final CRAIEntry candidate : list) {
        final int candidateStart = candidate.getAlignmentStart();
        // Strict comparison keeps the earliest entry on ties.
        if (candidateStart < smallestStart) {
            leftmost = candidate;
            smallestStart = candidateStart;
        }
    }
    return leftmost;
}
/**
 * Returns the text produced by {@code serializeToString()}.
 *
 * @return the result of {@code serializeToString()}
 */
@Override
public String toString() {
    return serializeToString();
}
/**
 * Selects the entries of {@code list} whose sequence id equals {@code seqId} and which intersect
 * the query region. If {@code start} or {@code span} is below 1, all entries with that sequence
 * id are returned and no intersection test is applied.
 *
 * @param list  index entries to search
 * @param seqId sequence id an entry must have to be selected
 * @param start alignment start of the query region; values below 1 are replaced by 1
 * @param span  alignment span of the query region; values below 1 are replaced by {@code Integer.MAX_VALUE}
 * @return a new list with the selected entries, sorted with {@code CRAIEntry.byStart}
 */
public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
    final boolean selectAll = start < 1 || span < 1;

    // Probe entry describing the query region; offsets and size are set to their maximum values.
    final CRAIEntry probe = new CRAIEntry();
    probe.sequenceId = seqId;
    probe.alignmentStart = start < 1 ? 1 : start;
    probe.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span;
    probe.containerStartOffset = Long.MAX_VALUE;
    probe.sliceOffset = Integer.MAX_VALUE;
    probe.sliceSize = Integer.MAX_VALUE;

    final List<CRAIEntry> selected = new ArrayList<>();
    for (final CRAIEntry candidate : list) {
        if (candidate.sequenceId == seqId) {
            if (selectAll || CRAIEntry.intersect(candidate, probe)) {
                selected.add(candidate);
            }
        }
    }
    selected.sort(CRAIEntry.byStart);
    return selected;
}
/**
 * Checks that an entry built from a tab-separated CRAI line reports the same six values that
 * were written into the line, using a container offset that does not fit into an {@code int}.
 */
@Test
public void testFromCraiLine() {
    int counter = 1;
    final int sequenceId = counter++;
    final int alignmentStart = counter++;
    final int alignmentSpan = counter++;
    // Widen before adding: in int arithmetic Integer.MAX_VALUE + n wraps around to a negative
    // value, so the offset would never actually exceed the int range.
    final long containerOffset = (long) Integer.MAX_VALUE + counter++;
    final int sliceOffset = counter++;
    final int sliceSize = counter++;

    final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d",
            sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSize);
    final CRAIEntry entry = new CRAIEntry(line);

    Assert.assertNotNull(entry);
    Assert.assertEquals(entry.getSequenceId(), sequenceId);
    Assert.assertEquals(entry.getAlignmentStart(), alignmentStart);
    Assert.assertEquals(entry.getAlignmentSpan(), alignmentSpan);
    Assert.assertEquals(entry.getContainerStartByteOffset(), containerOffset);
    Assert.assertEquals(entry.getSliceByteOffset(), sliceOffset);
    Assert.assertEquals(entry.getSliceByteSize(), sliceSize);
}
/**
 * Verifies that sorting with the natural ordering of {@code CRAIEntry} puts entries in ascending
 * order of sequence id, of alignment start (for equal sequence ids), and of container start
 * byte offset. Each case inserts the larger entry first and checks the order before and after
 * the sort.
 */
@Test
public void testCompareTo() {
    final List<CRAIEntry> entries = new ArrayList<>(2);

    // Case 1: entries differ in sequence id.
    final CRAIEntry lowSequence = newEntry(100, 0, 0);
    final CRAIEntry highSequence = newEntry(200, 0, 0);
    entries.add(highSequence);
    entries.add(lowSequence);
    Assert.assertTrue(entries.get(1).getSequenceId() < entries.get(0).getSequenceId());
    Collections.sort(entries);
    Assert.assertTrue(entries.get(0).getSequenceId() < entries.get(1).getSequenceId());
    entries.clear();

    // Case 2: same sequence id, entries differ in alignment start.
    final CRAIEntry lowStart = newEntry(1, 100, 0);
    final CRAIEntry highStart = newEntry(1, 200, 0);
    entries.add(highStart);
    entries.add(lowStart);
    Assert.assertTrue(entries.get(1).getAlignmentStart() < entries.get(0).getAlignmentStart());
    Collections.sort(entries);
    Assert.assertTrue(entries.get(0).getAlignmentStart() < entries.get(1).getAlignmentStart());
    entries.clear();

    // Case 3: entries differ in container start offset.
    final CRAIEntry lowOffset = newEntryContOffset(100);
    final CRAIEntry highOffset = newEntryContOffset(200);
    entries.add(highOffset);
    entries.add(lowOffset);
    Assert.assertTrue(entries.get(1).getContainerStartByteOffset() < entries.get(0).getContainerStartByteOffset());
    Collections.sort(entries);
    Assert.assertTrue(entries.get(0).getContainerStartByteOffset() < entries.get(1).getContainerStartByteOffset());
}
/**
 * Builds a CRAI index entry from this Slice's sequence id, alignment start, alignment span,
 * offset and size, combined with an externally supplied container offset.
 *
 * TODO: investigate why we sometimes need to pass in an external containerStartOffset
 * because this Slice's containerOffset is incorrect
 *
 * @param containerStartOffset the byte offset of this Slice's Container
 * @return a new CRAI Index Entry
 */
public CRAIEntry getCRAIEntry(final long containerStartOffset) {
    return new CRAIEntry(
            sequenceId,
            alignmentStart,
            alignmentSpan,
            containerStartOffset,
            offset,
            size);
}
/**
 * Creates an entry via {@code newEntry} that takes its sequence id, alignment span, slice byte
 * offset and slice byte size from {@code toClone}, and uses the given alignment start and
 * container start offset.
 *
 * @param toClone              entry supplying the values that are carried over
 * @param alignmentStart       alignment start for the new entry
 * @param containerStartOffset container start offset for the new entry
 * @return the entry returned by {@code newEntry}
 */
public static CRAIEntry updateStartContOffset(final CRAIEntry toClone, final int alignmentStart, final int containerStartOffset) {
    return newEntry(
            toClone.getSequenceId(),
            alignmentStart,
            toClone.getAlignmentSpan(),
            containerStartOffset,
            toClone.getSliceByteOffset(),
            toClone.getSliceByteSize());
}
}
/**
 * Runs {@code CRAIIndex.find} and checks each returned entry by brute force: an entry passes
 * when at least one position lies both inside {@code [entry start, entry start + entry span]}
 * and inside {@code [start, start + span]} (both ends inclusive). Also asserts that every
 * returned entry has the requested sequence id.
 *
 * @param index      entries handed to {@code CRAIIndex.find}
 * @param sequenceId sequence id of the query
 * @param start      alignment start of the query
 * @param span       alignment span of the query
 * @return {@code true} when at least one entry was found and every found entry passed the
 *         check; {@code false} otherwise
 */
private boolean allFoundEntriesIntersectQueryInFind(final List<CRAIEntry> index, final int sequenceId, final int start, final int span) {
    final List<CRAIEntry> hits = CRAIIndex.find(index, sequenceId, start, span);
    final int queryEnd = start + span;

    for (final CRAIEntry hit : hits) {
        Assert.assertEquals(hit.getSequenceId(), sequenceId);

        final int hitStart = hit.getAlignmentStart();
        final int hitEnd = hitStart + hit.getAlignmentSpan();
        final int scanFrom = Math.min(hitStart, start);
        final int scanTo = Math.max(hitEnd, queryEnd);

        // Walk every position covered by either interval until one is inside both.
        boolean overlapSeen = false;
        for (int pos = scanFrom; pos <= scanTo && !overlapSeen; pos++) {
            overlapSeen = pos >= hitStart && pos >= start && pos <= hitEnd && pos <= queryEnd;
        }
        if (!overlapSeen) {
            return false;
        }
    }
    return !hits.isEmpty();
}
/**
 * Verifies that {@code CRAIEntry.intersect} reports no intersection between an entry of span 1
 * and an entry of span 0 with the same sequence id and alignment start.
 */
@Test
public void testIntersectsZeroSpan() {
    final CRAIEntry unitSpan = newEntry(1, 1, 1);
    final CRAIEntry zeroSpan = newEntry(1, 1, 0);

    final boolean intersects = CRAIEntry.intersect(unitSpan, zeroSpan);

    Assert.assertFalse(intersects);
}
/**
 * Creates an entry via {@code newEntry} with the sequence id and alignment span of
 * {@code toClone} and the given alignment start.
 *
 * @param toClone        entry supplying the sequence id and alignment span
 * @param alignmentStart alignment start for the new entry
 * @return the entry returned by {@code newEntry}
 */
public static CRAIEntry updateStart(final CRAIEntry toClone, final int alignmentStart) {
    return newEntry(
            toClone.getSequenceId(),
            alignmentStart,
            toClone.getAlignmentSpan());
}
/**
 * Sorts the entries in place with {@code CRAIEntry.byStartDesc}, then writes each entry to the
 * given stream in that order.
 *
 * @param os Stream to write index to
 */
public void writeIndex(final OutputStream os) {
    entries.sort(CRAIEntry.byStartDesc);
    for (final CRAIEntry entry : entries) {
        entry.writeToStream(os);
    }
}
/**
 * Locates the last aligned entry in the list. The index is expected to be sorted by coordinate,
 * with unmapped entries (sequence id = -1) placed after the mapped ones.
 *
 * @param list a list of CRAI entries
 * @return integer index of the last entry with sequence id not equal to -1; -1 when the list
 *         is empty or holds only entries with sequence id -1
 */
public static int findLastAlignedEntry(final List<CRAIEntry> list) {
    if (list.isEmpty()) {
        return -1;
    }

    // Binary search for the boundary: on exit 'lower' is the first position whose
    // sequence id is negative, or list.size() when there is none.
    int lower = 0;
    int upper = list.size() - 1;
    while (lower <= upper) {
        final int middle = (lower + upper) >>> 1;
        if (list.get(middle).getSequenceId() < 0) {
            upper = middle - 1;
        } else {
            lower = middle + 1;
        }
    }

    if (lower >= list.size()) {
        return list.size() - 1;
    }

    // Step back over entries whose sequence id is exactly -1.
    int candidate = lower;
    while (candidate >= 0 && list.get(candidate).getSequenceId() == -1) {
        candidate--;
    }
    return candidate;
}
public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) { final List<CRAIEntry> full = CRAMCRAIIndexer.readIndex(indexStream).getCRAIEntries(); Collections.sort(full); final SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(dictionary); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final CRAMBAIIndexer indexer = new CRAMBAIIndexer(baos, header); for (final CRAIEntry entry : full) { final Slice slice = new Slice(); slice.containerOffset = entry.getContainerStartByteOffset(); slice.alignmentStart = entry.getAlignmentStart(); slice.alignmentSpan = entry.getAlignmentSpan(); slice.sequenceId = entry.getSequenceId(); // NOTE: the recordCount and sliceIndex fields can't be derived from the CRAM index // so we can only set them to zero // see https://github.com/samtools/htsjdk/issues/531 slice.nofRecords = 0; slice.index = 0; slice.offset = entry.getSliceByteOffset(); indexer.processSingleReferenceSlice(slice); } indexer.finish(); return new SeekableMemoryStream(baos.toByteArray(), "CRAI to BAI converter"); }
/**
 * Builds a one-entry CRAI index and a one-sequence dictionary, converts them into a BAI stream
 * with the supplied function, and checks the span that a {@code DiskBasedBAMFileIndex} over
 * that stream reports for the entry's alignment start.
 *
 * @param getBaiStreamForIndex function producing the BAI stream from the dictionary and the
 *                             CRAI entries
 */
private void doCRAITest(BiFunction<SAMSequenceDictionary, List<CRAIEntry>, SeekableStream> getBaiStreamForIndex) {
    final CRAIEntry onlyEntry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4);
    final List<CRAIEntry> craiEntries = new ArrayList<>();
    craiEntries.add(onlyEntry);

    final SAMSequenceDictionary sequences = new SAMSequenceDictionary();
    sequences.addSequence(new SAMSequenceRecord("1", 100));

    final SeekableStream bai = getBaiStreamForIndex.apply(sequences, craiEntries);
    final DiskBasedBAMFileIndex baiIndex = new DiskBasedBAMFileIndex(bai, sequences);

    // Query a single position: the entry's alignment start is used as both ends of the range.
    final int position = onlyEntry.getAlignmentStart();
    final BAMFileSpan overlap = baiIndex.getSpanOverlapping(onlyEntry.getSequenceId(), position, position);
    Assert.assertNotNull(overlap);

    final long[] coordinates = overlap.toCoordinateArray();
    Assert.assertEquals(coordinates.length, 2);
    // Bits above the low 16 of the first coordinate must equal the container start offset.
    Assert.assertEquals(coordinates[0] >> 16, onlyEntry.getContainerStartByteOffset());
    // The low 16 bits of the second coordinate must be 1.
    Assert.assertEquals(coordinates[1] & 0xFFFF, 1);
}
/**
 * Builds a CRAI index entry from this Slice's sequence id, alignment start, alignment span,
 * container offset, offset and size.
 *
 * @return a new CRAI Index Entry
 */
public CRAIEntry getCRAIEntry() {
    return new CRAIEntry(
            sequenceId,
            alignmentStart,
            alignmentSpan,
            containerOffset,
            offset,
            size);
}

/**
@Test public void testIntersectsIncluded() { Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 1, 1))); Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 2), newEntry(1, 2, 1))); // is symmetrical? Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1, 1), newEntry(1, 1, 2))); Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2, 1), newEntry(1, 1, 2))); }
/**
 * Orders the entries in place using {@code CRAIEntry.byStartDesc} and then writes them, one
 * after another, to the supplied stream.
 *
 * @param os Stream to write index to
 */
public void writeIndex(final OutputStream os) {
    entries.sort(CRAIEntry.byStartDesc);
    entries.forEach(entry -> entry.writeToStream(os));
}
@Test public void testFindLastAlignedEntry() { final List<CRAIEntry> index = new ArrayList<CRAIEntry>(); Assert.assertEquals(-1, CRAIIndex.findLastAlignedEntry(index)); // Scan all allowed combinations of 10 mapped/unmapped entries and assert the found last aligned entry: final int indexSize = 10; for (int lastAligned = 0; lastAligned < indexSize; lastAligned++) { index.clear(); for (int i = 0; i < indexSize; i++) { final CRAIEntry e = CRAIEntryTest.newEntrySeqStart(i <= lastAligned ? 0 : -1, i); index.add(e); } // check expectations are correct before calling findLastAlignedEntry method: Assert.assertTrue(index.get(lastAligned).getSequenceId() != -1); if (lastAligned < index.size() - 1) { Assert.assertTrue(index.get(lastAligned + 1).getSequenceId() == -1); } // assert the the found value matches the expectation: Assert.assertEquals(CRAIIndex.findLastAlignedEntry(index), lastAligned); } }
/**
 * Verifies that {@code Container.getCRAIEntries()} on a container holding a single slice yields
 * exactly one entry whose six values equal the corresponding fields of that slice.
 */
@Test
public void testFromContainer() {
    final Slice onlySlice = new Slice();
    onlySlice.sequenceId = 1;
    onlySlice.alignmentStart = 2;
    onlySlice.alignmentSpan = 3;
    onlySlice.containerOffset = 4;
    onlySlice.offset = 5;
    onlySlice.size = 6;

    final Container holder = new Container();
    holder.landmarks = new int[]{7};
    holder.slices = new Slice[]{onlySlice};

    final List<CRAIEntry> produced = holder.getCRAIEntries();
    Assert.assertNotNull(produced);
    Assert.assertEquals(produced.size(), 1);

    final CRAIEntry first = produced.get(0);
    Assert.assertEquals(first.getSequenceId(), onlySlice.sequenceId);
    Assert.assertEquals(first.getAlignmentStart(), onlySlice.alignmentStart);
    Assert.assertEquals(first.getAlignmentSpan(), onlySlice.alignmentSpan);
    Assert.assertEquals(first.getContainerStartByteOffset(), onlySlice.containerOffset);
    Assert.assertEquals(first.getSliceByteOffset(), onlySlice.offset);
    Assert.assertEquals(first.getSliceByteSize(), onlySlice.size);
}