/**
 * Supplies the rows for the "SmallCRAMTest" data provider.
 *
 * @return one row per case: CRAM file name, reference file name, the interval to query,
 *         and a string associated with that query (NOTE(review): looks like an expected
 *         read name - confirm against the consuming test)
 */
@DataProvider(name = "SmallCRAMTest")
public Object[][] CRAMIndexTestData() {
    return new Object[][]{
            {"cram/test.cram", "cram/auxf.fa", new QueryInterval(0, 12, 13), "Jim"},
            {"cram_with_bai_index.cram", "hg19mini.fasta", new QueryInterval(3, 700, 0), "k"},
            {"cram_with_crai_index.cram", "hg19mini.fasta", new QueryInterval(2, 350, 0), "i"},
    };
}
/**
 * Checks every adjacent pair of intervals and rejects the array as soon as one pair is out of
 * order, overlapping, or abutting. Arrays with fewer than two elements always pass.
 *
 * @param intervals the intervals to validate, in the order they will be used
 * @throws java.lang.IllegalArgumentException if the intervals are not optimized
 * @see QueryInterval#optimizeIntervals(QueryInterval[])
 */
private void assertIntervalsOptimized(final QueryInterval[] intervals) {
    for (int idx = 1; idx < intervals.length; idx++) {
        final QueryInterval earlier = intervals[idx - 1];
        final QueryInterval later = intervals[idx];

        // Pick the complaint for the first rule this pair breaks; rules are tried in a fixed order.
        final String template;
        if (earlier.compareTo(later) >= 0) {
            template = "List of intervals is not sorted: %s >= %s";
        } else if (earlier.overlaps(later)) {
            template = "List of intervals is not optimized: %s intersects %s";
        } else if (earlier.endsAtStartOf(later)) {
            template = "List of intervals is not optimized: %s abuts %s";
        } else {
            continue;
        }
        throw new IllegalArgumentException(String.format(template, earlier, later));
    }
}
/**
 * Widens every interval by {@code padding} bases on each side, clamps the result to the bounds
 * of its reference sequence, and merges whatever overlaps or abuts after padding.
 *
 * @param dictionary sequence dictionary used to look up the length of each interval's reference
 * @param intervals  intervals to pad; this array itself is not modified
 * @param padding    number of bases to add before the start and after the end of each interval
 * @return the padded intervals, passed through {@link QueryInterval#optimizeIntervals(QueryInterval[])}
 */
public static QueryInterval[] padIntervals(SAMSequenceDictionary dictionary, QueryInterval[] intervals, int padding) {
    QueryInterval[] padded = Stream.of(intervals)
            .map(qi -> {
                int sequenceLength = dictionary.getSequence(qi.referenceIndex).getSequenceLength();
                int paddedStart = Math.max(1, qi.start - padding);
                // An end <= 0 marks an interval that runs to the end of the reference. Adding
                // padding to that sentinel would produce a bogus coordinate, so make it explicit.
                // The sum is done in long so a very large padding cannot wrap around.
                int paddedEnd = qi.end <= 0
                        ? sequenceLength
                        : (int) Math.min((long) qi.end + padding, (long) sequenceLength);
                return new QueryInterval(qi.referenceIndex, paddedStart, paddedEnd);
            })
            .toArray(QueryInterval[]::new);
    return QueryInterval.optimizeIntervals(padded);
}
public static boolean overlaps(QueryInterval[] intervals, int referenceIndex, int position) {
/**
 * Tells whether {@code lhs} lies entirely ahead of {@code rhs}: the two must not overlap and
 * {@code lhs} must order before {@code rhs}.
 *
 * @param lhs the interval expected to come first
 * @param rhs the interval expected to come second
 * @return true if the two intervals do not overlap _and_ {@code lhs.compareTo(rhs)} is negative
 */
private static boolean isCleanlyBefore(final QueryInterval lhs, final QueryInterval rhs) {
    if (lhs.overlaps(rhs)) {
        return false;
    }
    return lhs.compareTo(rhs) < 0;
}
/**
 * Checks every adjacent pair of intervals and rejects the array as soon as one pair is out of
 * order, overlapping, or abutting. Arrays with fewer than two elements always pass.
 *
 * @param intervals the intervals to validate, in the order they will be used
 * @throws java.lang.IllegalArgumentException if the intervals are not optimized
 * @see QueryInterval#optimizeIntervals(QueryInterval[])
 */
private void assertIntervalsOptimized(final QueryInterval[] intervals) {
    for (int idx = 1; idx < intervals.length; idx++) {
        final QueryInterval earlier = intervals[idx - 1];
        final QueryInterval later = intervals[idx];

        // Pick the complaint for the first rule this pair breaks; rules are tried in a fixed order.
        final String template;
        if (earlier.compareTo(later) >= 0) {
            template = "List of intervals is not sorted: %s >= %s";
        } else if (earlier.overlaps(later)) {
            template = "List of intervals is not optimized: %s intersects %s";
        } else if (earlier.abuts(later)) {
            template = "List of intervals is not optimized: %s abuts %s";
        } else {
            continue;
        }
        throw new IllegalArgumentException(String.format(template, earlier, later));
    }
}
/**
 * Sorts the given intervals and collapses each run of abutting or overlapping intervals into a
 * single interval.
 *
 * @param inputIntervals WARNING: This list is modified (sorted) by this method.
 * @return Ordered list of intervals in which abutting and overlapping intervals are merged;
 *         the shared empty array when the input is empty.
 */
public static QueryInterval[] optimizeIntervals(final QueryInterval[] inputIntervals) {
    if (inputIntervals.length == 0) {
        return EMPTY_QUERY_INTERVAL_ARRAY;
    }
    Arrays.sort(inputIntervals);

    final List<QueryInterval> merged = new ArrayList<QueryInterval>();
    QueryInterval current = inputIntervals[0];
    for (int idx = 1; idx < inputIntervals.length; idx++) {
        final QueryInterval candidate = inputIntervals[idx];
        final boolean mergeable = current.endsAtStartOf(candidate) || current.overlaps(candidate);
        if (!mergeable) {
            // A gap separates the two: the interval built so far is final.
            merged.add(current);
            current = candidate;
            continue;
        }
        // An end of 0 on either side is carried over to the merged interval instead of a maximum.
        final int mergedEnd;
        if (current.end == 0 || candidate.end == 0) {
            mergedEnd = 0;
        } else {
            mergedEnd = Math.max(current.end, candidate.end);
        }
        current = new QueryInterval(current.referenceIndex, current.start, mergedEnd);
    }
    if (current != null) {
        merged.add(current);
    }
    return merged.toArray(EMPTY_QUERY_INTERVAL_ARRAY);
}
/**
 * Sorts the given intervals and collapses each run of abutting or overlapping intervals into a
 * single interval.
 *
 * @param inputIntervals WARNING: This list is modified (sorted) by this method.
 * @return Ordered list of intervals in which abutting and overlapping intervals are merged;
 *         the shared empty array when the input is empty.
 */
public static QueryInterval[] optimizeIntervals(final QueryInterval[] inputIntervals) {
    if (inputIntervals.length == 0) {
        return EMPTY_QUERY_INTERVAL_ARRAY;
    }
    Arrays.sort(inputIntervals);

    final List<QueryInterval> merged = new ArrayList<QueryInterval>();
    QueryInterval current = inputIntervals[0];
    for (int idx = 1; idx < inputIntervals.length; idx++) {
        final QueryInterval candidate = inputIntervals[idx];
        final boolean mergeable = current.abuts(candidate) || current.overlaps(candidate);
        if (!mergeable) {
            // A gap separates the two: the interval built so far is final.
            merged.add(current);
            current = candidate;
            continue;
        }
        // An end of 0 on either side is carried over to the merged interval instead of a maximum.
        final int mergedEnd;
        if (current.end == 0 || candidate.end == 0) {
            mergedEnd = 0;
        } else {
            mergedEnd = Math.max(current.end, candidate.end);
        }
        current = new QueryInterval(current.referenceIndex, current.start, mergedEnd);
    }
    if (current != null) {
        merged.add(current);
    }
    return merged.toArray(EMPTY_QUERY_INTERVAL_ARRAY);
}
}
/** * Converts a List of SimpleIntervals into the format required by the SamReader query API * @param rawIntervals SimpleIntervals to be converted * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API */ static QueryInterval[] prepareQueryIntervals( final List<Interval> rawIntervals, final SAMSequenceDictionary sequenceDictionary ) { if ( rawIntervals == null || rawIntervals.isEmpty() ) { return null; } // Convert each SimpleInterval to a QueryInterval final QueryInterval[] convertedIntervals = rawIntervals.stream() .map(rawInterval -> convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary)) .toArray(QueryInterval[]::new); // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API return QueryInterval.optimizeIntervals(convertedIntervals); } /**
/**
 * Two intervals are equal when they have the same runtime class and {@code compareTo}
 * reports no difference between them.
 *
 * @param o the object to compare against; may be null
 * @return true if {@code o} is this object, or is of the same class and compares as 0
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    final boolean sameClass = o != null && o.getClass() == getClass();
    if (!sameClass) {
        return false;
    }
    final QueryInterval other = (QueryInterval) o;
    return compareTo(other) == 0;
}
/**
 * Sorts the given intervals and collapses each run of abutting or overlapping intervals into a
 * single interval.
 *
 * @param inputIntervals WARNING: This list is modified (sorted) by this method.
 * @return Ordered list of intervals in which abutting and overlapping intervals are merged;
 *         the shared empty array when the input is empty.
 */
public static QueryInterval[] optimizeIntervals(final QueryInterval[] inputIntervals) {
    if (inputIntervals.length == 0) {
        return EMPTY_QUERY_INTERVAL_ARRAY;
    }
    Arrays.sort(inputIntervals);

    final List<QueryInterval> merged = new ArrayList<QueryInterval>();
    QueryInterval current = inputIntervals[0];
    for (int idx = 1; idx < inputIntervals.length; idx++) {
        final QueryInterval candidate = inputIntervals[idx];
        final boolean mergeable = current.endsAtStartOf(candidate) || current.overlaps(candidate);
        if (!mergeable) {
            // A gap separates the two: the interval built so far is final.
            merged.add(current);
            current = candidate;
            continue;
        }
        // An end of 0 on either side is carried over to the merged interval instead of a maximum.
        final int mergedEnd;
        if (current.end == 0 || candidate.end == 0) {
            mergedEnd = 0;
        } else {
            mergedEnd = Math.max(current.end, candidate.end);
        }
        current = new QueryInterval(current.referenceIndex, current.start, mergedEnd);
    }
    if (current != null) {
        merged.add(current);
    }
    return merged.toArray(EMPTY_QUERY_INTERVAL_ARRAY);
}
/**
 * Tells whether {@code lhs} lies entirely ahead of {@code rhs}: the two must not overlap and
 * {@code lhs} must order before {@code rhs}.
 *
 * @param lhs the interval expected to come first
 * @param rhs the interval expected to come second
 * @return true if the two intervals do not overlap _and_ {@code lhs.compareTo(rhs)} is negative
 */
private static boolean isCleanlyBefore(final QueryInterval lhs, final QueryInterval rhs) {
    if (lhs.overlaps(rhs)) {
        return false;
    }
    return lhs.compareTo(rhs) < 0;
}
/** * Converts a List of SimpleIntervals into the format required by the SamReader query API * @param rawIntervals SimpleIntervals to be converted * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API */ static QueryInterval[] prepareQueryIntervals( final List<Interval> rawIntervals, final SAMSequenceDictionary sequenceDictionary ) { if ( rawIntervals == null || rawIntervals.isEmpty() ) { return null; } // Convert each SimpleInterval to a QueryInterval final QueryInterval[] convertedIntervals = rawIntervals.stream() .map(rawInterval -> convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary)) .toArray(QueryInterval[]::new); // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API return QueryInterval.optimizeIntervals(convertedIntervals); } /**
/**
 * Two intervals are equal when they have the same runtime class and {@code compareTo}
 * reports no difference between them.
 *
 * @param o the object to compare against; may be null
 * @return true if {@code o} is this object, or is of the same class and compares as 0
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    final boolean sameClass = o != null && o.getClass() == getClass();
    if (!sameClass) {
        return false;
    }
    final QueryInterval other = (QueryInterval) o;
    return compareTo(other) == 0;
}
/**
 * Builds an iterator for a single-interval query on the named sequence, beginning at
 * {@code start}.
 *
 * @param sequence name of the reference sequence, resolved to an index through the file header
 * @param start    start coordinate of the query interval
 * @return an iterator over the records selected by that one interval
 */
@Override
public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
    final int referenceIndex = getFileHeader().getSequenceIndex(sequence);
    // NOTE(review): the -1 end presumably marks the interval as open-ended - confirm.
    final QueryInterval[] startInterval = {new QueryInterval(referenceIndex, start, -1)};
    return new CRAMIntervalIterator(startInterval, true);
}
/**
 * Checks every adjacent pair of intervals and rejects the array as soon as one pair is out of
 * order, overlapping, or abutting. Arrays with fewer than two elements always pass.
 *
 * @param intervals the intervals to validate, in the order they will be used
 * @throws java.lang.IllegalArgumentException if the intervals are not optimized
 * @see QueryInterval#optimizeIntervals(QueryInterval[])
 */
private void assertIntervalsOptimized(final QueryInterval[] intervals) {
    for (int idx = 1; idx < intervals.length; idx++) {
        final QueryInterval earlier = intervals[idx - 1];
        final QueryInterval later = intervals[idx];

        // Pick the complaint for the first rule this pair breaks; rules are tried in a fixed order.
        final String template;
        if (earlier.compareTo(later) >= 0) {
            template = "List of intervals is not sorted: %s >= %s";
        } else if (earlier.overlaps(later)) {
            template = "List of intervals is not optimized: %s intersects %s";
        } else if (earlier.endsAtStartOf(later)) {
            template = "List of intervals is not optimized: %s abuts %s";
        } else {
            continue;
        }
        throw new IllegalArgumentException(String.format(template, earlier, later));
    }
}
@Test public void testOptimizeIntervals() throws Exception { final QueryInterval[] overlappingIntervals = new QueryInterval[] { new QueryInterval(0, 1520, 1521), new QueryInterval(0, 1521, 1525) }; final QueryInterval[] optimizedOverlapping = QueryInterval.optimizeIntervals(overlappingIntervals); new QueryInterval(0, 1520, 1521), new QueryInterval(0, 1522, 1525) }; final QueryInterval[] optimizedAbutting = QueryInterval.optimizeIntervals(abuttingIntervals); new QueryInterval(0, 1520, 1525), }; new QueryInterval(0, 1520, 1521), new QueryInterval(0, 1523, 1525) }; final QueryInterval[] optimizedSeparated = QueryInterval.optimizeIntervals(nonOptimizableSeparatedIntervals);
/** * Converts a List of SimpleIntervals into the format required by the SamReader query API * @param rawIntervals SimpleIntervals to be converted * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API */ static QueryInterval[] prepareQueryIntervals( final List<Interval> rawIntervals, final SAMSequenceDictionary sequenceDictionary ) { if ( rawIntervals == null || rawIntervals.isEmpty() ) { return null; } // Convert each SimpleInterval to a QueryInterval final QueryInterval[] convertedIntervals = rawIntervals.stream() .map(rawInterval -> convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary)) .toArray(QueryInterval[]::new); // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API return QueryInterval.optimizeIntervals(convertedIntervals); } /**
/**
 * Builds an iterator for a single-interval query on the named sequence, beginning at
 * {@code start}.
 *
 * @param sequence name of the reference sequence, resolved to an index through the file header
 * @param start    start coordinate of the query interval
 * @return an iterator over the records selected by that one interval
 */
@Override
public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
    final int referenceIndex = getFileHeader().getSequenceIndex(sequence);
    // NOTE(review): the -1 end presumably marks the interval as open-ended - confirm.
    final QueryInterval[] startInterval = {new QueryInterval(referenceIndex, start, -1)};
    return new CRAMIntervalIterator(startInterval, true);
}
/**
 * Checks every adjacent pair of intervals and rejects the array as soon as one pair is out of
 * order, overlapping, or abutting. Arrays with fewer than two elements always pass.
 *
 * @param intervals the intervals to validate, in the order they will be used
 * @throws java.lang.IllegalArgumentException if the intervals are not optimized
 * @see QueryInterval#optimizeIntervals(QueryInterval[])
 */
private void assertIntervalsOptimized(final QueryInterval[] intervals) {
    for (int idx = 1; idx < intervals.length; idx++) {
        final QueryInterval earlier = intervals[idx - 1];
        final QueryInterval later = intervals[idx];

        // Pick the complaint for the first rule this pair breaks; rules are tried in a fixed order.
        final String template;
        if (earlier.compareTo(later) >= 0) {
            template = "List of intervals is not sorted: %s >= %s";
        } else if (earlier.overlaps(later)) {
            template = "List of intervals is not optimized: %s intersects %s";
        } else if (earlier.endsAtStartOf(later)) {
            template = "List of intervals is not optimized: %s abuts %s";
        } else {
            continue;
        }
        throw new IllegalArgumentException(String.format(template, earlier, later));
    }
}
/**
 * Runs an overlapping query with several intervals at once and checks the names of the
 * records that come back.
 *
 * @param cramFileName      CRAM file to query
 * @param referenceFileName reference file for the CRAM
 * @param intervals         raw query intervals; they are optimized before being used
 * @param expectedNames     names handed to the shared query check as the expected result
 * @throws IOException declared for the shared query check
 */
@Test(dataProvider="multipleIntervalOverlapping")
public void testQueryOverlappingMultipleIntervals(
        final File cramFileName,
        final File referenceFileName,
        final QueryInterval[] intervals,
        final String[] expectedNames) throws IOException
{
    final QueryInterval[] mergedIntervals = QueryInterval.optimizeIntervals(intervals);
    // The case is only meaningful if more than one interval is left after merging.
    Assert.assertTrue(mergedIntervals.length > 1);
    doQueryTest(
            samReader -> samReader.queryOverlapping(mergedIntervals),
            cramFileName,
            referenceFileName,
            expectedNames);
}