/**
 * Reads the interval list at the given path and hands back the sequence dictionary
 * carried by its header.
 *
 * @param intervalList path of the interval list to read
 * @return the sequence dictionary found in the header of the parsed list
 */
@Override
SAMSequenceDictionary extractDictionary(Path intervalList) {
    final IntervalList parsedList = IntervalList.fromPath(intervalList);
    return parsedList.getHeader().getSequenceDictionary();
}
}; // closes the enclosing declaration, whose opening lies above this method
/**
 * Reads the interval list at the given path and hands back the sequence dictionary
 * carried by its header.
 *
 * @param intervalList path of the interval list to read
 * @return the sequence dictionary found in the header of the parsed list
 */
@Override
SAMSequenceDictionary extractDictionary(Path intervalList) {
    final IntervalList parsedList = IntervalList.fromPath(intervalList);
    return parsedList.getHeader().getSequenceDictionary();
}
}; // closes the enclosing declaration, whose opening lies above this method
/**
 * Builds a mask from the unique intervals of the supplied list.
 *
 * The header of the list is retained. When that header does not declare coordinate order,
 * {@code sort()} is invoked on the caller's list before the unique intervals are requested.
 * The sequence index and end position of the final unique interval are recorded; an empty
 * list records {@code -1} and {@code 0} instead.
 *
 * @param intervalList list supplying both the header and the intervals to mask
 */
public IntervalListReferenceSequenceMask(final IntervalList intervalList) {
    this.header = intervalList.getHeader();

    final boolean coordinateSorted =
            intervalList.getHeader().getSortOrder() == SAMFileHeader.SortOrder.coordinate;
    if (!coordinateSorted) {
        intervalList.sort();
    }

    final List<Interval> merged = intervalList.getUniqueIntervals();
    if (!merged.isEmpty()) {
        // The final element of the unique list marks where the mask ends.
        final Interval tail = merged.get(merged.size() - 1);
        lastSequenceIndex = header.getSequenceIndex(tail.getSequence());
        lastPosition = tail.getEnd();
    } else {
        lastSequenceIndex = -1;
        lastPosition = 0;
    }

    intervalIterator = new PeekableIterator<Interval>(merged.iterator());
}
/**
 * Merges the intervals of this list by delegating to
 * {@link #getUniqueIntervals(IntervalList, boolean)} with {@code this} as the list.
 *
 * Note: when the header's sort order is not coordinate, {@code sort()} is called on this
 * object first, so the call modifies the object in-place and is therefore difficult to work with.
 *
 * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
 * @return the list produced by {@link #getUniqueIntervals(IntervalList, boolean)}
 * @deprecated use {@code uniqued(concatenateNames).getIntervals()} or
 *             {@link #getUniqueIntervals(IntervalList, boolean)} instead.
 */
@Deprecated
public List<Interval> getUniqueIntervals(final boolean concatenateNames) {
    final SAMFileHeader.SortOrder declaredOrder = getHeader().getSortOrder();
    final boolean needsSorting = declaredOrder != SAMFileHeader.SortOrder.coordinate;
    if (needsSorting) {
        sort();
    }
    return getUniqueIntervals(this, concatenateNames);
}
/** * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals() * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. */ @Deprecated //use uniqued(concatenateNames).getIntervals() or the static version instead to avoid changing the underlying object. /** * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals() * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. */ public List<Interval> getUniqueIntervals(final boolean concatenateNames) { if (getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { sort(); } return getUniqueIntervals(this, concatenateNames); }
/**
 * Merges the intervals of this list by delegating to
 * {@link #getUniqueIntervals(IntervalList, boolean)} with {@code this} as the list.
 *
 * Note: when the header's sort order is not coordinate, {@code sort()} is called on this
 * object first, so the call modifies the object in-place and is therefore difficult to work with.
 *
 * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
 * @return the list produced by {@link #getUniqueIntervals(IntervalList, boolean)}
 * @deprecated use {@code uniqued(concatenateNames).getIntervals()} or
 *             {@link #getUniqueIntervals(IntervalList, boolean)} instead.
 */
@Deprecated
public List<Interval> getUniqueIntervals(final boolean concatenateNames) {
    final SAMFileHeader.SortOrder declaredOrder = getHeader().getSortOrder();
    final boolean needsSorting = declaredOrder != SAMFileHeader.SortOrder.coordinate;
    if (needsSorting) {
        sort();
    }
    return getUniqueIntervals(this, concatenateNames);
}
/**
 * Builds a mask from the unique intervals of the supplied list.
 *
 * The header of the list is retained. When that header does not declare coordinate order,
 * a sorted copy is obtained via {@code sorted()} and the unique intervals are derived from
 * that copy; the caller's list is not reordered. The sequence index and end position of the
 * final unique interval are recorded; an empty list records {@code -1} and {@code 0} instead.
 *
 * @param intervalList list supplying both the header and the intervals to mask
 */
public IntervalListReferenceSequenceMask(final IntervalList intervalList) {
    this.header = intervalList.getHeader();

    // sorted() returns a new list instead of reordering the receiver, so its result has to be
    // carried forward; previously the return value was dropped and the call had no effect.
    final IntervalList coordinateOrdered =
            intervalList.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate
                    ? intervalList.sorted()
                    : intervalList;

    final List<Interval> uniqueIntervals = coordinateOrdered.uniqued().getIntervals();
    if (uniqueIntervals.isEmpty()) {
        lastSequenceIndex = -1;
        lastPosition = 0;
    } else {
        // The final element of the unique list marks where the mask ends.
        final Interval lastInterval = uniqueIntervals.get(uniqueIntervals.size() - 1);
        lastSequenceIndex = header.getSequenceIndex(lastInterval.getContig());
        lastPosition = lastInterval.getEnd();
    }
    intervalIterator = new PeekableIterator<Interval>(uniqueIntervals.iterator());
}
/**
 * Builds a mask from the unique intervals of the supplied list.
 *
 * The header of the list is retained. When that header does not declare coordinate order,
 * a sorted copy is obtained via {@code sorted()} and the unique intervals are derived from
 * that copy; the caller's list is not reordered. The sequence index and end position of the
 * final unique interval are recorded; an empty list records {@code -1} and {@code 0} instead.
 *
 * @param intervalList list supplying both the header and the intervals to mask
 */
public IntervalListReferenceSequenceMask(final IntervalList intervalList) {
    this.header = intervalList.getHeader();

    // sorted() returns a new list instead of reordering the receiver, so its result has to be
    // carried forward; previously the return value was dropped and the call had no effect.
    final IntervalList coordinateOrdered =
            intervalList.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate
                    ? intervalList.sorted()
                    : intervalList;

    final List<Interval> uniqueIntervals = coordinateOrdered.uniqued().getIntervals();
    if (uniqueIntervals.isEmpty()) {
        lastSequenceIndex = -1;
        lastPosition = 0;
    } else {
        // The final element of the unique list marks where the mask ends.
        final Interval lastInterval = uniqueIntervals.get(uniqueIntervals.size() - 1);
        lastSequenceIndex = header.getSequenceIndex(lastInterval.getContig());
        lastPosition = lastInterval.getEnd();
    }
    intervalIterator = new PeekableIterator<Interval>(uniqueIntervals.iterator());
}
/**
 * Returns a new IntervalList where each interval is padded by the specified amount of bases.
 *
 * The new list is created around a clone of this list's header. Each padded start is clamped
 * to 1 and each padded end is clamped to the length of the interval's sequence as reported by
 * the sequence dictionary. Strand and name are carried over unchanged.
 *
 * @param before number of bases to subtract from each interval's start; must be {@code >= 0}
 * @param after  number of bases to add to each interval's end; must be {@code >= 0}
 * @return a new list holding the padded intervals, in this list's iteration order
 * @throws IllegalArgumentException if {@code before} or {@code after} is negative
 */
public IntervalList padded(final int before, final int after) {
    if (before < 0 || after < 0) {
        throw new IllegalArgumentException("Padding values must be >= 0.");
    }

    final IntervalList padded = new IntervalList(this.getHeader().clone());
    final SAMSequenceDictionary dict = padded.getHeader().getSequenceDictionary();

    for (final Interval i : this) {
        final SAMSequenceRecord seq = dict.getSequence(i.getContig());
        // Widen to long before padding: with a large 'after', end + after would wrap around
        // in int arithmetic and the clamp would then pick a negative end.
        final int start = (int) Math.max(1L, (long) i.getStart() - before);
        final int end = (int) Math.min((long) seq.getSequenceLength(), (long) i.getEnd() + after);
        padded.add(new Interval(i.getContig(), start, end, i.isNegativeStrand(), i.getName()));
    }

    return padded;
}
/**
 * Returns a new IntervalList where each interval is padded by the specified amount of bases.
 *
 * The new list is created around a clone of this list's header. Each padded start is clamped
 * to 1 and each padded end is clamped to the length of the interval's sequence as reported by
 * the sequence dictionary. Strand and name are carried over unchanged.
 *
 * @param before number of bases to subtract from each interval's start; must be {@code >= 0}
 * @param after  number of bases to add to each interval's end; must be {@code >= 0}
 * @return a new list holding the padded intervals, in this list's iteration order
 * @throws IllegalArgumentException if {@code before} or {@code after} is negative
 */
public IntervalList padded(final int before, final int after) {
    if (before < 0 || after < 0) {
        throw new IllegalArgumentException("Padding values must be >= 0.");
    }

    final IntervalList padded = new IntervalList(this.getHeader().clone());
    final SAMSequenceDictionary dict = padded.getHeader().getSequenceDictionary();

    for (final Interval i : this) {
        final SAMSequenceRecord seq = dict.getSequence(i.getContig());
        // Widen to long before padding: with a large 'after', end + after would wrap around
        // in int arithmetic and the clamp would then pick a negative end.
        final int start = (int) Math.max(1L, (long) i.getStart() - before);
        final int end = (int) Math.min((long) seq.getSequenceLength(), (long) i.getEnd() + after);
        padded.add(new Interval(i.getContig(), start, end, i.isNegativeStrand(), i.getName()));
    }

    return padded;
}
/**
 * Creates an overlap detector populated with the unique intervals of the ribosomal interval file.
 *
 * With no ribosomal interval file the detector is returned empty. Otherwise the file is read,
 * a warning is logged when it holds no intervals, and its sequence dictionary is required to
 * equal the one in {@code header}.
 *
 * @param samFile                used only to name the SAM file in the mismatch error message
 * @param header                 header whose sequence dictionary the interval file must match
 * @param ribosomalIntervalsFile interval list to load, or {@code null} for an empty detector
 * @param log                    receives the warning about an interval file without intervals
 * @return the detector, with each unique interval registered under itself
 * @throws PicardException if the two sequence dictionaries differ
 */
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    if (ribosomalIntervalsFile == null) {
        return detector;
    }

    final IntervalList loaded = IntervalList.fromFile(ribosomalIntervalsFile);
    if (loaded.size() == 0) {
        log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
    }

    try {
        SequenceUtil.assertSequenceDictionariesEqual(
                header.getSequenceDictionary(),
                loaded.getHeader().getSequenceDictionary());
    } catch (SequenceUtil.SequenceListsDifferException e) {
        throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
    }

    // Each interval serves as both the stored object and the region it is registered under.
    final List<Interval> merged = loaded.uniqued().getIntervals();
    detector.addAll(merged, merged);
    return detector;
}
/**
 * Supplies rows of (action, expected base count, expected interval count) for the inverted
 * outputs. Expectations are expressed relative to the reference length and the number of
 * contigs in the sequence dictionary of {@code secondInput}.
 */
@DataProvider
public Object[][] actionAndTotalBasesWithInvertData() {
    // Parse the interval list once; both totals come from the same sequence dictionary.
    final IntervalList secondList = IntervalList.fromFile(secondInput);
    final long totalBasesInDict = secondList.getHeader().getSequenceDictionary().getReferenceLength();
    final int totalContigsInDict = secondList.getHeader().getSequenceDictionary().size();

    return new Object[][]{
            {IntervalListTools.Action.CONCAT, totalBasesInDict - 201, 2 + totalContigsInDict},
            {IntervalListTools.Action.UNION, totalBasesInDict - 201, 2 + totalContigsInDict},
            {IntervalListTools.Action.INTERSECT, totalBasesInDict - 140, 2 + totalContigsInDict},
            {IntervalListTools.Action.SUBTRACT, totalBasesInDict - 60, 2 + totalContigsInDict},
            {IntervalListTools.Action.SYMDIFF, totalBasesInDict - 61, 3 + totalContigsInDict},
            {IntervalListTools.Action.OVERLAPS, totalBasesInDict - 150, 2 + totalContigsInDict},
    };
}
/**
 * Creates an overlap detector populated with the unique intervals of the ribosomal interval file.
 *
 * With no ribosomal interval file the detector is returned empty. Otherwise the file is read,
 * a warning is logged when it holds no intervals, and its sequence dictionary is required to
 * equal the one in {@code header}.
 *
 * @param samFile                used only to name the SAM file in the mismatch error message
 * @param header                 header whose sequence dictionary the interval file must match
 * @param ribosomalIntervalsFile interval list to load, or {@code null} for an empty detector
 * @param log                    receives the warning about an interval file without intervals
 * @return the detector, with each unique interval registered under itself
 * @throws PicardException if the two sequence dictionaries differ
 */
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    if (ribosomalIntervalsFile == null) {
        return detector;
    }

    final IntervalList loaded = IntervalList.fromFile(ribosomalIntervalsFile);
    if (loaded.size() == 0) {
        log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
    }

    try {
        SequenceUtil.assertSequenceDictionariesEqual(
                header.getSequenceDictionary(),
                loaded.getHeader().getSequenceDictionary());
    } catch (SequenceUtil.SequenceListsDifferException e) {
        throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
    }

    // Each interval serves as both the stored object and the region it is registered under.
    final List<Interval> merged = loaded.uniqued().getIntervals();
    detector.addAll(merged, merged);
    return detector;
}
/**
 * Loads the same interval list once through {@code fromFile} and once through {@code fromPath},
 * then checks that both results carry the same dictionary and the same intervals.
 */
@Test
public void testIntervalListFrom() throws IOException {
    final String testPath = TEST_DIR.resolve("IntervalListFromVCFTestComp.interval_list").toString();

    final IntervalList viaFile = IntervalList.fromFile(new File(testPath));
    final IntervalList viaPath = IntervalList.fromPath(IOUtil.getPath(testPath));

    // Headers first: the dictionaries must agree before the contents are compared.
    viaFile.getHeader().getSequenceDictionary()
            .assertSameDictionary(viaPath.getHeader().getSequenceDictionary());

    Assert.assertEquals(
            CollectionUtil.makeCollection(viaFile.iterator()),
            CollectionUtil.makeCollection(viaPath.iterator()));
}
/**
 * Checks that {@code IntervalList.fromName} yields exactly one interval spanning
 * {@code 1..length} on the requested reference.
 *
 * @param intervalList  interval list whose header is handed to {@code fromName}
 * @param referenceName name of the reference sequence to build the list for
 * @param length        expected end coordinate of the single resulting interval
 */
@Test(dataProvider = "testFromSequenceData")
public void testFromSequenceName(final Path intervalList, final String referenceName, final Integer length) {
    final IntervalList headerSource = IntervalList.fromPath(intervalList);
    final IntervalList wholeReference = IntervalList.fromName(headerSource.getHeader(), referenceName);

    final Interval expectedSpan = new Interval(referenceName, 1, length);
    Assert.assertEquals(wholeReference.getIntervals(), CollectionUtil.makeList(expectedSpan));
}
/**
 * Checks that inverting a list twice gives the same intervals as the uniqued form of the list.
 *
 * @param list    list to invert twice
 * @param ignored second column of the shared data provider; not used here
 */
@Test(dataProvider = "invertData")
public void testInvertSquared(final IntervalList list, @SuppressWarnings("UnusedParameters") final IntervalList ignored) throws Exception {
    final IntervalList invertedOnce = IntervalList.invert(list);
    final IntervalList invertedTwice = IntervalList.invert(invertedOnce);

    // Copy the input into a fresh list sharing its header, so the input itself is never uniqued.
    final IntervalList copy = new IntervalList(list.getHeader());
    for (final Interval each : list) {
        copy.add(each);
    }

    Assert.assertEquals(
            CollectionUtil.makeCollection(invertedTwice.iterator()),
            CollectionUtil.makeCollection(copy.uniqued().iterator()));
}
/**
 * Builds an interval list on {@code chromosome} from consecutive (start, end) pairs.
 *
 * For each pair, the interval returned by {@code lookupIntervalContainingLocus} for the pair's
 * start supplies the strand and name of the new interval.
 *
 * @param source         list providing the header and the intervals that are looked up
 * @param chromosome     contig assigned to every composed interval
 * @param segmentsByPair flat sequence of coordinates read two at a time as start, end
 * @return a new list, sharing {@code source}'s header, with one interval per pair
 */
private static IntervalList composeIntervalList(final IntervalList source, final String chromosome, final int... segmentsByPair) {
    final IntervalList composed = new IntervalList(source.getHeader());

    int cursor = 0;
    while (cursor < segmentsByPair.length) {
        final int segmentStart = segmentsByPair[cursor];
        final Interval enclosing = lookupIntervalContainingLocus(source, chromosome, segmentStart);
        final int segmentEnd = segmentsByPair[cursor + 1];
        composed.add(new Interval(chromosome, segmentStart, segmentEnd, enclosing.isNegativeStrand(), enclosing.getName()));
        cursor += 2;
    }

    return composed;
}
/** Calculates a few statistics about the bait design that can then be output. */ void calculateStatistics(final IntervalList targets, final IntervalList baits) { this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount(); this.TARGET_COUNT = targets.size(); this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount(); this.BAIT_COUNT = baits.size(); this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY; // Figure out the intersection between all targets and all baits final IntervalList tmp = new IntervalList(targets.getHeader()); final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0); detector.addAll(baits.getIntervals(), baits.getIntervals()); for (final Interval target : targets) { final Collection<Interval> overlaps = detector.getOverlaps(target); if (overlaps.isEmpty()) { this.ZERO_BAIT_TARGETS++; } else { for (final Interval i : overlaps) tmp.add(target.intersect(i)); } } tmp.uniqued(); this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount(); }
@Test(expectedExceptions = IllegalArgumentException.class) public void changeHeader() { SAMFileHeader clonedHeader = fileHeader.clone(); clonedHeader.addSequence(new SAMSequenceRecord("4", 1000)); IntervalList usingClone1 = new IntervalList(clonedHeader); usingClone1.add(new Interval("4", 1, 100)); IntervalList usingClone2 = new IntervalList(clonedHeader); usingClone2.add(new Interval("4", 10, 20)); IntervalList expected = new IntervalList(clonedHeader); expected.add(new Interval("4", 1, 9)); expected.add(new Interval("4", 21, 100)); //pull rug from underneath (one call will change all the headers, since there's actually only one) usingClone1.getHeader().setSequenceDictionary(fileHeader.getSequenceDictionary()); //now interval lists are in "illegal state" since they contain contigs that are not in the header. //this next step should fail IntervalList.subtract(usingClone1, usingClone2); Assert.assertTrue(false); }
/**
 * Sets up the shared fixtures: a header read from the empty chr1/2/3 interval list, marked as
 * unsorted, and three interval lists built on that same header.
 */
public IntervalListTest() {
    fileHeader = IntervalList.fromPath(TEST_DIR.resolve("IntervalListchr123_empty.interval_list")).getHeader();
    fileHeader.setSortOrder(SAMFileHeader.SortOrder.unsorted);

    // list1, de-facto: 1:1-200 1:202-300 2:100-150 2:200-300
    list1 = new IntervalList(fileHeader);
    list1.add(new Interval("1", 1, 100));
    list1.add(new Interval("1", 101, 200));
    list1.add(new Interval("1", 202, 300));
    list1.add(new Interval("2", 200, 300));
    list1.add(new Interval("2", 100, 150));

    // list2, de-facto: 1:50-150 1:301-500 2:1-150 2:250-270 2:290-400
    list2 = new IntervalList(fileHeader);
    list2.add(new Interval("1", 50, 150));
    list2.add(new Interval("1", 301, 500));
    list2.add(new Interval("2", 1, 150));
    list2.add(new Interval("2", 250, 270));
    list2.add(new Interval("2", 290, 400));

    // list3, de-facto: 1:25-400 2:200-600 3:50-470
    list3 = new IntervalList(fileHeader);
    list3.add(new Interval("1", 25, 400));
    list3.add(new Interval("2", 200, 600));
    list3.add(new Interval("3", 50, 470));
}