/**
 * Look up a sequence record by index. First sequence in the header is the 0th.
 *
 * @param sequenceIndex zero-based index of the sequence to fetch
 * @return The corresponding sequence record, or null if the index is out of range
 *         or if no sequence dictionary is set.
 */
public SAMSequenceRecord getSequence(final int sequenceIndex) {
    // Guard against a missing dictionary so this overload behaves like the by-name lookup
    // instead of failing with a NullPointerException.
    return mSequenceDictionary == null ? null : mSequenceDictionary.getSequence(sequenceIndex);
}
/**
 * Finds the sequence record registered under the given name.
 *
 * @param name sequence name to look up
 * @return the matching sequence record, or null when there is no sequence dictionary
 *         or the dictionary does not contain the name
 */
public SAMSequenceRecord getSequence(final String name) {
    if (mSequenceDictionary == null) {
        return null;
    }
    return mSequenceDictionary.getSequence(name);
}
/**
 * Resolves a sequence name to its record.
 *
 * @param name sequence name to resolve
 * @return the record stored under {@code name}; null if no sequence dictionary is present
 *         or the name is not found in it
 */
public SAMSequenceRecord getSequence(final String name) {
    if (mSequenceDictionary != null) {
        return mSequenceDictionary.getSequence(name);
    }
    return null;
}
/**
 * Tests whether a contig index is known to the dictionary.
 * The remembered {@code lastIndex} is compared first; the dictionary is only
 * consulted when that comparison fails.
 *
 * @param contigIndex an integer offset that might map to a contig in this dictionary
 * @return true if contigIndex is in dictionary, false otherwise
 */
@Requires("contigIndex >= 0")
public final boolean hasContigIndex(final int contigIndex) {
    if (lastIndex == contigIndex) {
        return true;
    }
    return dict.getSequence(contigIndex) != null;
}
/**
 * Tests whether a contig name is known to the dictionary.
 * The remembered {@code lastContig} is compared first; the dictionary is only
 * consulted when that comparison fails.
 *
 * @param contig a non-null contig we want to test
 * @return true if contig is in dictionary, false otherwise
 */
@Requires("contig != null")
public final boolean hasContig(final String contig) {
    if (contig.equals(lastContig)) {
        return true;
    }
    return dict.getSequence(contig) != null;
}
/**
 * Verifies that every chain's "to" sequence name exists in the given sequence dictionary.
 *
 * @param sequenceDictionary dictionary the "to" sequence names are checked against
 * @throws SAMException on the first chain whose "to" sequence name is not in the dictionary
 */
public void validateToSequences(final SAMSequenceDictionary sequenceDictionary) {
    for (final Chain chain : chains.getAll()) {
        final boolean known = sequenceDictionary.getSequence(chain.toSequenceName) != null;
        if (known) {
            continue;
        }
        throw new SAMException("Sequence " + chain.toSequenceName + " from chain file is not found in sequence dictionary.");
    }
}
/**
 * Returns the reference sequence for the dictionary entry at the current index and
 * advances the index by one.
 *
 * @return the next reference sequence, or null once the index has reached the dictionary size
 */
@Override
public ReferenceSequence nextSequence() {
    if (currentIndex < dictionary.size()) {
        // Post-increment is kept inline so the index advances before the lookup runs.
        final SAMSequenceRecord nextRecord = dictionary.getSequence(currentIndex++);
        final String nextName = nextRecord.getSequenceName();
        return getSequence(nextName);
    }
    return null;
}
/**
 * Widens every interval by the extension declared in this class's
 * {@code ActiveRegionTraversalParameters} annotation, then sorts and merges the result.
 * Starts are clamped to 1 and stops to the length of the interval's contig in the reference.
 *
 * @param intervals       the intervals to widen
 * @param genomeLocParser parser used to create the widened locations
 * @param reference       reference whose sequence dictionary supplies the contig lengths
 * @return the widened intervals, sorted and merged with {@code IntervalMergingRule.ALL}
 */
public final GenomeLocSortedSet extendIntervals(
        final GenomeLocSortedSet intervals,
        final GenomeLocParser genomeLocParser,
        final ReferenceSequenceFile reference ) {
    final ActiveRegionTraversalParameters traversalParameters =
            this.getClass().getAnnotation(ActiveRegionTraversalParameters.class);
    final int padding = traversalParameters.extension();

    final List<GenomeLoc> widened = new ArrayList<GenomeLoc>();
    for ( final GenomeLoc original : intervals.toList() ) {
        // Never start before the first base of the contig.
        final int start = Math.max( 1, original.getStart() - padding );
        // Never run past the last base of the contig.
        final int contigLength =
                reference.getSequenceDictionary().getSequence(original.getContig()).getSequenceLength();
        final int stop = Math.min( contigLength, original.getStop() + padding );
        widened.add( genomeLocParser.createGenomeLoc(original.getContig(), start, stop) );
    }
    return IntervalUtils.sortAndMergeIntervals(genomeLocParser, widened, IntervalMergingRule.ALL);
}
/** * Gets the stop of the expanded window, bounded if necessary by the contig. * @param locus The locus to expand. * @return The expanded window. */ private int getWindowStop( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); if(locus.getStop() > sequenceLength) return sequenceLength; return Math.min( locus.getStop() + windowStop, sequenceLength ); } }
/**
 * Checks a read against the length of the current interval's contig by delegating to the
 * two-argument overload.
 *
 * @param read the read to check
 * @return true if it aligns off the end
 */
private boolean realignmentProducesBadAlignment(final GATKSAMRecord read) {
    final int lengthOfCurrentContig = referenceReader
            .getSequenceDictionary()
            .getSequence(currentInterval.getContig())
            .getSequenceLength();
    final boolean bad = realignmentProducesBadAlignment(read, lengthOfCurrentContig);
    return bad;
}
/**
 * Builds the genome-loc parser from the header's sequence dictionary and records the
 * name of the dictionary entry at index 1.
 */
@BeforeClass
public void setup() {
    genomeLocParser = new GenomeLocParser(
            header.getSequenceDictionary());
    contigOneName = header
            .getSequenceDictionary()
            .getSequence(1)
            .getSequenceName();
}
/**
 * Validates the location spanning positions 1 to 25 of the first sequence in the fasta.
 */
@Test
public void testReferenceStart() {
    final String firstSequenceName =
            sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName();
    validateLocation( genomeLocParser.createGenomeLoc(firstSequenceName, 1, 25) );
}
/**
 * Adds a location on the contig at dictionary index 2 first, then a region on contig one,
 * and checks that the set holds both with contig one ordered ahead of contig two.
 *
 * @param secondContigStart start position of the region added on contig one
 */
@Test(dataProvider = "secondContigStart")
public void addMergeContiguousOutOfOrder(final int secondContigStart) {
    final String contigTwoName = header.getSequenceDictionary().getSequence(2).getSequenceName();
    assertTrue(mSortedSet.size() == 0);

    // Insert the later contig first ...
    final GenomeLoc onContigTwo = genomeLocParser.createGenomeLoc(contigTwoName, 1, 50);
    mSortedSet.add(onContigTwo);

    // ... then the earlier one.
    final GenomeLoc onContigOne = genomeLocParser.createGenomeLoc(contigOneName, secondContigStart, 80);
    mSortedSet.addRegion(onContigOne);

    assertTrue(mSortedSet.size() == 2);
    assertTrue(mSortedSet.toList().get(0).getContig().equals(contigOneName));
    assertTrue(mSortedSet.toList().get(1).getContig().equals(contigTwoName));
}
/**
 * Opens the fasta through a caching reader, logs the first contig's name and length together
 * with the cache and query sizes, and runs the sequential read check.
 *
 * @param fasta     the fasta file under test
 * @param cacheSize cache size parameter, passed through {@code getCacheSize}
 * @param querySize query size handed to {@code testSequential}
 * @throws FileNotFoundException declared by the test signature
 */
@Test(dataProvider = "fastas", enabled = !DEBUG)
public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
    final CachingIndexedFastaSequenceFile caching =
            new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true, false);

    SAMSequenceRecord firstContig = caching.getSequenceDictionary().getSequence(0);
    final String message = String.format(
            "Checking contig %s length %d with cache size %d and query size %d",
            firstContig.getSequenceName(), firstContig.getSequenceLength(), cacheSize, querySize);
    logger.warn(message);

    testSequential(caching, fasta, querySize);
}
/**
 * Before each test method: rebuilds the genome-loc parser from the header's sequence
 * dictionary and records the name of the first dictionary entry.
 */
@BeforeMethod
public void setup() {
    genomeLocParser = new GenomeLocParser(
            header.getSequenceDictionary());
    firstContig = header
            .getSequenceDictionary()
            .getSequence(0)
            .getSequenceName();
}
@Requires("referenceReader.isUppercasingBases()") public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) { // set up the reference if we haven't done so yet if ( reference == null ) { // first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read) int padLeft = Math.max(loc.getStart()- referencePadding, 1); int padRight = Math.min(loc.getStop()+ referencePadding, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); loc = parser.createGenomeLoc(loc.getContig(), loc.getContigIndex(), padLeft, padRight); reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); } return reference; }
/**
 * Iterating the empty VCF over an interval on the first dictionary sequence must
 * produce no variants.
 */
@Test
public void testNoVariants() {
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval(this.dict.getSequence(0).getSequenceName(), 1, 100));
    final VCFFileReader reader = getReader(EMPTY_VCF);
    try {
        final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
        Assert.assertFalse(iterator.hasNext());
    } finally {
        // Close the reader even when the assertion fails, so a failing test does not leak it.
        reader.close();
    }
}
/**
 * Queries outside the bounds of the shard should result in reference context window
 * trimmed at the shard boundary. The shard covers positions 1-50 of the first sequence;
 * the queried locus is 50-51, and the returned window is expected to be position 50 only.
 */
@Test
public void testBoundsFailure() {
    final String firstContigName = sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName();

    final GenomeLoc shardLoc = genomeLocParser.createGenomeLoc(firstContigName, 1, 50);
    Shard shard = new MockLocusShard(genomeLocParser, Collections.singletonList(shardLoc));
    LocusShardDataProvider dataProvider = new LocusShardDataProvider(
            shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, sequenceFile, null);
    LocusReferenceView view = new LocusReferenceView(dataProvider);

    // The query extends one base past the end of the shard.
    GenomeLoc locus = genomeLocParser.createGenomeLoc(firstContigName, 50, 51);
    ReferenceContext rc = view.getReferenceContext(locus);

    Assert.assertTrue(rc.getLocus().equals(locus));
    final GenomeLoc expectedWindow = genomeLocParser.createGenomeLoc(firstContigName, 50);
    Assert.assertTrue(rc.getWindow().equals(expectedWindow));
    Assert.assertTrue(rc.getBases().length == 1);
}
@Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider") public void testActiveRegionExtensionOnContig(TraverseActiveRegions t) { DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); Collection<ActiveRegion> activeRegions = getActiveRegions(t, walker, intervals).values(); for (ActiveRegion activeRegion : activeRegions) { GenomeLoc loc = activeRegion.getExtendedLoc(); // Contract: active region extensions must stay on the contig Assert.assertTrue(loc.getStart() > 0, "Active region extension begins at location " + loc.getStart() + ", past the left end of the contig"); int refLen = dictionary.getSequence(loc.getContigIndex()).getSequenceLength(); Assert.assertTrue(loc.getStop() <= refLen, "Active region extension ends at location " + loc.getStop() + ", past the right end of the contig"); } }
@BeforeTest public void before() { // Create GenomeLoc ReferenceSequenceFile fasta = CachingIndexedFastaSequenceFile.checkAndCreate(new File(privateTestDir + "iupacFASTA.fasta")); GenomeLocParser genomeLocParser = new GenomeLocParser(fasta); chr1 = fasta.getSequenceDictionary().getSequence(0).getSequenceName(); genomeLoc = genomeLocParser.createGenomeLoc(chr1, 5, 10); }