/**
 * Builds an MRU-caching view over the given sequence dictionary.
 *
 * @param dict the sequence dictionary to wrap; must be non-null and contain at least one sequence
 * @throws IllegalArgumentException if {@code dict} is null or has size zero
 */
@Ensures("lastSSR == null")
public MRUCachingSAMSequenceDictionary(final SAMSequenceDictionary dict) {
    // The null check has to run first: the emptiness check dereferences dict.
    if (dict == null) {
        throw new IllegalArgumentException("Dictionary cannot be null");
    }
    if (dict.size() == 0) {
        throw new IllegalArgumentException("Dictionary cannot have size zero");
    }

    this.dict = dict;
}
/**
 * Closes this object: delegates to {@code closeCurrent()} and then moves
 * {@code currentReferenceIndex} to {@code dict.size()}.
 */
@Override
public void close() {
    closeCurrent();
    // dict.size() lies past the last dictionary index; presumably this marks the
    // traversal as exhausted -- TODO confirm against the code that reads currentReferenceIndex.
    currentReferenceIndex = dict.size();
}

// NOTE(review): the annotation below belongs to the next member, which is not visible here.
@Override
/**
 * Reports the highest sequence index as the last index of the header's sequence
 * dictionary (dictionary size minus one). No per-sequence state is inspected here;
 * an empty dictionary yields {@code -1}.
 *
 * @return the last index in the header's sequence dictionary
 */
@Override
public int getMaxSequenceIndex() {
    final int sequenceCount = header.getSequenceDictionary().size();
    return sequenceCount - 1;
}
/**
 * Renders a one-line summary containing the number of sequences, the total
 * reference length and the dictionary md5.
 *
 * @return the summary string
 */
@Override
public String toString() {
    final StringBuilder summary = new StringBuilder("SAMSequenceDictionary:( sequences:");
    summary.append(size());
    summary.append(" length:").append(getReferenceLength());
    // The two separate blanks before "md5:" are part of the established output format.
    summary.append(" ").append(" md5:").append(md5());
    summary.append(")");
    return summary.toString();
}
/**
 * Collects the names of all sequences in a dictionary.
 *
 * @param dict the sequence dictionary to read; dereferenced without a null check
 * @return a new {@link HashSet} holding the name of every sequence record in {@code dict}
 */
public static Set<String> getContigNames(SAMSequenceDictionary dict) {
    // Size the set up front from the number of dictionary entries.
    final int initialCapacity = Utils.optimumHashSize(dict.size());
    final Set<String> names = new HashSet<String>(initialCapacity);

    for (final SAMSequenceRecord record : dict.getSequences()) {
        final String name = record.getSequenceName();
        names.add(name);
    }

    return names;
}
/**
 * Convenience constructor: reads a SAM header from {@code headerStream} and
 * delegates to the {@code (SeekableStream, int)} constructor, passing the number
 * of sequences in that header's sequence dictionary.
 *
 * @param ss           stream handed unchanged to the delegated constructor
 * @param headerStream stream the SAM header is read from
 * @param conf         configuration passed to the header reader
 * @throws IOException declared by this constructor; presumably raised while reading
 *                     the header or by the delegated constructor -- confirm against those
 */
public BAMSplitGuesser(
        SeekableStream ss, InputStream headerStream, Configuration conf)
    throws IOException
{
    // A this(...) call must be the first statement, so the header is read inline.
    this(
        ss,
        SAMHeaderReader
            .readSAMHeaderFromStream(headerStream, conf)
            .getSequenceDictionary().size()
    );
}
/** * Create a CRAM indexer that writes BAI to a stream. * * @param output Index will be written here. output will be closed when finish() method is called. * @param fileHeader header for the corresponding bam file. */ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) { numReferences = fileHeader.getSequenceDictionary().size(); indexBuilder = new BAMIndexBuilder(fileHeader); outputWriter = new BinaryBAMIndexWriter(numReferences, output); }
/** * Create a CRAM indexer that writes BAI to a file. * * @param output binary BAM Index (.bai) file * @param fileHeader header for the corresponding bam file */ public CRAMBAIIndexer(final File output, final SAMFileHeader fileHeader) { numReferences = fileHeader.getSequenceDictionary().size(); indexBuilder = new BAMIndexBuilder(fileHeader); outputWriter = new BinaryBAMIndexWriter(numReferences, output); }
/**
 * Returns the next sequence in dictionary order and advances the cursor.
 *
 * @return the sequence looked up by the name of the dictionary entry at the current
 *         index, or {@code null} once the index has reached the dictionary size
 */
@Override
public ReferenceSequence nextSequence() {
    if (currentIndex < dictionary.size()) {
        // Post-increment: look up the entry at the current index, then move past it.
        final String sequenceName = dictionary.getSequence(currentIndex++).getSequenceName();
        return getSequence(sequenceName);
    }
    return null;
}
/** * Create a CRAM indexer that writes BAI to a stream. * * @param output Index will be written here. output will be closed when finish() method is called. * @param fileHeader header for the corresponding bam file. */ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) { numReferences = fileHeader.getSequenceDictionary().size(); indexBuilder = new BAMIndexBuilder(fileHeader); outputWriter = new BinaryBAMIndexWriter(numReferences, output); }
/**
 * Creates an indexer for the shard belonging to a single reference.
 *
 * @param output    stream handed to the shard index writer
 * @param header    file header whose sequence dictionary is used to build the index
 * @param reference index of the reference this shard covers; reference 0 is treated
 *                  as the first shard
 */
public BAMShardIndexer(OutputStream output, SAMFileHeader header, int reference) {
    this.indexBuilder = new BAMIndexBuilder(header.getSequenceDictionary(), reference);

    // Only the first shard passes the real reference count to the writer;
    // every other shard passes zero.
    int referencesForHeader = 0;
    if (reference == 0) {
        referencesForHeader = header.getSequenceDictionary().size();
    }

    this.outputWriter = new BinaryBAMShardIndexWriter(referencesForHeader, output);
    this.referenceIndex = reference;
}
/** * @param output binary BAM Index (.bai) file * @param fileHeader header for the corresponding bam file */ public BAMIndexer(final File output, final SAMFileHeader fileHeader) { numReferences = fileHeader.getSequenceDictionary().size(); indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary()); outputWriter = new BinaryBAMIndexWriter(numReferences, output); }
/**
 * Builds the indexer for one reference's index shard.
 *
 * @param output    stream passed to the shard index writer
 * @param header    file header supplying the sequence dictionary
 * @param reference index of the reference covered by this shard
 */
public BAMShardIndexer(OutputStream output, SAMFileHeader header, int reference) {
    this.indexBuilder = new BAMIndexBuilder(header.getSequenceDictionary(), reference);

    // The shard for reference 0 counts as the first shard and is the only one that
    // hands the dictionary size to the writer; all other shards hand it zero.
    final int headerReferenceCount;
    if (reference == 0) {
        headerReferenceCount = header.getSequenceDictionary().size();
    } else {
        headerReferenceCount = 0;
    }

    this.outputWriter = new BinaryBAMShardIndexWriter(headerReferenceCount, output);
    this.referenceIndex = reference;
}
/** * Prepare to index a BAM. * @param output Index will be written here. output will be closed when finish() method is called. * @param fileHeader header for the corresponding bam file. */ public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader) { numReferences = fileHeader.getSequenceDictionary().size(); indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary()); outputWriter = new BinaryBAMIndexWriter(numReferences, output); }
/**
 * Reads the header of the {@code test.cram} resource with the {@code auxf.fa}
 * resource configured as the reference source, and checks that the resulting
 * sequence dictionary holds exactly one sequence.
 *
 * @throws Exception if a resource cannot be resolved or the header cannot be read
 */
@Test
public void testCRAMHeaderReaderWithReference() throws Exception {
    final Configuration conf = new Configuration();
    final URI reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI();
    conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference.toString());

    // try-with-resources closes the stream even when reading the header throws;
    // previously the stream leaked on that path.
    final SAMFileHeader samHeader;
    try (final InputStream inputStream =
             ClassLoader.getSystemClassLoader().getResourceAsStream("test.cram")) {
        samHeader = SAMHeaderReader.readSAMHeaderFrom(inputStream, conf);
    }

    assertEquals(samHeader.getSequenceDictionary().size(), 1);
}
public BAMSplitGuesser( SeekableStream ss, InputStream headerStream, Configuration conf) throws IOException { inFile = ss; header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf); referenceSequenceCount = header.getSequenceDictionary().size(); bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory()); }
/**
 * Builds a split guesser for {@code ss} using a SAM header read from {@code headerStream}.
 *
 * @param ss           the stream stored as the input file
 * @param headerStream source of the SAM header
 * @param conf         configuration forwarded to the header reader
 * @throws IOException declared by this constructor; presumably raised while reading
 *                     the header -- confirm against SAMHeaderReader
 */
public BAMSplitGuesser(
        SeekableStream ss, InputStream headerStream, Configuration conf)
    throws IOException
{
    this.inFile = ss;
    this.header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf);
    // Number of reference sequences listed in the header's dictionary.
    this.referenceSequenceCount = this.header.getSequenceDictionary().size();
    this.bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory());
}
/**
 * Adds one new sequence of random bases to both the header and the in-memory
 * reference file.
 *
 * The sequence is named after its 1-based position: the current dictionary size
 * plus one. Bases are drawn from {@code "ACGTN"} using the shared {@code random}.
 *
 * @param header header that receives the new sequence record
 * @param length number of bases to generate
 * @param rsf    in-memory reference file that receives the generated bases
 */
private static void addRandomSequence(SAMFileHeader header, int length, InMemoryReferenceSequenceFile rsf) {
    final int ordinal = header.getSequenceDictionary().size() + 1;
    final String name = String.valueOf(ordinal);
    header.addSequence(new SAMSequenceRecord(name, length));

    final byte[] refBases = new byte[length];
    final byte[] alphabet = "ACGTN".getBytes();
    // One draw per base, in order, so the sequence of random numbers consumed is stable.
    int position = 0;
    while (position < refBases.length) {
        refBases[position] = alphabet[random.nextInt(alphabet.length)];
        position++;
    }

    rsf.add(name, refBases);
}
}
/**
 * Supplies one row per {@link IntervalListTools.Action}. Each row holds the action,
 * a value derived from the total reference length of the dictionary in
 * {@code secondInput}, and a value derived from that dictionary's contig count.
 *
 * NOTE(review): the second and third columns are presumably the expected totals
 * for the inverted output -- confirm against the test that consumes this provider.
 *
 * @return the parameter rows
 */
@DataProvider
public Object[][] actionAndTotalBasesWithInvertData() {
    // Parse the interval list once; both totals come from the same dictionary,
    // so reading the file a second time was redundant I/O.
    final IntervalList secondIntervals = IntervalList.fromFile(secondInput);
    final long totalBasesInDict = secondIntervals.getHeader().getSequenceDictionary().getReferenceLength();
    final int totalContigsInDict = secondIntervals.getHeader().getSequenceDictionary().size();

    return new Object[][]{
            {IntervalListTools.Action.CONCAT, totalBasesInDict - 201, 2 + totalContigsInDict},
            {IntervalListTools.Action.UNION, totalBasesInDict - 201, 2 + totalContigsInDict},
            {IntervalListTools.Action.INTERSECT, totalBasesInDict - 140, 2 + totalContigsInDict},
            {IntervalListTools.Action.SUBTRACT, totalBasesInDict - 60, 2 + totalContigsInDict},
            {IntervalListTools.Action.SYMDIFF, totalBasesInDict - 61, 3 + totalContigsInDict},
            {IntervalListTools.Action.OVERLAPS, totalBasesInDict - 150, 2 + totalContigsInDict},
    };
}
/**
 * Sums the aligned and unaligned record counts reported by the index metadata
 * of every reference sequence in the file's sequence dictionary.
 *
 * The reader is closed before returning, on both the normal and the exceptional path.
 *
 * @param samFile the file to open; it is expected to have an index (checked with an
 *                {@code assert}, so only enforced when assertions are enabled)
 * @return the summed per-reference aligned and unaligned record counts
 * @throws java.io.UncheckedIOException if closing the reader fails
 */
public static long countSamTotalRecord(final File samFile) {
    // try-with-resources: the reader was previously never closed.
    try (final SamReader reader = SamReaderFactory.make().open(samFile)) {
        assert reader.hasIndex();

        // The dictionary size does not change while iterating, so read it once.
        final int sequenceCount = reader.getFileHeader().getSequenceDictionary().size();

        long total = 0;
        for (int i = 0; i < sequenceCount; i++) {
            total += reader.indexing().getIndex().getMetaData(i).getAlignedRecordCount();
            total += reader.indexing().getIndex().getMetaData(i).getUnalignedRecordCount();
        }
        return total;
    } catch (final java.io.IOException e) {
        // Nothing in the body throws a checked IOException, so this can only come from close().
        throw new java.io.UncheckedIOException("Failed to close SAM reader for " + samFile, e);
    }
}
}