/**
 * Compares the sequence dictionaries of two headers, record by record.
 * <p>
 * If the dictionaries differ in size, the size difference is reported and no
 * per-record comparison is attempted. Otherwise every pair of records is
 * compared, even after a mismatch has been found.
 *
 * @param h1 first header
 * @param h2 second header
 * @return {@code true} only if the sizes match and every record comparison returned {@code true}
 */
private boolean compareSequenceDictionaries(final SAMFileHeader h1, final SAMFileHeader h2) {
    final List<SAMSequenceRecord> left = h1.getSequenceDictionary().getSequences();
    final List<SAMSequenceRecord> right = h2.getSequenceDictionary().getSequences();

    final int leftCount = left.size();
    final int rightCount = right.size();
    if (leftCount != rightCount) {
        reportDifference(leftCount, rightCount, "Length of sequence dictionaries");
        return false;
    }

    boolean allMatch = true;
    for (int index = 0; index < leftCount; index++) {
        // The comparison is invoked unconditionally so that no pair is skipped
        // once an earlier pair has failed; it is passed a 1-based position.
        if (!compareSequenceRecord(left.get(index), right.get(index), index + 1)) {
            allMatch = false;
        }
    }
    return allMatch;
}
/**
 * Sets up a filter that rejects SAMRecords which do not overlap any of the given intervals.
 *
 * @param intervals must be locus-ordered and non-overlapping; this is asserted against the
 *                  sequence dictionary of {@code samHeader} before the filter is initialised
 * @param samHeader header whose sequence dictionary is used for the ordering check
 */
public IntervalFilter(final List<Interval> intervals, final SAMFileHeader samHeader) {
    // Validate the input first; nothing is stored if the interval list is malformed.
    IntervalUtil.assertOrderedNonOverlapping(intervals.iterator(), samHeader.getSequenceDictionary());

    this.samHeader = samHeader;
    // A fresh iterator is taken here; the one used for validation above is not reused.
    this.intervals = intervals.iterator();
    advanceInterval();
}
/**
 * Creates an indexer for a single reference shard of a BAM index.
 *
 * @param output    stream handed to the shard index writer
 * @param header    header whose sequence dictionary drives the index builder
 * @param reference index of the reference this shard covers
 */
public BAMShardIndexer(OutputStream output, SAMFileHeader header, int reference) {
    indexBuilder = new BAMIndexBuilder(header.getSequenceDictionary(), reference);

    // Only the shard for reference 0 is given the real reference count;
    // every other shard passes 0 to the writer.
    final int referenceCountForHeader;
    if (reference == 0) {
        referenceCountForHeader = header.getSequenceDictionary().size();
    } else {
        referenceCountForHeader = 0;
    }

    outputWriter = new BinaryBAMShardIndexWriter(referenceCountForHeader, output);
    this.referenceIndex = reference;
}
/**
 * Rejects headers that contain a sequence longer than 2^29-1 bases.
 *
 * @param header header whose sequence dictionary is checked
 * @throws RuntimeException if any sequence length exceeds 2^29-1
 */
private void validateSequenceLengths(SAMFileHeader header) {
    // 2^29 - 1 == 536870911, the largest sequence length accepted here.
    final int maxSupportedLength = (1 << 29) - 1;

    for (SAMSequenceRecord record : header.getSequenceDictionary().getSequences()) {
        if (record.getSequenceLength() <= maxSupportedLength) {
            continue;
        }
        throw new RuntimeException("Sequence lengths > 2^29-1 are not supported");
    }
}
/**
 * Reads a SAM text header from the given path and returns its sequence dictionary.
 *
 * @param dictionary path of the file to decode
 * @return the sequence dictionary of the decoded header
 * @throws SAMException if an {@link IOException} occurs while reading or closing the file
 */
@Override
SAMSequenceDictionary extractDictionary(final Path dictionary) {
    final String location = dictionary.toUri().toString();
    try (BufferedLineReader lineReader = new BufferedLineReader(ParsingUtils.openInputStream(location))) {
        final SAMFileHeader decoded = new SAMTextHeaderCodec().decode(lineReader, dictionary.toString());
        return decoded.getSequenceDictionary();
    } catch (final IOException e) {
        throw new SAMException("Could not open sequence dictionary file: " + dictionary, e);
    }
}
},
/** * Create a CRAM indexer that writes BAI to a stream. * * @param output Index will be written here. output will be closed when finish() method is called. * @param fileHeader header for the corresponding bam file. */ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) { numReferences = fileHeader.getSequenceDictionary().size(); indexBuilder = new BAMIndexBuilder(fileHeader); outputWriter = new BinaryBAMIndexWriter(numReferences, output); }
/**
 * Checks that the output is writable, warns when the header has no sequence
 * dictionary, and creates the alignment summary metrics collector.
 *
 * @param header  header of the input; supplies the sequence dictionary and read groups
 * @param samFile not used by this method
 */
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(OUTPUT);

    final boolean dictionaryIsEmpty = header.getSequenceDictionary().isEmpty();
    if (dictionaryIsEmpty) {
        log.warn(INPUT.getAbsoluteFile() + " has no sequence dictionary. "
                + "If any reads in the file are aligned, then alignment summary metrics collection will fail.");
    }

    collector = new AlignmentSummaryMetricsCollector(
            METRIC_ACCUMULATION_LEVEL,
            header.getReadGroups(),
            REFERENCE_SEQUENCE != null, // reference-based metrics only when a reference was supplied
            ADAPTER_SEQUENCE,
            MAX_INSERT_SIZE,
            EXPECTED_PAIR_ORIENTATIONS,
            IS_BISULFITE_SEQUENCED);
}
/**
 * One-time fixture setup: builds the locus parser from the header's sequence
 * dictionary and records the name of the sequence at dictionary index 1.
 */
@BeforeClass
public void setup() {
    genomeLocParser = new GenomeLocParser(
            header.getSequenceDictionary());

    // Index 1 is used here, not index 0.
    contigOneName = header.getSequenceDictionary()
            .getSequence(1)
            .getSequenceName();
}
/**
 * Per-test fixture setup: rebuilds the locus parser from the header's sequence
 * dictionary and records the name of the sequence at dictionary index 0.
 */
@BeforeMethod
public void setup() {
    genomeLocParser = new GenomeLocParser(
            header.getSequenceDictionary());

    firstContig = header.getSequenceDictionary()
            .getSequence(0)
            .getSequenceName();
}
/**
 * Creates a scheduler whose filtered interval list is populated from an interval
 * set built out of the data source header's sequence dictionary.
 *
 * @param dataSource data source providing the header
 * @return the populated scheduler
 */
public static BAMScheduler createOverMappedReads(final SAMDataSource dataSource) {
    final BAMScheduler mappedScheduler = new BAMScheduler(dataSource, IntervalMergingRule.ALL);
    mappedScheduler.populateFilteredIntervalList(
            GenomeLocSortedSet.createSetFromSequenceDictionary(
                    dataSource.getHeader().getSequenceDictionary()));
    return mappedScheduler;
}
/**
 * Builds the output header from the input header.
 *
 * @param inHeader                  header to take the sequence dictionary and program records from
 * @param sortOrder                 sort order to set on the new header
 * @param removeAlignmentInformation when {@code true}, the sequence dictionary and program
 *                                  records are not copied
 * @return a new header carrying the requested sort order
 */
private SAMFileHeader createOutHeader(
        final SAMFileHeader inHeader,
        final SAMFileHeader.SortOrder sortOrder,
        final boolean removeAlignmentInformation) {
    final SAMFileHeader result = new SAMFileHeader();
    result.setSortOrder(sortOrder);

    if (removeAlignmentInformation) {
        // Nothing beyond the sort order is carried over in this mode.
        return result;
    }

    result.setSequenceDictionary(inHeader.getSequenceDictionary());
    result.setProgramRecords(inHeader.getProgramRecords());
    return result;
}
/**
 * Creates a split guesser over {@code ss}, reading the SAM header from a separate stream.
 *
 * @param ss           stream to be examined
 * @param headerStream stream the SAM header is read from
 * @param conf         configuration passed to the header reader
 * @throws IOException declared by this constructor; presumably raised while reading the header
 */
public BAMSplitGuesser(
        SeekableStream ss, InputStream headerStream, Configuration conf)
        throws IOException {
    this.inFile = ss;
    this.header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf);
    this.referenceSequenceCount = this.header.getSequenceDictionary().size();
    this.bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory());
}
/**
 * One-time fixture setup: creates an artificial SAM header, a locus parser over
 * its sequence dictionary, and a locus at position 1 on "chr1".
 */
@BeforeClass
public void beforeClass() {
    header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    genomeLocParser = new GenomeLocParser(
            header.getSequenceDictionary());
    loc = genomeLocParser.createGenomeLoc("chr1", 1);
}
/**
 * One-time fixture setup: builds a locus parser from a default artificial SAM
 * header and creates the read bin under test with the configured reference padding.
 */
@BeforeClass
public void init() {
    parser = new GenomeLocParser(
            ArtificialSAMUtils.createArtificialSamHeader().getSequenceDictionary());
    readBin = new ReadBin(parser, referencePadding);
}
/**
 * Parses a locus string and checks the resulting contig and position.
 * The parsed locus is expected to have its stop equal to its start.
 *
 * @param string text to parse
 * @param contig expected contig name
 * @param start  expected start (and stop) position
 */
@Test(dataProvider = "parseGenomeLoc")
public void testParsingPositions(final String string, final String contig, final int start) {
    final GenomeLocParser locParser = new GenomeLocParser(
            ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000000).getSequenceDictionary());

    final GenomeLoc parsed = locParser.parseGenomeLoc(string);

    Assert.assertEquals(parsed.getContig(), contig);
    Assert.assertEquals(parsed.getStart(), start);
    Assert.assertEquals(parsed.getStop(), start);
}
/**
 * Verifies that neither the sequence names in the header nor the reference
 * names on the records contain a space.
 *
 * @param filename name of the test file, resolved against {@code TEST_DATA_DIR}
 */
@Test(dataProvider = "samFilesWithSpaceInSequenceName")
public void testSamSequenceTruncation(final String filename) {
    final SamReader reader = SamReaderFactory.makeDefault().open(new File(TEST_DATA_DIR, filename));
    try {
        for (final SAMSequenceRecord sequence : reader.getFileHeader().getSequenceDictionary().getSequences()) {
            Assert.assertFalse(sequence.getSequenceName().contains(" "), sequence.getSequenceName());
        }
        for (final SAMRecord rec : reader) {
            Assert.assertFalse(rec.getReferenceName().contains(" "));
        }
    } finally {
        // Close in finally so a failing assertion does not leak the open reader.
        CloserUtil.close(reader);
    }
}
/**
 * Creates a new header populated from an existing one.
 * <p>
 * Sort order, group order, program records, read groups, sequence dictionary and
 * all attributes are transferred. The values returned by the source header's
 * getters are passed straight to the new header's setters; no explicit copying
 * of those objects is performed here.
 *
 * @param toCopy header to duplicate
 * @return the newly created header
 */
public static SAMFileHeader duplicateSAMFileHeader(SAMFileHeader toCopy) {
    final SAMFileHeader duplicate = new SAMFileHeader();

    duplicate.setSortOrder(toCopy.getSortOrder());
    duplicate.setGroupOrder(toCopy.getGroupOrder());
    duplicate.setProgramRecords(toCopy.getProgramRecords());
    duplicate.setReadGroups(toCopy.getReadGroups());
    duplicate.setSequenceDictionary(toCopy.getSequenceDictionary());

    for (final Map.Entry<String, String> attribute : toCopy.getAttributes()) {
        duplicate.setAttribute(attribute.getKey(), attribute.getValue());
    }

    return duplicate;
}
/**
 * Adds a locus on the sequence at dictionary index 2 before a locus on
 * {@code contigOneName}, then checks that the set lists the {@code contigOneName}
 * locus first.
 */
@Test
public void addRegionsOutOfOrder() {
    final String contigTwoName = header.getSequenceDictionary().getSequence(2).getSequenceName();
    assertTrue(mSortedSet.size() == 0);

    // Inserted first, via add().
    final GenomeLoc onContigTwo = genomeLocParser.createGenomeLoc(contigTwoName, 1, 50);
    mSortedSet.add(onContigTwo);

    // Inserted second, via addRegion().
    final GenomeLoc onContigOne = genomeLocParser.createGenomeLoc(contigOneName, 30, 80);
    mSortedSet.addRegion(onContigOne);

    assertTrue(mSortedSet.size() == 2);
    assertTrue(mSortedSet.toList().get(0).getContig().equals(contigOneName));
    assertTrue(mSortedSet.toList().get(1).getContig().equals(contigTwoName));
}
/**
 * Per-test fixture setup: creates an artificial header and locus parser, wires
 * both the reference data source and the parser into a fresh engine, and
 * initialises the object under test with that engine.
 */
@BeforeMethod
public void doBefore() {
    header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
    genomeLocParser = new GenomeLocParser(
            header.getSequenceDictionary());

    engine = new GenomeAnalysisEngine();
    engine.setReferenceDataSource(refFile);
    engine.setGenomeLocParser(genomeLocParser);

    // The second argument to initialize is deliberately null here.
    obj.initialize(engine, null);
}
/**
 * Sums the aligned and unaligned record counts reported by the index metadata
 * of every sequence in the file's sequence dictionary.
 * <p>
 * The reader is closed before returning, including when an error is thrown
 * while the index is being read.
 * NOTE(review): only per-sequence metadata is summed; confirm whether records
 * with no coordinate are meant to be excluded.
 *
 * @param samFile indexed SAM/BAM file to inspect
 * @return the sum of aligned and unaligned record counts over all sequences
 * @throws java.io.UncheckedIOException if closing the reader fails
 */
public static long countSamTotalRecord(final File samFile) {
    try (final SamReader reader = SamReaderFactory.make().open(samFile)) {
        assert reader.hasIndex();
        long total = 0;
        for (int i = 0; i < reader.getFileHeader().getSequenceDictionary().size(); i++) {
            total += reader.indexing().getIndex().getMetaData(i).getAlignedRecordCount();
            total += reader.indexing().getIndex().getMetaData(i).getUnalignedRecordCount();
        }
        return total;
    } catch (final java.io.IOException e) {
        // Fully qualified names are used so no new import is required in this file.
        throw new java.io.UncheckedIOException("Failed to close reader for " + samFile, e);
    }
}
}