/** * Trivial helper that returns true if elt has the same length as rec1 or rec2 * @param elt record to test * @param rec1 first record to test for length equivalence * @param rec2 first record to test for length equivalence * @return true if elt has the same length as either rec1 or rec2 */ private static boolean isHumanSeqRecord(SAMSequenceRecord elt, SAMSequenceRecord rec1, SAMSequenceRecord rec2 ) { return elt.getSequenceLength() == rec1.getSequenceLength() || elt.getSequenceLength() == rec2.getSequenceLength(); }
/** * @return The sum of the lengths of the sequences in this dictionary */ public long getReferenceLength() { long len = 0L; for (final SAMSequenceRecord seq : getSequences()) { len += seq.getSequenceLength(); } return len; }
@Override public ReferenceSequence nextSequence() { SAMSequenceRecord sequence = sequenceRecordIterator.next(); return getSubsequenceAt(sequence.getSequenceName(), 1L, sequence.getSequenceLength()); }
private void validateSequenceLengths(SAMFileHeader header) { SAMSequenceDictionary dict = header.getSequenceDictionary(); for (SAMSequenceRecord seq : dict.getSequences()) { if (seq.getSequenceLength() > 536870911) { throw new RuntimeException("Sequence lengths > 2^29-1 are not supported"); } } }
/** * @return the largest position on the last sequence index */ @Override public int getMaxPosition() { SAMSequenceRecord lastSequenceRecord = header.getSequence(getMaxSequenceIndex()); return lastSequenceRecord.getSequenceLength(); } }
/** Set the sequence dictionary entries for the index property list. */ @Override public void setIndexSequenceDictionary(final SAMSequenceDictionary dict) { for (final SAMSequenceRecord seq : dict.getSequences()) { final String contig = SEQUENCE_DICTIONARY_PROPERTY_PREDICATE + seq.getSequenceName(); final String length = String.valueOf(seq.getSequenceLength()); addProperty(contig,length); } } }
/** Set the sequence dictionary entries for the index property list. */ @Override public void setIndexSequenceDictionary(final SAMSequenceDictionary dict) { for (final SAMSequenceRecord seq : dict.getSequences()) { final String contig = SEQUENCE_DICTIONARY_PROPERTY_PREDICATE + seq.getSequenceName(); final String length = String.valueOf(seq.getSequenceLength()); addProperty(contig,length); } } }
@DataProvider(name = "GenomeLocOnContig") public Object[][] makeGenomeLocOnContig() { final List<Object[]> tests = new LinkedList<Object[]>(); final int contigLength = header.getSequence(0).getSequenceLength(); for ( int start = -10; start < contigLength + 10; start++ ) { for ( final int len : Arrays.asList(1, 10, 20) ) { tests.add(new Object[]{ "chr1", start, start + len }); } } return tests.toArray(new Object[][]{}); }
protected String getSQLine(final SAMSequenceRecord sequenceRecord) { final int numAttributes = sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0; final String[] fields = new String[3 + numAttributes]; fields[0] = HEADER_LINE_START + HeaderRecordType.SQ; fields[1] = SAMSequenceRecord.SEQUENCE_NAME_TAG + TAG_KEY_VALUE_SEPARATOR + sequenceRecord.getSequenceName(); fields[2] = SAMSequenceRecord.SEQUENCE_LENGTH_TAG + TAG_KEY_VALUE_SEPARATOR + Integer.toString(sequenceRecord.getSequenceLength()); encodeTags(sequenceRecord, fields, 3); return StringUtil.join(FIELD_SEPARATOR, fields); }
private void writeSQLine(final SAMSequenceRecord sequenceRecord) { final int numAttributes =sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0; final String[] fields = new String[3 + numAttributes]; fields[0] = HEADER_LINE_START + HeaderRecordType.SQ; fields[1] = SAMSequenceRecord.SEQUENCE_NAME_TAG + TAG_KEY_VALUE_SEPARATOR + sequenceRecord.getSequenceName(); fields[2] = SAMSequenceRecord.SEQUENCE_LENGTH_TAG + TAG_KEY_VALUE_SEPARATOR + Integer.toString(sequenceRecord.getSequenceLength()); encodeTags(sequenceRecord, fields, 3); println(StringUtil.join(FIELD_SEPARATOR, fields)); }
VCFContigHeaderLine(final SAMSequenceRecord sequenceRecord, final String assembly) { // Using LinkedHashMap to preserve order of keys in contig line (ID, length, assembly) super(VCFHeader.CONTIG_KEY, new LinkedHashMap<String, String>() {{ // Now inside an init block in an anon HashMap subclass this.put("ID", sequenceRecord.getSequenceName()); this.put("length", Integer.toString(sequenceRecord.getSequenceLength())); if ( assembly != null ) this.put("assembly", assembly); }}); this.contigIndex = sequenceRecord.getSequenceIndex(); }
/** * Gets the stop of the expanded window, bounded if necessary by the contig. * @param locus The locus to expand. * @return The expanded window. */ private int getWindowStop( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); if(locus.getStop() > sequenceLength) return sequenceLength; return Math.min( locus.getStop() + windowStop, sequenceLength ); } }
VCFContigHeaderLine(final SAMSequenceRecord sequenceRecord, final String assembly) { // Using LinkedHashMap to preserve order of keys in contig line (ID, length, assembly) super(VCFHeader.CONTIG_KEY, new LinkedHashMap<String, String>() {{ // Now inside an init block in an anon HashMap subclass this.put("ID", sequenceRecord.getSequenceName()); this.put("length", Integer.toString(sequenceRecord.getSequenceLength())); if ( assembly != null ) this.put("assembly", assembly); }}); this.contigIndex = sequenceRecord.getSequenceIndex(); }
/** * Helper function to print out a sequence dictionary */ private void printDictionary(String name, SAMSequenceDictionary dict) { log.info(name); for (final SAMSequenceRecord contig : dict.getSequences()) { log.info(" SN=%s LN=%d%n", contig.getSequenceName(), contig.getSequenceLength()); } } }
/** * Creates an IntervalList from the given sequence name * * @param header header to use to create IntervalList * @param sequenceName name of sequence in header * @return a new intervalList with given header that contains the reference name */ public static IntervalList fromName(final SAMFileHeader header, final String sequenceName) { final IntervalList ref = new IntervalList(header); ref.add(new Interval(sequenceName, 1, header.getSequence(sequenceName).getSequenceLength())); return ref; }
private static ReferenceSequence buildReferenceSequence (SAMSequenceRecord samSequenceRecord) { byte[] bases = new byte[samSequenceRecord.getSequenceLength()] ; Arrays.fill(bases, (byte)'N'); return new ReferenceSequence(samSequenceRecord.getSequenceName(), samSequenceRecord.getSequenceIndex(), bases) ; }
/** * Creates a GenomeLoc than spans the entire contig. * @param contigName Name of the contig. * @return A locus spanning the entire contig. */ @Requires("contigName != null") @Ensures("result != null") public GenomeLoc createOverEntireContig(final String contigName) { SAMSequenceRecord contig = getContigInfo().getSequence(contigName); return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true); }
/** * Test the end of a contig. */ @Test public void testReferenceEnd() { // Test the last 25 bases of the first contig. SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1); final int contigStart = selectedContig.getSequenceLength() - 24; final int contigStop = selectedContig.getSequenceLength(); validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),contigStart,contigStop) ); }
@Test public void realignAtContigBorderTest() { final int contigEnd = header.getSequence(0).getSequenceLength(); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "goodRead", 0, contigEnd - 1, 2); read.setCigarString("2M"); Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), false); read.setCigarString("1M1D1M"); Assert.assertEquals(IndelRealigner.realignmentProducesBadAlignment(read, contigEnd), true); }
@Test public void testCigarOffEndOfReferenceValidation() throws Exception { final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder(); samBuilder.addFrag(String.valueOf(0), 0, 1, false); final int contigLength = samBuilder.getHeader().getSequence(0).getSequenceLength(); // Should hang off the end. samBuilder.addFrag(String.valueOf(1), 0, contigLength - 1, false); final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE); Assert.assertNotNull(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString())); Assert.assertEquals(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString()).getValue(), 1.0); }