private FaiEntryBuilder(final int index, final String header, final String firstSequenceLine, final int endOfLineLength, final long location) { if (header == null || header.charAt(0) != '>') { throw new SAMException("Wrong sequence header: " + header); } else if (firstSequenceLine == null) { throw new SAMException("Empty sequences could not be indexed"); } this.index = index; // parse the contig name (without the starting '>' and truncating white-spaces) this.contig = SAMSequenceRecord.truncateSequenceName(header.substring(1).trim()); this.location = location; this.basesPerLine = firstSequenceLine.length(); this.endOfLineLength = endOfLineLength; this.size = firstSequenceLine.length(); this.lessBasesFound = false; }
private FaiEntryBuilder(final int index, final String header, final String firstSequenceLine, final int endOfLineLength, final long location) { if (header == null || header.charAt(0) != '>') { throw new SAMException("Wrong sequence header: " + header); } else if (firstSequenceLine == null) { throw new SAMException("Empty sequences could not be indexed"); } this.index = index; // parse the contig name (without the starting '>' and truncating white-spaces) this.contig = SAMSequenceRecord.truncateSequenceName(header.substring(1).trim()); this.location = location; this.basesPerLine = firstSequenceLine.length(); this.endOfLineLength = endOfLineLength; this.size = firstSequenceLine.length(); this.lessBasesFound = false; }
private static Map<String, FastaSequenceIndexEntry> buildIndex(InputStream is) { Scanner scanner = new Scanner(is); int sequenceIndex = 0; Map<String, FastaSequenceIndexEntry> index = new HashMap<String, FastaSequenceIndexEntry>(); while (scanner.hasNext()) { // Tokenize and validate the index line. String result = scanner.findInLine("(.+)\\t+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); if (result == null) throw new RuntimeException("Found invalid line in index file:" + scanner.nextLine()); MatchResult tokens = scanner.match(); if (tokens.groupCount() != 5) throw new RuntimeException("Found invalid line in index file:" + scanner.nextLine()); // Skip past the line separator scanner.nextLine(); // Parse the index line. String contig = tokens.group(1); long size = Long.valueOf(tokens.group(2)); long location = Long.valueOf(tokens.group(3)); int basesPerLine = Integer.valueOf(tokens.group(4)); int bytesPerLine = Integer.valueOf(tokens.group(5)); contig = SAMSequenceRecord.truncateSequenceName(contig); // Build sequence structure index.put(contig, new FastaSequenceIndexEntry(contig, location, size, basesPerLine, bytesPerLine, sequenceIndex++)); } scanner.close(); return index; }
/** * Parse the contents of an index file, caching the results internally. * @param in InputStream to parse. */ private void parseIndexFile(InputStream in) { try (Scanner scanner = new Scanner(in)) { int sequenceIndex = 0; while( scanner.hasNext() ) { // Tokenize and validate the index line. String result = scanner.findInLine("(.+)\\t+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); if( result == null ) throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); MatchResult tokens = scanner.match(); if( tokens.groupCount() != 5 ) throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); // Skip past the line separator scanner.nextLine(); // Parse the index line. String contig = tokens.group(1); long size = Long.valueOf(tokens.group(2)); long location = Long.valueOf(tokens.group(3)); int basesPerLine = Integer.valueOf(tokens.group(4)); int bytesPerLine = Integer.valueOf(tokens.group(5)); contig = SAMSequenceRecord.truncateSequenceName(contig); // Build sequence structure add(new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine, sequenceIndex++) ); } } }
/** * Parse the contents of an index file, caching the results internally. * @param in InputStream to parse. */ private void parseIndexFile(InputStream in) { try (Scanner scanner = new Scanner(in)) { int sequenceIndex = 0; while( scanner.hasNext() ) { // Tokenize and validate the index line. String result = scanner.findInLine("(.+)\\t+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); if( result == null ) throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); MatchResult tokens = scanner.match(); if( tokens.groupCount() != 5 ) throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); // Skip past the line separator scanner.nextLine(); // Parse the index line. String contig = tokens.group(1); long size = Long.valueOf(tokens.group(2)); long location = Long.valueOf(tokens.group(3)); int basesPerLine = Integer.valueOf(tokens.group(4)); int bytesPerLine = Integer.valueOf(tokens.group(5)); contig = SAMSequenceRecord.truncateSequenceName(contig); // Build sequence structure add(new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine, sequenceIndex++) ); } } }
// Fifth captured group of the matched line, parsed straight to a primitive: Integer.valueOf would
// box the value only for it to be unboxed again by the assignment to an int.
int bytesPerLine = Integer.parseInt(tokens.group(5));
// Replace the contig name with its truncated form as returned by SAMSequenceRecord.truncateSequenceName.
contig = SAMSequenceRecord.truncateSequenceName(contig);
/**
 * Turns a parsed SQ header line into a SAMSequenceRecord and appends it to {@code sequences}.
 * <p>
 * Nothing is added when {@code requireTag} returns false for the sequence-name tag or for the
 * sequence-length tag. Otherwise both values are removed from the line, the name is passed through
 * SAMSequenceRecord.truncateSequenceName, and the remaining key/value pairs are transferred to the
 * new record.
 *
 * @param parsedHeaderLine the header line; asserted to be of record type SQ.
 * @throws NumberFormatException if the sequence-length value cannot be parsed as an int.
 */
private void parseSQLine(final ParsedHeaderLine parsedHeaderLine) {
    assert(HeaderRecordType.SQ.equals(parsedHeaderLine.getHeaderRecordType()));

    // The name tag is checked first; the length tag is only checked when the name tag passed.
    if (!parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_NAME_TAG)) {
        return;
    }
    if (!parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_LENGTH_TAG)) {
        return;
    }

    final String rawName = parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_NAME_TAG);
    final String truncatedName = SAMSequenceRecord.truncateSequenceName(rawName);
    final String lengthText = parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_LENGTH_TAG);
    final int length = Integer.parseInt(lengthText);

    final SAMSequenceRecord record = new SAMSequenceRecord(truncatedName, length);
    transferAttributes(record, parsedHeaderLine.mKeyValuePairs);
    sequences.add(record);
}
/**
 * Converts one SQ header line into a sequence record stored in {@code sequences}.
 * <p>
 * The line is ignored unless {@code requireTag} succeeds for both the sequence-name and the
 * sequence-length tags. The name and length values are removed from the line before the remaining
 * key/value pairs are copied onto the record via {@code transferAttributes}.
 *
 * @param parsedHeaderLine header line to convert; asserted to have record type SQ.
 * @throws NumberFormatException if the sequence-length value is not a parsable int.
 */
private void parseSQLine(final ParsedHeaderLine parsedHeaderLine) {
    assert(HeaderRecordType.SQ.equals(parsedHeaderLine.getHeaderRecordType()));

    // && short-circuits exactly like the negated || form: the length tag is not checked
    // once the name tag has already failed.
    final boolean hasRequiredTags = parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_NAME_TAG)
            && parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_LENGTH_TAG);
    if (!hasRequiredTags) {
        return;
    }

    final String name = SAMSequenceRecord.truncateSequenceName(
            parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_NAME_TAG));
    final int length = Integer.parseInt(
            parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_LENGTH_TAG));

    final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(name, length);
    transferAttributes(sequenceRecord, parsedHeaderLine.mKeyValuePairs);
    sequences.add(sequenceRecord);
}
/**
 * Handles an SQ header line: builds a SAMSequenceRecord from its name and length values and adds
 * it to {@code sequences}.
 * <p>
 * If {@code requireTag} fails for the sequence-name tag, or subsequently for the sequence-length
 * tag, the method returns without adding anything.
 *
 * @param parsedHeaderLine the line being handled; asserted to be an SQ record.
 * @throws NumberFormatException if the value of the sequence-length tag is not a parsable int.
 */
private void parseSQLine(final ParsedHeaderLine parsedHeaderLine) {
    assert(HeaderRecordType.SQ.equals(parsedHeaderLine.getHeaderRecordType()));

    if (parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_NAME_TAG)
            && parsedHeaderLine.requireTag(SAMSequenceRecord.SEQUENCE_LENGTH_TAG)) {
        // Name is removed and truncated before the length value is removed and parsed.
        final String nameValue = parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_NAME_TAG);
        final String shortName = SAMSequenceRecord.truncateSequenceName(nameValue);
        final String lengthValue = parsedHeaderLine.removeValue(SAMSequenceRecord.SEQUENCE_LENGTH_TAG);

        final SAMSequenceRecord created = new SAMSequenceRecord(shortName, Integer.parseInt(lengthValue));
        // Whatever key/value pairs are still on the line are handed to the record.
        transferAttributes(created, parsedHeaderLine.mKeyValuePairs);
        sequences.add(created);
    }
}
private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) { final int nameLength = stream.readInt(); if (nameLength <= 1) { throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source); } final String sequenceName = stream.readString(nameLength - 1); // Skip the null terminator stream.readByte(); final int sequenceLength = stream.readInt(); return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength); }
/** * Reads a single binary sequence record from the file or stream * @param source Note that this is used only for reporting errors. */ private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) { final int nameLength = stream.readInt(); if (nameLength <= 1) { throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source); } final String sequenceName = stream.readString(nameLength - 1); // Skip the null terminator stream.readByte(); final int sequenceLength = stream.readInt(); return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength); }
/** * Reads a single binary sequence record from the file or stream * @param source Note that this is used only for reporting errors. */ private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) { final int nameLength = stream.readInt(); if (nameLength <= 1) { throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source); } final String sequenceName = stream.readString(nameLength - 1); // Skip the null terminator stream.readByte(); final int sequenceLength = stream.readInt(); return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength); }
/** * Reads a single binary sequence record from the file or stream * @param source Note that this is used only for reporting errors. */ private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) { final int nameLength = stream.readInt(); if (nameLength <= 1) { throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source); } final String sequenceName = stream.readString(nameLength - 1); // Skip the null terminator stream.readByte(); final int sequenceLength = stream.readInt(); return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength); }
/** * Reads a single binary sequence record from the file or stream * @param source Note that this is used only for reporting errors. */ private static SAMSequenceRecord readSequenceRecord(final BinaryCodec stream, final String source) { final int nameLength = stream.readInt(); if (nameLength <= 1) { throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source); } final String sequenceName = stream.readString(nameLength - 1); // Skip the null terminator stream.readByte(); final int sequenceLength = stream.readInt(); return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength); }
/**
 * Reads the name of the next sequence from the FASTA input.
 * <p>
 * Leading newlines are skipped, the next byte must be '>', and the rest of the line is collected
 * into a 4096-byte buffer.
 *
 * @return the trimmed name, additionally passed through SAMSequenceRecord.truncateSequenceName
 *         when {@code truncateNamesAtWhitespace} is set; null if the input is at EOF.
 * @throws SAMException if the first byte is not '>', if the buffer fills up before the end of the
 *                      line, or if no name bytes were read.
 */
private String readSequenceName() {
    in.skipNewlines();
    if (in.eof()) {
        return null;
    }

    final byte marker = in.getByte();
    if (marker != '>') {
        throw new SAMException("Format exception reading FASTA " + file + ". Expected > but saw chr(" + marker + ") at start of sequence with index " + this.sequenceIndex);
    }

    final byte[] buffer = new byte[4096];
    int filled = 0;
    while (true) {
        if (in.eof()) {
            break;
        }
        filled = filled + in.readToEndOfOutputBufferOrEoln(buffer, filled);
        // A full buffer that still has not reached the end of the line means the name cannot fit.
        if (filled == buffer.length && !in.atEoln()) {
            throw new SAMException("Sequence name too long in FASTA " + file);
        }
        if (in.atEoln()) {
            break;
        }
    }

    if (filled == 0) {
        throw new SAMException("Missing sequence name in FASTA " + file);
    }

    final String trimmed = StringUtil.bytesToString(buffer, 0, filled).trim();
    return truncateNamesAtWhitespace ? SAMSequenceRecord.truncateSequenceName(trimmed) : trimmed;
}
/**
 * Reads the name of the next sequence from the FASTA input.
 * <p>
 * After skipping newlines the next byte has to be '>'; the remainder of the line is gathered in a
 * 4096-byte buffer and converted to a trimmed string.
 *
 * @return the sequence name, run through SAMSequenceRecord.truncateSequenceName when
 *         {@code truncateNamesAtWhitespace} is set, or null if the input is at EOF.
 * @throws SAMException if the line does not start with '>', if the name does not fit the buffer,
 *                      or if no name bytes were read.
 */
private String readSequenceName() {
    in.skipNewlines();
    if (in.eof()) {
        return null;
    }

    final byte first = in.getByte();
    if (first != '>') {
        throw new SAMException("Format exception reading FASTA " + getSource() + ". Expected > but saw chr(" + first + ") at start of sequence with index " + this.sequenceIndex);
    }

    final byte[] nameBytes = new byte[4096];
    int used = 0;
    for (;;) {
        if (in.eof()) {
            break;
        }
        used += in.readToEndOfOutputBufferOrEoln(nameBytes, used);
        // Buffer exhausted but the line continues: the name is longer than the buffer allows.
        final boolean bufferFull = used == nameBytes.length;
        if (bufferFull && !in.atEoln()) {
            throw new SAMException("Sequence name too long in FASTA " + getSource());
        }
        if (in.atEoln()) {
            break;
        }
    }

    if (used == 0) {
        throw new SAMException("Missing sequence name in FASTA " + getSource());
    }

    final String name = StringUtil.bytesToString(nameBytes, 0, used).trim();
    if (!truncateNamesAtWhitespace) {
        return name;
    }
    return SAMSequenceRecord.truncateSequenceName(name);
}
/**
 * Reads the name of the next sequence from the FASTA input.
 *
 * @return null when the input is at EOF once newlines have been skipped; otherwise the trimmed
 *         text following '>' on the line, passed through SAMSequenceRecord.truncateSequenceName
 *         if {@code truncateNamesAtWhitespace} is set.
 * @throws SAMException if the byte at the start of the record is not '>', if the 4096-byte name
 *                      buffer is filled before the end of the line, or if no name bytes were read.
 */
private String readSequenceName() {
    in.skipNewlines();
    if (in.eof()) {
        return null;
    }

    final byte leadByte = in.getByte();
    if (leadByte != '>') {
        throw new SAMException("Format exception reading FASTA " + getSource() + ". Expected > but saw chr(" + leadByte + ") at start of sequence with index " + this.sequenceIndex);
    }

    final byte[] scratch = new byte[4096];
    int count = 0;
    // Keep reading until the end of the line is reached or the input runs out.
    while (!in.eof()) {
        count += in.readToEndOfOutputBufferOrEoln(scratch, count);
        if (count == scratch.length && !in.atEoln()) {
            throw new SAMException("Sequence name too long in FASTA " + getSource());
        }
        if (in.atEoln()) {
            break;
        }
    }

    if (count == 0) {
        throw new SAMException("Missing sequence name in FASTA " + getSource());
    }

    String result = StringUtil.bytesToString(scratch, 0, count).trim();
    if (truncateNamesAtWhitespace) {
        result = SAMSequenceRecord.truncateSequenceName(result);
    }
    return result;
}
// The reference name is replaced by its truncated form (SAMSequenceRecord.truncateSequenceName)
// before it is validated under the label "RNAME" and stored on the record, so validation and the
// record both see the truncated value. The mate reference name gets the same truncation.
rname = SAMSequenceRecord.truncateSequenceName(rname); validateReferenceName(rname, "RNAME"); samRecord.setReferenceName(rname); mateRName = SAMSequenceRecord.truncateSequenceName(mateRName);
// Truncate the reference name first, then validate it (reported as "RNAME") and set it on the
// record; the truncated value is what gets validated and stored. The mate reference name is
// truncated with the same helper.
rname = SAMSequenceRecord.truncateSequenceName(rname); validateReferenceName(rname, "RNAME"); samRecord.setReferenceName(rname); mateRName = SAMSequenceRecord.truncateSequenceName(mateRName);
// Order matters here: rname is overwritten with the result of truncateSequenceName before
// validateReferenceName (label "RNAME") and setReferenceName are called, so both operate on the
// truncated name. mateRName is overwritten with its truncated form as well.
rname = SAMSequenceRecord.truncateSequenceName(rname); validateReferenceName(rname, "RNAME"); samRecord.setReferenceName(rname); mateRName = SAMSequenceRecord.truncateSequenceName(mateRName);