/**
 * Builds a file span that begins at the supplied position and is open-ended:
 * it consists of a single chunk whose end is {@code Long.MAX_VALUE}.
 * @param startOfRegion Start of the region, in encoded coordinates (block start << 16 | block offset).
 * @return A file span from the given point to the end of the file.
 */
private GATKBAMFileSpan createSpanToEndOfFile(final long startOfRegion) {
    final GATKChunk openEndedChunk = new GATKChunk(startOfRegion, Long.MAX_VALUE);
    return new GATKBAMFileSpan(openEndedChunk);
}
// One step of a two-cursor sweep over two chunk sequences (thisIterator / otherIterator);
// every overlap that is found is appended to `intersected`.
//  - thisChunk ends at or before otherChunk starts: advance thisChunk (null once its iterator is exhausted).
//  - thisChunk starts at or after otherChunk ends: advance otherChunk (null once its iterator is exhausted).
//  - the chunks overlap: the intersection runs from the start of the later-starting chunk to the
//    smaller of the two ends. Each chunk that extends past the intersection is replaced by its
//    remainder (intersection end .. original end); otherwise that side advances to its next chunk.
// NOTE(review): presumably both sequences are sorted by chunk start -- confirm against the caller.
// NOTE(review): the closing braces of the first two `if` bodies are not visible in this excerpt.
if(thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) { thisChunk = thisIterator.hasNext() ? thisIterator.next() : null; continue; if(thisChunk.getChunkStart() >= otherChunk.getChunkEnd()) { otherChunk = otherIterator.hasNext() ? otherIterator.next() : null; continue; if(thisChunk.overlaps(otherChunk)) { GATKChunk firstChunk = thisChunk.getChunkStart() < otherChunk.getChunkStart() ? thisChunk : otherChunk; GATKChunk secondChunk = thisChunk==firstChunk ? otherChunk : thisChunk; GATKChunk intersectedChunk = new GATKChunk(secondChunk.getChunkStart(),Math.min(firstChunk.getChunkEnd(),secondChunk.getChunkEnd())); intersected.add(intersectedChunk); if(thisChunk.getChunkEnd() > intersectedChunk.getChunkEnd()) thisChunk = new GATKChunk(intersectedChunk.getChunkEnd(),thisChunk.getChunkEnd()); else thisChunk = thisIterator.hasNext() ? thisIterator.next() : null; if(otherChunk.getChunkEnd() > intersectedChunk.getChunkEnd()) otherChunk = new GATKChunk(intersectedChunk.getChunkEnd(),otherChunk.getChunkEnd()); else otherChunk = otherIterator.hasNext() ? otherIterator.next() : null;
private List<GATKChunk> optimizeChunkList(final List<GATKChunk> chunks, final long minimumOffset) { GATKChunk lastChunk = null; Collections.sort(chunks); final List<GATKChunk> result = new ArrayList<GATKChunk>(); for (final GATKChunk chunk : chunks) { if (chunk.getChunkEnd() <= minimumOffset) { continue; // linear index optimization } if (result.isEmpty()) { result.add(chunk); lastChunk = chunk; continue; } // Coalesce chunks that are in adjacent file blocks. // This is a performance optimization. if (!lastChunk.overlaps(chunk) && !lastChunk.isAdjacentTo(chunk)) { result.add(chunk); lastChunk = chunk; } else { if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) { lastChunk.setChunkEnd(chunk.getChunkEnd()); } } } return result; }
/**
 * Creates a new chunk with the same start and end as this one.
 * @return A new GATKChunk covering the same range.
 */
@Override
public GATKChunk clone() {
    final long start = getChunkStart();
    final long end = getChunkEnd();
    return new GATKChunk(start, end);
}
/**
 * Estimates the uncompressed size of this chunk, in bytes. The estimate is the
 * difference between the block addresses of the end and start pointers (the bits
 * above the low 16) divided by AVERAGE_BAM_COMPRESSION_RATIO and rounded, plus the
 * difference between the in-block offsets (the low 16 bits) of the two pointers.
 * Useful for weighing chunks against one another.
 * @return An approximation of the chunk size in bytes.
 */
public long size() {
    final long endPointer = getChunkEnd();
    final long startPointer = getChunkStart();

    // Distance between the containing blocks, scaled by the average compression ratio.
    final long blockDelta = (endPointer >> 16) - (startPointer >> 16);
    final long chunkSpan = Math.round(blockDelta / AVERAGE_BAM_COMPRESSION_RATIO);

    // Distance between the offsets within their respective blocks.
    final int offsetSpan = (int) ((endPointer & 0xFFFF) - (startPointer & 0xFFFF));

    return chunkSpan + offsetSpan;
}
// Builds the list of chunk portions for the block at blockAddress.
// The loop visits chunks from positionIterator while the chunk's block start is <= blockAddress:
//  - blockOffsetStart is the chunk's own start offset when the chunk begins in this block, else 0;
//  - a GATKChunk (blockAddress, blockOffsetStart, blockEnd, blockOffsetEnd) is added when its
//    start does not exceed its end;
//  - the loop stops when the iterator is exhausted or the current chunk's block end lies beyond
//    blockAddress; a chunk whose block end is <= blockAddress is consumed with next().
// NOTE(review): blockEnd/blockOffsetEnd are assigned twice in a row as shown here, so the first
// pair (filePosition, 0) is overwritten -- an `else` and some closing braces appear to be missing
// from this excerpt; confirm against the full source.
List<GATKChunk> spansOverlapping = new LinkedList<GATKChunk>(); while(positionIterator.hasNext() && positionIterator.peek().getBlockStart() <= blockAddress) { int blockOffsetStart = (blockAddress == positionIterator.peek().getBlockStart()) ? positionIterator.peek().getBlockOffsetStart() : 0; if(blockAddress < positionIterator.peek().getBlockEnd()) { blockEnd = filePosition; blockOffsetEnd = 0; blockEnd = positionIterator.peek().getBlockEnd(); blockOffsetEnd = positionIterator.peek().getBlockOffsetEnd(); GATKChunk newChunk = new GATKChunk(blockAddress,blockOffsetStart,blockEnd,blockOffsetEnd); if(newChunk.getChunkStart() <= newChunk.getChunkEnd()) spansOverlapping.add(new GATKChunk(blockAddress,blockOffsetStart,blockEnd,blockOffsetEnd)); if(!positionIterator.hasNext() || positionIterator.peek().getBlockEnd() > blockAddress) break; if(positionIterator.peek().getBlockEnd() <= blockAddress) positionIterator.next();
/**
 * Checks size() for chunks built from pointers 0..FULL_BLOCK_UNCOMPRESSED_SIZE-1 and
 * 0..HALF_BLOCK_UNCOMPRESSED_SIZE: the reported size must equal the end pointer.
 */
@Test
public void testSizeOfChunkWithinSingleBlock() {
    final GATKChunk nearlyFullBlock = new GATKChunk(0, FULL_BLOCK_UNCOMPRESSED_SIZE - 1);
    Assert.assertEquals(
            nearlyFullBlock.size(),
            FULL_BLOCK_UNCOMPRESSED_SIZE - 1,
            "Chunk spanning limits of block is returning wrong size.");

    final GATKChunk halfBlock = new GATKChunk(0, HALF_BLOCK_UNCOMPRESSED_SIZE);
    Assert.assertEquals(
            halfBlock.size(),
            HALF_BLOCK_UNCOMPRESSED_SIZE,
            "Chunk spanning 1/2 block is returning the wrong size.");
}
(currentChunk.overlaps(unmergedUnion.peek()) || currentChunk.isAdjacentTo(unmergedUnion.peek())) ) { currentChunk = currentChunk.merge(nextChunk);
/**
 * Tests whether a file position lies at or beyond the end of a chunk.
 * A position equal to the chunk end counts as past the chunk.
 * @param filePosition Position to test.
 * @param chunk Chunk whose end is compared against the position.
 * @return true if filePosition is greater than or equal to the chunk's end.
 */
private boolean isFilePositionPastEndOfChunk(final long filePosition, final GATKChunk chunk) {
    final long chunkEnd = chunk.getChunkEnd();
    return filePosition >= chunkEnd;
}
} // closes the enclosing scope opened above this excerpt
/**
 * Extracts the block portion of the chunk start by discarding its low 16 bits
 * (unsigned shift).
 * @return The chunk start shifted right by 16 bits.
 */
public long getBlockStart() {
    final long startPointer = getChunkStart();
    return startPointer >>> 16;
}
/** * Advances the current position to the next block to read, given the current position in the file. * @param filePosition The current position within the file. */ void advancePosition(final long filePosition) { nextBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(filePosition); // Check the current file position against the iterator; if the iterator is before the current file position, // draw the iterator forward. Remember when performing the check that coordinates are half-open! while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) positionIterator.next(); // If the block iterator has shot past the file pointer, bring the file pointer flush with the start of the current block. if(positionIterator.hasNext() && filePosition < positionIterator.peek().getChunkStart()) nextBlockAddress = positionIterator.peek().getBlockStart(); // If we've shot off the end of the block pointer, notify consumers that iteration is complete. if(!positionIterator.hasNext()) nextBlockAddress = -1; }
/**
 * Sums the approximate uncompressed sizes of all chunks in this file span.
 * @return Approximation of uncompressed bytes in filespan; 0 if there are no chunks.
 */
public long size() {
    long totalBytes = 0L;
    for (final GATKChunk member : getGATKChunks()) {
        totalBytes = totalBytes + member.size();
    }
    return totalBytes;
}
/** * Returns true if any of the file spans in this FilePointer overlap their counterparts in * the other FilePointer. "Overlap" is defined as having an overlapping extent (the region * from the start of the first chunk to the end of the last chunk). * * @param other the FilePointer against which to check overlap with this FilePointer * @return true if any file spans overlap their counterparts in other, otherwise false */ public boolean hasFileSpansOverlappingWith( FilePointer other ) { for ( Map.Entry<SAMReaderID, SAMFileSpan> thisFilePointerEntry : fileSpans.entrySet() ) { GATKBAMFileSpan thisFileSpan = new GATKBAMFileSpan(thisFilePointerEntry.getValue()); SAMFileSpan otherEntry = other.fileSpans.get(thisFilePointerEntry.getKey()); if ( otherEntry == null ) { continue; // no counterpart for this file span in other } GATKBAMFileSpan otherFileSpan = new GATKBAMFileSpan(otherEntry); if ( thisFileSpan.getExtent().overlaps(otherFileSpan.getExtent()) ) { return true; } } return false; }
// Gather the chunks of every bin in binTree into chunkList. Each chunk is added as a
// clone() rather than as the bin's own object.
// NOTE(review): presumably the copies protect the bins' chunks from later in-place
// modification of chunkList's elements -- confirm against the code that consumes chunkList.
for(GATKBin coveringBin: binTree) { for(GATKChunk chunk: coveringBin.getChunkList()) chunkList.add(chunk.clone());
// Copies the part of incomingBuffer covered by spanOverlapping into buffer and records bookkeeping:
//  1. remember where in buffer this span's data begins (blockOffsets);
//  2. replace the last entry of blockPositions with the span's start pointer;
//  3. window incomingBuffer: the limit is bytesInIncomingBuffer when the span ends in a later block
//     than it starts, otherwise the span's end offset; the position is the span's start offset;
//  4. append the windowed bytes to buffer and record the span's end pointer in blockPositions.
// NOTE(review): presumably incomingBuffer holds the uncompressed contents of the span's starting
// block -- confirm against the surrounding method.
blockOffsets.add(buffer.position()); blockPositions.removeLast(); blockPositions.add(spanOverlapping.getChunkStart()); incomingBuffer.limit((spanOverlapping.getBlockEnd() > spanOverlapping.getBlockStart()) ? bytesInIncomingBuffer : spanOverlapping.getBlockOffsetEnd()); incomingBuffer.position(spanOverlapping.getBlockOffsetStart()); buffer.put(incomingBuffer); blockPositions.add(spanOverlapping.getChunkEnd());
/**
 * Produces the union of this chunk and another: a new chunk running from the smaller
 * of the two starts to the larger of the two ends. No overlap or adjacency check is
 * made here; the caller must verify that the chunks overlap or are adjacent first.
 *
 * @param other the chunk to merge with this chunk
 * @return a new chunk representing the union of the two chunks (provided the chunks were
 *         overlapping/adjacent)
 */
public GATKChunk merge ( GATKChunk other ) {
    final long unionStart = Math.min(getChunkStart(), other.getChunkStart());
    final long unionEnd = Math.max(getChunkEnd(), other.getChunkEnd());
    return new GATKChunk(unionStart, unionEnd);
}
} // closes the enclosing scope opened above this excerpt
// Serialize the file span into buffer: first the number of chunks (putInt), then for each
// chunk its start pointer followed by its end pointer (putLong each).
// NOTE(review): a reader of this data must consume fields in the same order -- confirm against
// the matching deserialization code.
buffer.putInt(fileSpan.getGATKChunks().size()); for(GATKChunk chunk: fileSpan.getGATKChunks()) { buffer.putLong(chunk.getChunkStart()); buffer.putLong(chunk.getChunkEnd());
/**
 * Extracts the block portion of the chunk end by discarding its low 16 bits
 * (unsigned shift).
 * @return The chunk end shifted right by 16 bits.
 */
public long getBlockEnd() {
    final long endPointer = getChunkEnd();
    return endPointer >>> 16;
}
/**
 * Extracts the in-block offset of the chunk start, i.e. its low 16 bits.
 * @return The low 16 bits of the chunk start, as an int.
 */
public int getBlockOffsetStart() {
    final long offsetBits = getChunkStart() & 0xFFFF;
    return (int) offsetBits;
}