/**
 * Reports whether another record is available after downsampling.
 *
 * @return true if a record is already staged or {@code advance()} reports one, false otherwise
 */
@Override
public boolean hasNext() {
    // A record is already staged; no need to advance.
    if (this.nextRecord != null) {
        return true;
    }
    return advance();
}
this.nextRecord = null; while (this.nextRecord == null && (this.bufferedRecords.hasNext() || bufferNextChunkOfRecords(getTargetProportion(), this.targetAccuracy))) { final SAMRecord rec = this.bufferedRecords.next(); final String key = rec.getReadName(); recordAcceptedRecord(); recordDiscardedRecord();
/**
 * Creates a new DownsamplingIterator using the supplied Strategy that attempts to read from the provided iterator
 * and return approximately {@code proportion} of the records read.
 *
 * @param iterator   The iterator from which to consume SAMRecords; may not be null
 * @param strategy   The downsampling strategy to use; may not be null
 * @param proportion The proportion of records the downsampling strategy should attempt to emit; must be in the
 *                   inclusive range [0, 1]
 * @param accuracy   If supported by the downsampling strategy, the accuracy goal for the downsampler. Higher accuracy
 *                   will generally require higher memory usage. An accuracy value of 0.0001 tells the strategy to try
 *                   and ensure the emitted proportion is within proportion +/- 0.0001. Only the HighAccuracy and
 *                   Chained strategies receive this value; ConstantMemory ignores it.
 * @param seed       The seed value to use for any random process used in down-sampling.
 * @return a DownsamplingIterator implementing the requested strategy
 * @throws IllegalArgumentException if {@code strategy} or {@code iterator} is null, or if {@code proportion} is NaN
 *                                  or lies outside [0, 1]
 */
public static DownsamplingIterator make(final Iterator<SAMRecord> iterator,
                                        final Strategy strategy,
                                        final double proportion,
                                        final double accuracy,
                                        final int seed) {
    if (strategy == null) throw new IllegalArgumentException("strategy may not be null");
    if (iterator == null) throw new IllegalArgumentException("iterator may not be null");
    // NaN compares false against both bounds below, so it has to be rejected explicitly.
    if (Double.isNaN(proportion)) throw new IllegalArgumentException("proportion may not be NaN");
    // 0 and 1 are both accepted, so the messages describe inclusive bounds.
    if (proportion < 0) throw new IllegalArgumentException("proportion must be greater than or equal to 0, but was " + proportion);
    if (proportion > 1) throw new IllegalArgumentException("proportion must be less than or equal to 1, but was " + proportion);

    switch (strategy) {
        case HighAccuracy:
            return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        case ConstantMemory:
            return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed);
        case Chained:
            return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        default:
            throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!");
    }
}
/** * Buffers reads until either the end of the file is reached or enough reads have been buffered such * that downsampling can be performed to the desired target accuracy. Once reads have been buffered, * template names are randomly sampled out for discarding until the desired number of reads have * been discarded. * * @return True if one or more reads have been buffered, false otherwise */ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) { final int templatesToRead = (int) Math.ceil(1 / accuracy); final Set<String> names = new HashSet<String>(); final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead); readFromUnderlyingIterator(recs, names, templatesToRead); // Determine how many templates to keep/discard final int templatesRead = names.size(); final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion); // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; final List<String> tmp = new ArrayList<String>(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); // Set all the instance state so that advance()/next() get what they need this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; this.keptTemplates += names.size(); return !recs.isEmpty(); }
/** * Resets statistics before reading from the underlying iterator. */ @Override protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); // Read from the underlying iterator super.readFromUnderlyingIterator(recs, names, templatesToRead); }
@Override protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) { // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. recordDiscardRecords(iter.getDiscardedCount()); return retval; } }
this.nextRecord = null; while (this.nextRecord == null && (this.bufferedRecords.hasNext() || bufferNextChunkOfRecords(getTargetProportion(), this.targetAccuracy))) { final SAMRecord rec = this.bufferedRecords.next(); final String key = rec.getReadName(); recordAcceptedRecord(); recordDiscardedRecord();
/**
 * Creates a new DownsamplingIterator using the supplied Strategy that attempts to read from the provided iterator
 * and return approximately {@code proportion} of the records read.
 *
 * @param iterator   The iterator from which to consume SAMRecords; may not be null
 * @param strategy   The downsampling strategy to use; may not be null
 * @param proportion The proportion of records the downsampling strategy should attempt to emit; must be in the
 *                   inclusive range [0, 1]
 * @param accuracy   If supported by the downsampling strategy, the accuracy goal for the downsampler. Higher accuracy
 *                   will generally require higher memory usage. An accuracy value of 0.0001 tells the strategy to try
 *                   and ensure the emitted proportion is within proportion +/- 0.0001. Only the HighAccuracy and
 *                   Chained strategies receive this value; ConstantMemory ignores it.
 * @param seed       The seed value to use for any random process used in down-sampling.
 * @return a DownsamplingIterator implementing the requested strategy
 * @throws IllegalArgumentException if {@code strategy} or {@code iterator} is null, or if {@code proportion} is NaN
 *                                  or lies outside [0, 1]
 */
public static DownsamplingIterator make(final Iterator<SAMRecord> iterator,
                                        final Strategy strategy,
                                        final double proportion,
                                        final double accuracy,
                                        final int seed) {
    if (strategy == null) throw new IllegalArgumentException("strategy may not be null");
    if (iterator == null) throw new IllegalArgumentException("iterator may not be null");
    // NaN compares false against both bounds below, so it has to be rejected explicitly.
    if (Double.isNaN(proportion)) throw new IllegalArgumentException("proportion may not be NaN");
    // 0 and 1 are both accepted, so the messages describe inclusive bounds.
    if (proportion < 0) throw new IllegalArgumentException("proportion must be greater than or equal to 0, but was " + proportion);
    if (proportion > 1) throw new IllegalArgumentException("proportion must be less than or equal to 1, but was " + proportion);

    switch (strategy) {
        case HighAccuracy:
            return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        case ConstantMemory:
            return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed);
        case Chained:
            return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        default:
            throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!");
    }
}
/** * Buffers reads until either the end of the file is reached or enough reads have been buffered such * that downsampling can be performed to the desired target accuracy. Once reads have been buffered, * template names are randomly sampled out for discarding until the desired number of reads have * been discarded. * * @return True if one or more reads have been buffered, false otherwise */ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) { final int templatesToRead = (int) Math.ceil(1 / accuracy); final Set<String> names = new HashSet<String>(); final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead); readFromUnderlyingIterator(recs, names, templatesToRead); // Determine how many templates to keep/discard final int templatesRead = names.size(); final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion); // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; final List<String> tmp = new ArrayList<String>(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); // Set all the instance state so that advance()/next() get what they need this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; this.keptTemplates += names.size(); return !recs.isEmpty(); }
/** * Resets statistics before reading from the underlying iterator. */ @Override protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); // Read from the underlying iterator super.readFromUnderlyingIterator(recs, names, templatesToRead); }
@Override protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) { // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. recordDiscardRecords(iter.getDiscardedCount()); return retval; } }
/**
 * Reports whether another record is available after downsampling.
 *
 * @return true if a record is already staged or {@code advance()} reports one, false otherwise
 */
@Override
public boolean hasNext() {
    // A record is already staged; no need to advance.
    if (this.nextRecord != null) {
        return true;
    }
    return advance();
}
/**
 * Hands back the currently staged record and moves the iterator on to the following one.
 *
 * @return the record that was staged when this method was called
 * @throws NoSuchElementException if no record is staged
 */
@Override
public SAMRecord next() {
    final SAMRecord current = this.nextRecord;
    if (current == null) {
        throw new NoSuchElementException("Call to next() when hasNext() == false");
    }

    // Stage the following record before returning the current one
    advance();
    return current;
}
/**
 * Hands back the currently staged record and moves the iterator on to the following one.
 *
 * @return the record that was staged when this method was called
 * @throws NoSuchElementException if no record is staged
 */
@Override
public SAMRecord next() {
    final SAMRecord current = this.nextRecord;
    if (current == null) {
        throw new NoSuchElementException("Call to next() when hasNext() == false");
    }

    // Stage the following record before returning the current one
    advance();
    return current;
}