/** * Buffers reads until either the end of the file is reached or enough reads have been buffered such * that downsampling can be performed to the desired target accuracy. Once reads have been buffered, * template names are randomly sampled out for discarding until the desired number of reads have * been discarded. * * @return True if one or more reads have been buffered, false otherwise */ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) { final int templatesToRead = (int) Math.ceil(1 / accuracy); final Set<String> names = new HashSet<String>(); final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead); readFromUnderlyingIterator(recs, names, templatesToRead); // Determine how many templates to keep/discard final int templatesRead = names.size(); final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion); // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; final List<String> tmp = new ArrayList<String>(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); // Set all the instance state so that advance()/next() get what they need this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; this.keptTemplates += names.size(); return !recs.isEmpty(); }
/** * Buffers reads until either the end of the file is reached or enough reads have been buffered such * that downsampling can be performed to the desired target accuracy. Once reads have been buffered, * template names are randomly sampled out for discarding until the desired number of reads have * been discarded. * * @return True if one or more reads have been buffered, false otherwise */ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) { final int templatesToRead = (int) Math.ceil(1 / accuracy); final Set<String> names = new HashSet<String>(); final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead); readFromUnderlyingIterator(recs, names, templatesToRead); // Determine how many templates to keep/discard final int templatesRead = names.size(); final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion); // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; final List<String> tmp = new ArrayList<String>(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); // Set all the instance state so that advance()/next() get what they need this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; this.keptTemplates += names.size(); return !recs.isEmpty(); }
@Override protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) { // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. recordDiscardRecords(iter.getDiscardedCount()); return retval; } }
@Override protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) { // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. recordDiscardRecords(iter.getDiscardedCount()); return retval; } }