/**
 * Computes the standard deviation of the bin id values, weighting each bin by its value
 * (its count). The squared deviations are taken around {@code getMean()} and their weighted
 * sum is divided by (total count - 1) before the square root is applied.
 *
 * @return the square root of the weighted squared-deviation sum divided by (total count - 1)
 */
public double getStandardDeviation() {
    final double average = getMean();
    double totalWeight = 0;
    double weightedSquares = 0;
    for (final Bin<K> entry : map.values()) {
        final double weight = entry.getValue();
        final double id = entry.getIdValue();
        totalWeight += weight;
        weightedSquares += weight * pow(id - average, 2);
    }
    final double variance = weightedSquares / (totalWeight - 1);
    return Math.sqrt(variance);
}
/**
 * Adds up the value stored in every bin of this histogram.
 *
 * @return the sum of all bin values; 0 when there are no bins
 */
public double getSumOfValues() {
    double sum = 0;
    for (final Bin<K> entry : map.values()) {
        final double binValue = entry.getValue();
        sum += binValue;
    }
    return sum;
}
private double effectiveNumberOfBases(Histogram<?> observations) { double totalObservations = observations.getSumOfValues(); // Convert to log base 4 so that the entropy is now a measure // of the effective number of DNA bases. If we used log(2.0) // our result would be in bits. double entropyBaseE = observations.values().stream().collect(Collectors.summingDouble( v -> {double p = v.getValue() / totalObservations; return -p * Math.log(p);})); return entropyBaseE / MathUtil.LOG_4_BASE_E; } }
/**
 * Computes the median absolute deviation of the distribution.
 *
 * <p>A second histogram is built whose keys are the absolute distances of each bin's id value
 * from this histogram's median, each incremented by the originating bin's value; the median of
 * that second histogram is returned.
 */
public double getMedianAbsoluteDeviation() {
    final double center = getMedian();
    final Histogram<Double> absoluteDeviations = new Histogram<>();
    for (final Bin<K> entry : values()) {
        final double distance = abs(entry.getIdValue() - center);
        final double weight = entry.getValue();
        absoluteDeviations.increment(distance, weight);
    }
    return absoluteDeviations.getMedian();
}
/**
 * Merges another histogram into this one, mutating this instance: for every key of
 * {@code addHistogram}, this histogram is incremented at that key by the other bin's value.
 *
 * @param addHistogram the histogram whose bin values are added into this histogram
 */
public void addHistogram(final Histogram<K> addHistogram) {
    for (final K binId : addHistogram.keySet()) {
        final double amount = addHistogram.get(binId).getValue();
        increment(binId, amount);
    }
}
/**
 * Validates missing_platform_unit.sam and expects the MISSING_PLATFORM_VALUE entry of the
 * resulting error histogram to hold 1.0.
 */
@Test
public void testPlatformMissing() throws Exception {
    final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    final File input = new File(TEST_DATA_DIR, "missing_platform_unit.sam");
    final SamReader samReader = factory.open(input);

    final Histogram<String> results =
            executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);

    final String errorKey = SAMValidationError.Type.MISSING_PLATFORM_VALUE.getHistogramString();
    final double observed = results.get(errorKey).getValue();
    Assert.assertEquals(observed, 1.0);
}
/**
 * Validates seq_qual_len_mismatch.sam and expects the MISMATCH_SEQ_QUAL_LENGTH entry of the
 * resulting error histogram to hold 8.0.
 */
@Test
public void testSeqQualMismatch() throws Exception {
    final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    final File input = new File(TEST_DATA_DIR, "seq_qual_len_mismatch.sam");
    final SamReader samReader = factory.open(input);

    final Histogram<String> results =
            executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);

    final String errorKey = SAMValidationError.Type.MISMATCH_SEQ_QUAL_LENGTH.getHistogramString();
    final double observed = results.get(errorKey).getValue();
    Assert.assertEquals(observed, 8.0);
}
@Test public void testCigarOffEndOfReferenceValidation() throws Exception { final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder(); samBuilder.addFrag(String.valueOf(0), 0, 1, false); final int contigLength = samBuilder.getHeader().getSequence(0).getSequenceLength(); // Should hang off the end. samBuilder.addFrag(String.valueOf(1), 0, contigLength - 1, false); final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE); Assert.assertNotNull(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString())); Assert.assertEquals(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString()).getValue(), 1.0); }
/**
 * Data-driven check: validating {@code inputFile} must produce a histogram entry for
 * {@code expectedError} whose value is 1.0. The scenario name is used as the assertion message.
 */
@Test(dataProvider = "testTruncatedScenarios")
public void testTruncated(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
        throws Exception {
    final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    final SamReader reader = factory.open(new File(TEST_DATA_DIR, inputFile));

    final Histogram<String> results =
            executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE);

    final String errorKey = expectedError.getHistogramString();
    Assert.assertNotNull(results.get(errorKey), scenario);
    final double observed = results.get(errorKey).getValue();
    Assert.assertEquals(observed, 1.0, scenario);
}
/**
 * Validates two badly sorted inputs and checks the RECORD_OUT_OF_ORDER entry of each error
 * histogram: 1.0 for invalid_coord_sort_order.sam and 5.0 for invalid_queryname_sort_order.sam.
 */
@Test
public void testSortOrder() throws IOException {
    final String errorKey = SAMValidationError.Type.RECORD_OUT_OF_ORDER.getHistogramString();

    // Coordinate-sorted input.
    final SamReader coordinateReader = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT)
            .open(new File(TEST_DATA_DIR, "invalid_coord_sort_order.sam"));
    final Histogram<String> coordinateResults =
            executeValidation(coordinateReader, null, IndexValidationStringency.EXHAUSTIVE);
    Assert.assertEquals(coordinateResults.get(errorKey).getValue(), 1.0);

    // Queryname-sorted input.
    final SamReader querynameReader = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT)
            .open(new File(TEST_DATA_DIR, "invalid_queryname_sort_order.sam"));
    final Histogram<String> querynameResults =
            executeValidation(querynameReader, null, IndexValidationStringency.EXHAUSTIVE);
    Assert.assertEquals(querynameResults.get(errorKey).getValue(), 5.0);
}
/**
 * Folds the contents of another histogram into this one, modifying this instance in place.
 * Each key present in {@code addHistogram} is incremented here by the value of the
 * corresponding bin in {@code addHistogram}.
 *
 * @param addHistogram the histogram to add into the current one
 */
public void addHistogram(final Histogram<K> addHistogram) {
    for (final K binId : addHistogram.keySet()) {
        final double amount = addHistogram.get(binId).getValue();
        increment(binId, amount);
    }
}
}
/**
 * Returns the median absolute deviation of the distribution.
 *
 * <p>For every bin, the absolute difference between its id value and this histogram's median
 * is added to a temporary histogram with the bin's value as the increment. The result is the
 * median of that temporary histogram.
 */
public double getMedianAbsoluteDeviation() {
    final double center = getMedian();
    final Histogram<Double> absoluteDeviations = new Histogram<>();
    for (final Bin<K> entry : values()) {
        final double distance = abs(entry.getIdValue() - center);
        final double weight = entry.getValue();
        absoluteDeviations.increment(distance, weight);
    }
    return absoluteDeviations.getMedian();
}
/**
 * Currently disabled. Validates duplicated_reads.sam and expects a non-empty error histogram
 * whose MATES_ARE_SAME_END entry holds 2.0.
 */
@Test(enabled = false)
public void duplicateReads() throws Exception {
    final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    final File input = new File(TEST_DATA_DIR, "duplicated_reads.sam");
    final SamReader samReader = factory.open(input);

    final Histogram<String> results =
            executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);

    Assert.assertFalse(results.isEmpty());
    final String errorKey = SAMValidationError.Type.MATES_ARE_SAME_END.getHistogramString();
    final double observed = results.get(errorKey).getValue();
    Assert.assertEquals(observed, 2.0);
}
/**
 * Validates invalid_platform_unit.sam and expects the INVALID_PLATFORM_VALUE entry of the
 * resulting error histogram to hold 1.0.
 */
@Test
public void testPlatformInvalid() throws Exception {
    final SamReaderFactory factory =
            SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
    final File input = new File(TEST_DATA_DIR, "invalid_platform_unit.sam");
    final SamReader samReader = factory.open(input);

    final Histogram<String> results =
            executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);

    final String errorKey = SAMValidationError.Type.INVALID_PLATFORM_VALUE.getHistogramString();
    final double observed = results.get(errorKey).getValue();
    Assert.assertEquals(observed, 1.0);
}
/**
 * Data-driven check: validating {@code inputFile}, opened with the default reader factory,
 * must produce a histogram entry for {@code expectedError} whose value is 1.0. The scenario
 * name is used as the assertion message.
 */
@Test(dataProvider = "testMateCigarScenarios")
public void testMateCigarScenarios(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
        throws Exception {
    final File input = new File(TEST_DATA_DIR, inputFile);
    final SamReader reader = SamReaderFactory.makeDefault().open(input);

    final Histogram<String> results =
            executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE);

    final String errorKey = expectedError.getHistogramString();
    Assert.assertNotNull(results.get(errorKey), scenario);
    final double observed = results.get(errorKey).getValue();
    Assert.assertEquals(observed, 1.0, scenario);
}
/**
 * Computes the standard deviation of the bin id values, weighting each bin by its value
 * (its count). The squared deviations are taken around {@code getMean()} and their weighted
 * sum is divided by (total count - 1) before the square root is applied.
 *
 * @return the square root of the weighted squared-deviation sum divided by (total count - 1)
 */
public double getStandardDeviation() {
    final double average = getMean();
    double totalWeight = 0;
    double weightedSquares = 0;
    for (final Bin entry : values()) {
        final double weight = entry.getValue();
        final double id = entry.getIdValue();
        totalWeight += weight;
        weightedSquares += weight * pow(id - average, 2);
    }
    final double variance = weightedSquares / (totalWeight - 1);
    return Math.sqrt(variance);
}
/**
 * Adds up the value stored in every bin of this histogram.
 *
 * @return the sum of all bin values; 0 when there are no bins
 */
public double getSumOfValues() {
    double sum = 0;
    for (final Bin entry : values()) {
        final double binValue = entry.getValue();
        sum += binValue;
    }
    return sum;
}
/**
 * Looks up the count recorded for the given truth-state / call-state combination.
 *
 * @param truthAndCallStates the key to look up in the counter
 * @return the matching bin's value cast to {@code long}, or 0 when the counter has no such bin
 */
public long getCount(final TruthAndCallStates truthAndCallStates) {
    final Histogram.Bin<TruthAndCallStates> match = this.counter.get(truthAndCallStates);
    if (match == null) {
        return 0L;
    }
    return (long) match.getValue();
}
/**
 * Returns the count stored for the supplied truth and call states.
 *
 * @param truthAndCallStates the key to look up in the counter
 * @return 0 when the counter holds no bin for the key; otherwise that bin's value cast to
 *         {@code long}
 */
public long getCount(final TruthAndCallStates truthAndCallStates) {
    final Histogram.Bin<TruthAndCallStates> match = this.counter.get(truthAndCallStates);
    if (match != null) {
        return (long) match.getValue();
    }
    return 0L;
}