/**
 * Builds a histogram of this column's values.
 *
 * <p>The values returned by {@code asDoubleArray()} are loaded into an
 * {@code EmpiricalDistribution} created with {@code binCount} bins; the observation
 * count reported for each bin becomes one entry of the resulting column.</p>
 *
 * @param binCount number of bins handed to the empirical distribution; also the
 *                 length of the returned column
 * @return a new column named {@code name() + "[binned]"} holding one count per bin
 */
default DoubleColumn bin(int binCount) {
    double[] counts = new double[binCount];

    EmpiricalDistribution empirical = new EmpiricalDistribution(binCount);
    empirical.load(asDoubleArray());

    // Copy the per-bin observation counts in bin order.
    int binTotal = empirical.getBinStats().size();
    for (int slot = 0; slot < binTotal; slot++) {
        counts[slot] = empirical.getBinStats().get(slot).getN();
    }

    String binnedName = name() + "[binned]";
    return DoubleColumn.create(binnedName, counts);
}
return getSupportLowerBound(); return getSupportUpperBound(); while (cumBinP(i) < p) { i++; final RealDistribution kernel = getKernel(binStats.get(i)); final double kB = kB(i); final double[] binBounds = getUpperBounds(); final double lower = i == 0 ? min : binBounds[i - 1]; final double kBminus = kernel.cumulativeProbability(lower); final double pB = pB(i); final double pBminus = pBminus(i); final double pCrit = p - pBminus; if (pCrit <= 0) {
fillBinStats(new StreamDataAdapter(in)); loaded = true; } finally {
/**
 * {@inheritDoc}
 *
 * <p>The returned value is the within-bin kernel density, rescaled so that its
 * integral over each bin equals the mass of that bin.</p>
 *
 * <p>Procedure:</p>
 * <ol>
 * <li>Return 0 when x is below {@code min} or above {@code max}.</li>
 * <li>Locate the bin B containing x.</li>
 * <li>Compute K(B), the mass of B under the within-bin kernel (the integral of
 *     the kernel density over B).</li>
 * <li>Return k(x) * P(B) / K(B), with k the within-bin kernel density and P(B)
 *     the mass of B.</li>
 * </ol>
 *
 * @since 3.1
 */
public double density(double x) {
    // Kept in this exact form: a NaN argument makes both comparisons false.
    final boolean outsideSupport = x < min || x > max;
    if (outsideSupport) {
        return 0d;
    }

    final int bin = findBin(x);
    final double kernelDensity = getKernel(binStats.get(bin)).density(x);
    final double binMass = pB(bin);
    final double kernelMass = kB(bin);
    return kernelDensity * binMass / kernelMass;
}
/**
 * Draws the next random value while operating in DIGEST_MODE.
 *
 * <p><strong>Precondition</strong>: {@code computeDistribution()} must already have
 * completed successfully. If the empirical distribution is missing or has no bin
 * statistics, a {@code MathIllegalStateException} is thrown.</p>
 *
 * @return next random value from the empirical distribution digest
 * @throws MathIllegalStateException if digest has not been initialized
 */
private double getNextDigest() throws MathIllegalStateException {
    final boolean digestReady =
        empiricalDistribution != null && !empiricalDistribution.getBinStats().isEmpty();

    if (!digestReady) {
        throw new MathIllegalStateException(LocalizedFormats.DIGEST_NOT_INITIALIZED);
    }
    return empiricalDistribution.getNextValue();
}
return 1d; final int binIndex = findBin(x); final double pBminus = pBminus(binIndex); final double pB = pB(binIndex); final RealDistribution kernel = k(x); if (kernel instanceof ConstantRealDistribution) { if (x < kernel.getNumericalMean()) { final double[] binBounds = getUpperBounds(); final double kB = kB(binIndex); final double lower = binIndex == 0 ? min : binBounds[binIndex - 1]; final double withinBinCum =
/**
 * Builds the empirical distribution from the data found at {@code valuesFileURL},
 * using {@code binCount} bins, and records its sample mean and standard deviation
 * in {@code mu} and {@code sigma}.
 *
 * <p>{@code valuesFileURL} must exist and be readable by this process at runtime.</p>
 *
 * <p>Call this method before using {@code getNext()} with
 * {@code mode = DIGEST_MODE}.</p>
 *
 * @param binCount the number of bins used in computing the empirical distribution
 * @throws NullArgumentException if the {@code valuesFileURL} has not been set
 * @throws IOException if an error occurs reading the input file
 * @throws ZeroException if URL contains no data
 */
public void computeDistribution(int binCount)
    throws NullArgumentException, IOException, ZeroException {
    // The field is assigned before loading, so a failed load leaves the new,
    // still-empty distribution in place.
    this.empiricalDistribution =
        new EmpiricalDistribution(binCount, this.randomData.getRandomGenerator());
    this.empiricalDistribution.load(this.valuesFileURL);

    this.mu = this.empiricalDistribution.getSampleStats().getMean();
    this.sigma = this.empiricalDistribution.getSampleStats().getStandardDeviation();
}
EmpiricalDistribution distribution = new EmpiricalDistribution(bins); distribution.load(((List<?>)sourceValues).stream().mapToDouble(value -> ((Number)value).doubleValue()).toArray());; for(SummaryStatistics binSummary : distribution.getBinStats()) { Map<String,Number> map = new HashMap<>(); map.put("max", binSummary.getMax()); map.put("N", binSummary.getN()); map.put("var", binSummary.getVariance()); map.put("cumProb", distribution.cumulativeProbability(binSummary.getMean())); map.put("prob", distribution.probability(binSummary.getMin(), binSummary.getMax())); histogramBins.add(new Tuple(map));
/**
 * Computes the mass that the within-bin kernel of bin {@code i} assigns to bin
 * {@code i} itself.
 *
 * @param i index of the bin
 * @return the within-bin kernel probability between the lower and upper endpoints
 *         of bin i; the lower endpoint of bin 0 is {@code min}
 */
@SuppressWarnings("deprecation")
private double kB(int i) {
    final double[] upperBounds = getUpperBounds();
    final RealDistribution binKernel = getKernel(binStats.get(i));

    // Bin 0 has no predecessor, so its lower edge is the overall minimum.
    final double lowerEdge;
    if (i == 0) {
        lowerEdge = min;
    } else {
        lowerEdge = upperBounds[i - 1];
    }
    return binKernel.cumulativeProbability(lowerEdge, upperBounds[i]);
}
/**
 * Builds an {@code EmpiricalDistribution} from a list of numeric values.
 *
 * <p>Each list element is cast to {@code Number} and converted to {@code double};
 * the resulting values are sorted ascending and loaded into a new distribution.</p>
 *
 * @param value the input; must be a {@code List} whose elements are {@code Number}s
 * @return the loaded {@code EmpiricalDistribution}
 * @throws IOException declared by the evaluator contract; a
 *         {@code StreamEvaluatorException} is thrown when {@code value} is not a
 *         {@code List} (including when it is {@code null})
 */
@Override
public Object doWork(Object value) throws IOException {
    if (!(value instanceof List<?>)) {
        // null fails instanceof as well; build the message without dereferencing it
        // so the caller gets the intended exception rather than a NullPointerException.
        final String foundType = value == null ? "null" : value.getClass().getName();
        throw new StreamEvaluatorException(
            "List value expected but found type %s for value %s",
            foundType,
            String.valueOf(value));
    }

    EmpiricalDistribution empiricalDistribution = new EmpiricalDistribution();

    double[] backingValues = ((List<?>) value).stream()
        .mapToDouble(innerValue -> ((Number) innerValue).doubleValue())
        .sorted()
        .toArray();
    empiricalDistribution.load(backingValues);

    return empiricalDistribution;
}
}
// Example: derive per-bin observation counts for a small sample.

// Number of bins requested from the empirical distribution; also the histogram length.
final int BIN_COUNT = 20;
// Sample observations to be binned.
double[] data = {1.2, 0.2, 0.333, 1.4, 1.5, 1.2, 1.3, 10.4, 1, 2.0};
// histogram[i] receives the observation count reported for the i-th bin.
long[] histogram = new long[BIN_COUNT];
org.apache.commons.math3.random.EmpiricalDistribution distribution = new org.apache.commons.math3.random.EmpiricalDistribution(BIN_COUNT);
distribution.load(data);
// k walks the histogram in step with the iteration over the bin statistics.
int k = 0;
for(org.apache.commons.math3.stat.descriptive.SummaryStatistics stats: distribution.getBinStats()) {
    histogram[k++] = stats.getN();
}
return 1d; final int binIndex = findBin(x); final double pBminus = pBminus(binIndex); final double pB = pB(binIndex); final RealDistribution kernel = k(x); if (kernel instanceof ConstantRealDistribution) { if (x < kernel.getNumericalMean()) { final double[] binBounds = getUpperBounds(); final double kB = kB(binIndex); final double lower = binIndex == 0 ? min : binBounds[binIndex - 1]; final double withinBinCum =
/**
 * {@inheritDoc}
 *
 * <p>The returned value is the within-bin kernel density, rescaled so that its
 * integral over each bin equals the mass of that bin.</p>
 *
 * <p>Procedure:</p>
 * <ol>
 * <li>Return 0 when x is below {@code min} or above {@code max}.</li>
 * <li>Locate the bin B containing x.</li>
 * <li>Compute K(B), the mass of B under the within-bin kernel (the integral of
 *     the kernel density over B).</li>
 * <li>Return k(x) * P(B) / K(B), with k the within-bin kernel density and P(B)
 *     the mass of B.</li>
 * </ol>
 *
 * @since 3.1
 */
public double density(double x) {
    // Kept in this exact form: a NaN argument makes both comparisons false.
    final boolean outsideSupport = x < min || x > max;
    if (outsideSupport) {
        return 0d;
    }

    final int bin = findBin(x);
    final double kernelDensity = getKernel(binStats.get(bin)).density(x);
    final double binMass = pB(bin);
    final double kernelMass = kB(bin);
    return kernelDensity * binMass / kernelMass;
}
/**
 * Builds the empirical distribution from the data found at {@code valuesFileURL},
 * using {@code binCount} bins, and records its sample mean and standard deviation
 * in {@code mu} and {@code sigma}.
 *
 * <p>{@code valuesFileURL} must exist and be readable by this process at runtime.</p>
 *
 * <p>Call this method before using {@code getNext()} with
 * {@code mode = DIGEST_MODE}.</p>
 *
 * @param binCount the number of bins used in computing the empirical distribution
 * @throws NullArgumentException if the {@code valuesFileURL} has not been set
 * @throws IOException if an error occurs reading the input file
 * @throws ZeroException if URL contains no data
 */
public void computeDistribution(int binCount)
    throws NullArgumentException, IOException, ZeroException {
    // The field is assigned before loading, so a failed load leaves the new,
    // still-empty distribution in place.
    this.empiricalDistribution =
        new EmpiricalDistribution(binCount, this.randomData.getRandomGenerator());
    this.empiricalDistribution.load(this.valuesFileURL);

    this.mu = this.empiricalDistribution.getSampleStats().getMean();
    this.sigma = this.empiricalDistribution.getSampleStats().getStandardDeviation();
}
/**
 * Computes the mass that the within-bin kernel of bin {@code i} assigns to bin
 * {@code i} itself.
 *
 * @param i index of the bin
 * @return the within-bin kernel probability between the lower and upper endpoints
 *         of bin i; the lower endpoint of bin 0 is {@code min}
 */
@SuppressWarnings("deprecation")
private double kB(int i) {
    final double[] upperBounds = getUpperBounds();
    final RealDistribution binKernel = getKernel(binStats.get(i));

    // Bin 0 has no predecessor, so its lower edge is the overall minimum.
    final double lowerEdge;
    if (i == 0) {
        lowerEdge = min;
    } else {
        lowerEdge = upperBounds[i - 1];
    }
    return binKernel.cumulativeProbability(lowerEdge, upperBounds[i]);
}
/**
 * Draws the next random value while operating in DIGEST_MODE.
 *
 * <p><strong>Precondition</strong>: {@code computeDistribution()} must already have
 * completed successfully. If the empirical distribution is missing or has no bin
 * statistics, a {@code MathIllegalStateException} is thrown.</p>
 *
 * @return next random value from the empirical distribution digest
 * @throws MathIllegalStateException if digest has not been initialized
 */
private double getNextDigest() throws MathIllegalStateException {
    final boolean digestReady =
        empiricalDistribution != null && !empiricalDistribution.getBinStats().isEmpty();

    if (!digestReady) {
        throw new MathIllegalStateException(LocalizedFormats.DIGEST_NOT_INITIALIZED);
    }
    return empiricalDistribution.getNextValue();
}
return getSupportLowerBound(); return getSupportUpperBound(); while (cumBinP(i) < p) { i++; final RealDistribution kernel = getKernel(binStats.get(i)); final double kB = kB(i); final double[] binBounds = getUpperBounds(); final double lower = i == 0 ? min : binBounds[i - 1]; final double kBminus = kernel.cumulativeProbability(lower); final double pB = pB(i); final double pBminus = pBminus(i); final double pCrit = p - pBminus; if (pCrit <= 0) {
return 1d; final int binIndex = findBin(x); final double pBminus = pBminus(binIndex); final double pB = pB(binIndex); final RealDistribution kernel = k(x); if (kernel instanceof ConstantRealDistribution) { if (x < kernel.getNumericalMean()) { final double[] binBounds = getUpperBounds(); final double kB = kB(binIndex); final double lower = binIndex == 0 ? min : binBounds[binIndex - 1]; final double withinBinCum =