/**
 * Computes p-value for 2-sided, 1-sample t-test.
 * <p>
 * The p-value is evaluated as twice the lower-tail probability at
 * <code>-|t|</code>. Evaluating it as
 * <code>1 - P(-|t| &lt;= T &lt;= |t|)</code> subtracts two nearly equal
 * numbers when <code>|t|</code> is large, so very small p-values would be
 * rounded to zero or lose most of their significant digits.</p>
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m, double mu, double v, double n)
    throws MathException {
    double t = Math.abs(t(m, mu, v, n));
    distribution.setDegreesOfFreedom(n - 1);
    // Both tails have equal mass (assumes the distribution is symmetric
    // about zero, as Student's t is), so double the lower tail directly.
    return 2.0 * distribution.cumulativeProbability(-t);
}
/** * Modify the distribution used to compute inference statistics. * @param value the new distribution * @since 1.2 */ public void setDistribution(TDistribution value) { distribution = value; // modify degrees of freedom if (n > 2) { distribution.setDegreesOfFreedom(n - 2); } } }
/**
 * Returns the significance level of the slope (equiv) correlation.
 * <p>
 * Specifically, the returned value is the smallest <code>alpha</code>
 * such that the slope confidence interval with significance level
 * equal to <code>alpha</code> does not include <code>0</code>.
 * On regression output, this is often denoted <code>Prob(|t| &gt; 0)</code>
 * </p><p>
 * <strong>Usage Note</strong>:<br>
 * The validity of this statistic depends on the assumption that the
 * observations included in the model are drawn from a
 * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
 * Bivariate Normal Distribution</a>.</p>
 * <p>
 * If there are fewer than <strong>three</strong> observations in the
 * model, or if there is no variation in x, this returns
 * <code>Double.NaN</code>.</p>
 * <p>
 * The value is computed as twice the lower-tail probability at
 * <code>-|slope| / stdErr</code>. Computing it as
 * <code>2 * (1 - cdf(|slope| / stdErr))</code> cancels catastrophically
 * for strongly significant slopes and can return exactly zero.</p>
 *
 * @return significance level for slope/correlation
 * @throws MathException if the significance level can not be computed.
 */
public double getSignificance() throws MathException {
    final double tStatistic = Math.abs(getSlope()) / getSlopeStdErr();
    // Lower tail at -|t| equals the upper tail at |t| (assumes the
    // distribution is symmetric about zero, as Student's t is).
    return 2d * distribution.cumulativeProbability(-tStatistic);
}
distribution.inverseCumulativeProbability(1d - alpha / 2d);
/** * Modify the distribution used to compute inference statistics. * @param value the new distribution * @since 1.2 */ public void setDistribution(TDistribution value) { distribution = value; // modify degrees of freedom if (n > 2) { distribution.setDegreesOfFreedom(n - 2); } } }
/**
 * Returns the significance level of the slope (equiv) correlation.
 * <p>
 * Specifically, the returned value is the smallest <code>alpha</code>
 * such that the slope confidence interval with significance level
 * equal to <code>alpha</code> does not include <code>0</code>.
 * On regression output, this is often denoted <code>Prob(|t| &gt; 0)</code>
 * </p><p>
 * <strong>Usage Note</strong>:<br>
 * The validity of this statistic depends on the assumption that the
 * observations included in the model are drawn from a
 * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
 * Bivariate Normal Distribution</a>.</p>
 * <p>
 * If there are fewer than <strong>three</strong> observations in the
 * model, or if there is no variation in x, this returns
 * <code>Double.NaN</code>.</p>
 * <p>
 * The value is computed as twice the lower-tail probability at
 * <code>-|slope| / stdErr</code>. Computing it as
 * <code>2 * (1 - cdf(|slope| / stdErr))</code> cancels catastrophically
 * for strongly significant slopes and can return exactly zero.</p>
 *
 * @return significance level for slope/correlation
 * @throws MathException if the significance level can not be computed.
 */
public double getSignificance() throws MathException {
    final double tStatistic = Math.abs(getSlope()) / getSlopeStdErr();
    // Lower tail at -|t| equals the upper tail at |t| (assumes the
    // distribution is symmetric about zero, as Student's t is).
    return 2d * distribution.cumulativeProbability(-tStatistic);
}
distribution.inverseCumulativeProbability(1d - alpha / 2d);
/**
 * Computes the p-value of a two-sided, one-sample t-test.
 * <p>
 * The distribution's degrees of freedom are set to <code>n - 1</code>
 * and the result is twice the cumulative probability at
 * <code>-|t|</code>.</p>
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m, double mu, double v, double n)
    throws MathException {
    final double degreesOfFreedom = n - 1;
    final double absoluteT = Math.abs(t(m, mu, v, n));

    distribution.setDegreesOfFreedom(degreesOfFreedom);
    final double lowerTail = distribution.cumulativeProbability(-absoluteT);
    return 2.0 * lowerTail;
}
/**
 * Removes the observation (x,y) from the regression data set.
 * <p>
 * Mirrors the addData method. This method permits the use of
 * SimpleRegression instances in streaming mode where the regression
 * is applied to a sliding "window" of observations, however the caller is
 * responsible for maintaining the set of observations in the window.</p>
 * <p>
 * The method has no effect if there are no points of data (i.e. n=0).</p>
 * <p>
 * When exactly one observation is present, the running sums and means
 * maintained by this method are reset to zero and <code>n</code> becomes
 * <code>0</code>. The incremental update divides by <code>n - 1</code>,
 * which for a single observation would leave <code>NaN</code> or infinite
 * values in the accumulators.</p>
 *
 * @param x independent variable value
 * @param y dependent variable value
 */
public void removeData(double x, double y) {
    if (n <= 0) {
        return;
    }

    if (n == 1) {
        // Removing the only observation leaves an empty data set; the
        // update formulas below are undefined here (division by zero).
        sumXX = 0d;
        sumYY = 0d;
        sumXY = 0d;
        sumX = 0d;
        sumY = 0d;
        xbar = 0d;
        ybar = 0d;
        n = 0;
        return;
    }

    final double count = (double) n;
    final double remaining = n - 1d;
    final double dx = x - xbar;
    final double dy = y - ybar;

    // Operand order is kept as (d * d * n) / (n - 1) so results are
    // bit-for-bit the same as before for n > 1.
    sumXX -= dx * dx * count / remaining;
    sumYY -= dy * dy * count / remaining;
    sumXY -= dx * dy * count / remaining;
    xbar -= dx / remaining;
    ybar -= dy / remaining;
    sumX -= x;
    sumY -= y;
    n--;

    if (n > 2) {
        distribution.setDegreesOfFreedom(n - 2);
    }
}
/**
 * Returns a matrix of p-values associated with the (two-sided) null
 * hypothesis that the corresponding correlation coefficient is zero.
 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
 * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
 * a value with absolute value greater than or equal to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
 * <p>The values in the matrix are sometimes referred to as the
 * <i>significance</i> of the corresponding correlation coefficients.</p>
 * <p>Each off-diagonal entry is computed as twice the lower-tail
 * probability at <code>-|t|</code>. Computing it as
 * <code>2 * (1 - cdf(|t|))</code> limits the smallest non-zero p-value to
 * roughly machine epsilon because of cancellation. Diagonal entries are
 * <code>0</code>.</p>
 *
 * @return matrix of p-values
 * @throws MathException if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() throws MathException {
    TDistribution tDistribution = new TDistributionImpl(nObs - 2);
    int nVars = correlationMatrix.getColumnDimension();
    double[][] out = new double[nVars][nVars];
    for (int i = 0; i < nVars; i++) {
        for (int j = 0; j < nVars; j++) {
            if (i == j) {
                out[i][j] = 0d;
            } else {
                double r = correlationMatrix.getEntry(i, j);
                double t = Math.abs(r * Math.sqrt((nObs - 2) / (1 - r * r)));
                // Lower tail at -|t| equals the upper tail at |t| by
                // symmetry of the t distribution, without the cancellation.
                out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
            }
        }
    }
    return new BlockRealMatrix(out);
}
distribution.inverseCumulativeProbability(1d - alpha / 2d);
/**
 * Computes the p-value of a two-sided, two-sample t-test, under the
 * assumption of equal subpopulation variances.
 * <p>
 * The degrees of freedom are the sum of the sample sizes minus 2; the
 * result is twice the cumulative probability at <code>-|t|</code>.</p>
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double homoscedasticTTest(double m1, double m2, double v1,
        double v2, double n1, double n2)
    throws MathException {
    final double absoluteT = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));

    distribution.setDegreesOfFreedom(n1 + n2 - 2);
    final double lowerTail = distribution.cumulativeProbability(-absoluteT);
    return 2.0 * lowerTail;
}
/**
 * Removes the observation (x,y) from the regression data set.
 * <p>
 * Mirrors the addData method. This method permits the use of
 * SimpleRegression instances in streaming mode where the regression
 * is applied to a sliding "window" of observations, however the caller is
 * responsible for maintaining the set of observations in the window.</p>
 * <p>
 * The method has no effect if there are no points of data (i.e. n=0).</p>
 * <p>
 * When exactly one observation is present, the running sums and means
 * maintained by this method are reset to zero and <code>n</code> becomes
 * <code>0</code>. The incremental update divides by <code>n - 1</code>,
 * which for a single observation would leave <code>NaN</code> or infinite
 * values in the accumulators.</p>
 *
 * @param x independent variable value
 * @param y dependent variable value
 */
public void removeData(double x, double y) {
    if (n <= 0) {
        return;
    }

    if (n == 1) {
        // Removing the only observation leaves an empty data set; the
        // update formulas below are undefined here (division by zero).
        sumXX = 0d;
        sumYY = 0d;
        sumXY = 0d;
        sumX = 0d;
        sumY = 0d;
        xbar = 0d;
        ybar = 0d;
        n = 0;
        return;
    }

    final double count = (double) n;
    final double remaining = n - 1d;
    final double dx = x - xbar;
    final double dy = y - ybar;

    // Operand order is kept as (d * d * n) / (n - 1) so results are
    // bit-for-bit the same as before for n > 1.
    sumXX -= dx * dx * count / remaining;
    sumYY -= dy * dy * count / remaining;
    sumXY -= dx * dy * count / remaining;
    xbar -= dx / remaining;
    ybar -= dy / remaining;
    sumX -= x;
    sumY -= y;
    n--;

    if (n > 2) {
        distribution.setDegreesOfFreedom(n - 2);
    }
}
/**
 * Returns the significance level of the slope (equiv) correlation.
 * <p>
 * Specifically, the returned value is the smallest <code>alpha</code>
 * such that the slope confidence interval with significance level
 * equal to <code>alpha</code> does not include <code>0</code>.
 * On regression output, this is often denoted <code>Prob(|t| &gt; 0)</code>
 * </p><p>
 * <strong>Usage Note</strong>:<br>
 * The validity of this statistic depends on the assumption that the
 * observations included in the model are drawn from a
 * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
 * Bivariate Normal Distribution</a>.</p>
 * <p>
 * If there are fewer than <strong>three</strong> observations in the
 * model, or if there is no variation in x, this returns
 * <code>Double.NaN</code>.</p>
 * <p>
 * The value is computed as twice the lower-tail probability at
 * <code>-|slope| / stdErr</code>. Computing it as
 * <code>2 * (1 - cdf(|slope| / stdErr))</code> cancels catastrophically
 * for strongly significant slopes and can return exactly zero.</p>
 *
 * @return significance level for slope/correlation
 * @throws MathException if the significance level can not be computed.
 */
public double getSignificance() throws MathException {
    final double tStatistic = FastMath.abs(getSlope()) / getSlopeStdErr();
    // Lower tail at -|t| equals the upper tail at |t| (assumes the
    // distribution is symmetric about zero, as Student's t is).
    return 2d * distribution.cumulativeProbability(-tStatistic);
}
/**
 * Computes p-value for 2-sided, 2-sample t-test, under the assumption
 * of equal subpopulation variances.
 * <p>
 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
 * <p>
 * The p-value is evaluated as twice the lower-tail probability at
 * <code>-|t|</code>. Evaluating it as
 * <code>1 - P(-|t| &lt;= T &lt;= |t|)</code> subtracts two nearly equal
 * numbers when <code>|t|</code> is large, so very small p-values would be
 * rounded to zero or lose most of their significant digits.</p>
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double homoscedasticTTest(double m1, double m2, double v1,
        double v2, double n1, double n2)
    throws MathException {
    double t = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
    // n1 and n2 are already doubles, so no cast is needed.
    double degreesOfFreedom = n1 + n2 - 2;
    distribution.setDegreesOfFreedom(degreesOfFreedom);
    // Both tails have equal mass (assumes the distribution is symmetric
    // about zero, as Student's t is), so double the lower tail directly.
    return 2.0 * distribution.cumulativeProbability(-t);
}
/** * Modify the distribution used to compute inference statistics. * @param value the new distribution * @since 1.2 * @deprecated in 2.2 (to be removed in 3.0). */ @Deprecated public void setDistribution(TDistribution value) { distribution = value; // modify degrees of freedom if (n > 2) { distribution.setDegreesOfFreedom(n - 2); } } }
/**
 * Builds the matrix of two-sided p-values for the null hypothesis that
 * each correlation coefficient is zero.
 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
 * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
 * a value with absolute value greater than or equal to <br>
 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
 * <p>These values are sometimes called the <i>significance</i> of the
 * corresponding correlation coefficients. Diagonal entries are
 * <code>0</code>.</p>
 *
 * @return matrix of p-values
 * @throws MathException if an error occurs estimating probabilities
 */
public RealMatrix getCorrelationPValues() throws MathException {
    final TDistribution tDistribution = new TDistributionImpl(nObs - 2);
    final int nVars = correlationMatrix.getColumnDimension();
    final double[][] pValues = new double[nVars][nVars];

    for (int row = 0; row < nVars; row++) {
        for (int col = 0; col < nVars; col++) {
            if (row == col) {
                // A variable compared with itself.
                pValues[row][col] = 0d;
                continue;
            }
            final double r = correlationMatrix.getEntry(row, col);
            final double t =
                FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
            final double lowerTail = tDistribution.cumulativeProbability(-t);
            pValues[row][col] = 2 * lowerTail;
        }
    }
    return new BlockRealMatrix(pValues);
}
/**
 * Computes the p-value of a two-sided, one-sample t-test.
 * <p>
 * The distribution's degrees of freedom are set to <code>n - 1</code>
 * and the result is twice the cumulative probability at
 * <code>-|t|</code>.</p>
 *
 * @param m sample mean
 * @param mu constant to test against
 * @param v sample variance
 * @param n sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double tTest(double m, double mu, double v, double n)
    throws MathException {
    final double degreesOfFreedom = n - 1;
    final double absoluteT = FastMath.abs(t(m, mu, v, n));

    distribution.setDegreesOfFreedom(degreesOfFreedom);
    final double lowerTail = distribution.cumulativeProbability(-absoluteT);
    return 2.0 * lowerTail;
}
distribution.setDegreesOfFreedom(n - 2);
/**
 * Computes the p-value of a two-sided, two-sample t-test, under the
 * assumption of equal subpopulation variances.
 * <p>
 * The degrees of freedom are the sum of the sample sizes minus 2; the
 * result is twice the cumulative probability at <code>-|t|</code>.</p>
 *
 * @param m1 first sample mean
 * @param m2 second sample mean
 * @param v1 first sample variance
 * @param v2 second sample variance
 * @param n1 first sample n
 * @param n2 second sample n
 * @return p-value
 * @throws MathException if an error occurs computing the p-value
 */
protected double homoscedasticTTest(double m1, double m2, double v1,
        double v2, double n1, double n2)
    throws MathException {
    final double absoluteT =
        FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));

    distribution.setDegreesOfFreedom(n1 + n2 - 2);
    final double lowerTail = distribution.cumulativeProbability(-absoluteT);
    return 2.0 * lowerTail;
}