// Fit an OLS model of CPU utilization against leader byte-rate metrics.
// No intercept: presumably CPU cost is modeled as proportional to traffic — TODO confirm.
OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
regression.setNoIntercept(true);
// When bytes-in and bytes-out are not diverse enough, bytes-out is dropped from the
// predictors — NOTE(review): presumably to avoid collinearity in the design matrix; confirm.
boolean ignoreLeaderBytesOut = !isLeaderBytesInAndOutRatioDiverseEnough();
regression.newSampleData(aggregateSampleCpuUtilData(), aggregateSampleBytesRateData(ignoreLeaderBytesOut));
double[] parameters = regression.estimateRegressionParameters();
// Column positions of the two predictors in the design matrix / parameter vector.
int leaderBytesInIndex = 0;
int leaderBytesOutIndex = 1;
/**
 * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}, n is the number
 * of observations and p is the number of parameters estimated (including the intercept).</p>
 *
 * <p>If the regression is estimated without an intercept term, what is returned is <pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    final double n = getX().getRowDimension();
    final double p = getX().getColumnDimension();
    if (isNoIntercept()) {
        // Without an intercept the adjustment rescales (1 - R^2) by n / (n - p).
        return 1 - (1 - calculateRSquared()) * (n / (n - p));
    }
    // Standard adjustment penalizing the number of estimated parameters.
    final double ssr = calculateResidualSumOfSquares();
    final double ssto = calculateTotalSumOfSquares();
    return 1 - (ssr * (n - 1)) / (ssto * (n - p));
}
/**
 * Returns the R-Squared statistic, defined by the formula <pre>
 * R<sup>2</sup> = 1 - SSR / SSTO
 * </pre>
 * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals}
 * and SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return R-square statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @since 2.2
 */
public double calculateRSquared() {
    final double ssr = calculateResidualSumOfSquares();
    final double ssto = calculateTotalSumOfSquares();
    return 1 - ssr / ssto;
}
@Override public void setValues(double[] y, double[] x) { if (x.length != y.length) { throw new IllegalArgumentException(String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length)); } double[][] xData = new double[x.length][]; for (int i = 0; i < x.length; i++) { // the implementation determines how to produce a vector of predictors from a single x xData[i] = xVector(x[i]); } if (logY()) { // in some models we are predicting ln y, so we replace each y with ln y y = Arrays.copyOf(y, y.length); // user might not be finished with the array we were given for (int i = 0; i < x.length; i++) { y[i] = Math.log(y[i]); } } final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.setNoIntercept(true); // let the implementation include a constant in xVector if desired ols.newSampleData(y, xData); // provide the data to the model coef = MatrixUtils.createColumnRealMatrix(ols.estimateRegressionParameters()); // get our coefs last_error_rate = ols.estimateErrorVariance(); Log.d(TAG, getClass().getSimpleName() + " Forecast Error rate: errorvar:" + JoH.qs(last_error_rate, 4) + " regssionvar:" + JoH.qs(ols.estimateRegressandVariance(), 4) + " stderror:" + JoH.qs(ols.estimateRegressionStandardError(), 4)); }
/**
 * Trains the ensemble's coefficients by ordinary least squares: each example's
 * interpolated metric scores are regressed against its known similarity.
 *
 * @param simList training examples; must be non-empty
 * @throws IllegalArgumentException if simList is empty
 */
@Override
public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
        throw new IllegalArgumentException("no examples to train on!");
    }
    // Train the interpolator first so missing metric scores can be filled in below.
    similarityInterpolator.trainSimilarity(simList);
    final int rows = simList.size();
    final double[][] X = new double[rows][numMetrics];
    final double[] Y = new double[rows];
    for (int row = 0; row < rows; row++) {
        final EnsembleSim example = simList.get(row);
        Y[row] = example.knownSim.similarity;
        final EnsembleSim interpolated = similarityInterpolator.interpolate(example);
        for (int col = 0; col < numMetrics; col++) {
            X[row][col] = interpolated.getScores().get(col);
        }
    }
    final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.newSampleData(Y, X);
    simlarityCoefficients = new TDoubleArrayList(regression.estimateRegressionParameters());
    // sqrt(R^2) is the multiple correlation coefficient.
    final double pearson = Math.sqrt(regression.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + pearson);
}
// Ordinary least squares fit of Y on the columns of X (intercept included by default).
OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
regression.newSampleData(Y, X);
// Partial F-test between two nested no-intercept models: x (2 predictors) vs xb (3 predictors).
double[] y = {-0.48812477, 0.33458213, -0.52754476, -0.79863471, -0.68544309, -0.12970239, 0.02355622, -0.31890850, 0.34725819, 0.08108851};
double[][] x = {{1,0}, {0,0}, {1,0}, {2,1}, {0,1}, {0,0}, {1,0}, {0,0}, {1,0}, {0,0}};
double[][] xb = {{1,0,0}, {0,0,0}, {1,0,0}, {2,1,2}, {0,1,0}, {0,0,0}, {1,0,0}, {0,0,0}, {1,0,0}, {0,0,0}};
OLSMultipleLinearRegression regr = new OLSMultipleLinearRegression();
// With no intercept, df = n - (number of predictors); no extra slot for a constant.
int degreesOfFreedomA = y.length - (x[0].length); // no + 1
int degreesOfFreedomB = y.length - (xb[0].length); // no + 1
regr.setNoIntercept(true);
regr.newSampleData(y, x);
double sumOfSquaresNoInterceptA = regr.calculateResidualSumOfSquares();
regr.newSampleData(y, xb);
double sumOfSquaresNoInterceptB = regr.calculateResidualSumOfSquares();
// MSE comes from the larger (full) model xb.
double MSE = sumOfSquaresNoInterceptB / degreesOfFreedomB;
System.out.printf("RSS no intercept: %f\n",sumOfSquaresNoInterceptB);
int degreesOfFreedomDifference = Math.abs(degreesOfFreedomB - degreesOfFreedomA);
double MSEdiff = Math.abs((sumOfSquaresNoInterceptB - sumOfSquaresNoInterceptA) / (degreesOfFreedomDifference));
double Fval = MSEdiff / MSE;
FDistribution Fdist = new FDistribution(degreesOfFreedomDifference, degreesOfFreedomB);
// NOTE(review): `cumulative` is not the commons-math FDistribution API
// (that would be `cumulativeProbability`); presumably a different stats library — confirm.
double pval = 1 - Fdist.cumulative(Fval);
System.out.printf("pval without intercept: %f",pval);
// Compare residuals from two designs; x2's tenth observation is zeroed out first.
OLSMultipleLinearRegression regr = new OLSMultipleLinearRegression();
OLSMultipleLinearRegression regr2 = new OLSMultipleLinearRegression();
x2[9] = new double[] { 0, 0, 0 };
regr.newSampleData(y, x);
double[] b = regr.estimateResiduals();
regr2.newSampleData(y, x2);
double[] b2 = regr2.estimateResiduals();
// Partial F-test between nested models x (reduced) and xb (full), intercept included.
OLSMultipleLinearRegression regr = new OLSMultipleLinearRegression();
regr.newSampleData(y, x);
double SSR1 = regr.calculateResidualSumOfSquares();
double df1 = y.length - (x[0].length + 1); //df = n - number of coefficients, including intercept
regr.newSampleData(y, xb);
double SSR2 = regr.calculateResidualSumOfSquares();
double df2 = y.length - (xb[0].length + 1);
double MSE = SSR2/df2; // EDIT: You need the biggest model here!
double MSEdiff = Math.abs ((SSR2 - SSR1) / (df2 - df1));
double dfdiff = Math.abs(df2 - df1);
double Fval = MSEdiff / MSE;
FDistribution Fdist = new FDistribution(dfdiff, df2);
// Upper-tail probability of the F statistic = p-value of the nested-model test.
double pval = 1 - Fdist.cumulativeProbability(Fval);
// Collect the fitted model's summary statistics into the result map.
// NOTE: fragment is cut mid try/catch — the enclosing try and the catch body are outside this view.
map.put("regressandVariance", multipleLinearRegression.estimateRegressandVariance());
map.put("regressionParameters", list(multipleLinearRegression.estimateRegressionParameters()));
map.put("RSquared", multipleLinearRegression.calculateRSquared());
map.put("adjustedRSquared", multipleLinearRegression.calculateAdjustedRSquared());
map.put("residualSumSquares", multipleLinearRegression.calculateResidualSumOfSquares());
map.put("regressionParametersStandardErrors", list(multipleLinearRegression.estimateRegressionParametersStandardErrors()));
map.put("regressionParametersVariance", new Matrix(multipleLinearRegression.estimateRegressionParametersVariance()));
} catch (Exception e) {
/**
 * Fits the OLS model on the accumulated sample data and marks this model ready.
 *
 * @return this model, for call chaining
 */
@Override
OlsLRModel train() {
    // Flatten the buffered observations into the packed array commons-math expects.
    double[] dataArray = LR.doubleListToArray(data);
    R.newSampleData(dataArray, numObs, numVars);
    params = R.estimateRegressionParameters();
    this.state = State.ready;
    // Removed dead code: a List<Double> copy of params was built here and then discarded.
    return this;
}
/**
 * Creates an OLS linear-regression model wrapper.
 *
 * @param model     model identifier passed to the superclass
 * @param intercept whether the regression should estimate a constant term
 * @param numVars   number of regressor variables per observation
 */
OlsLRModel(String model, boolean intercept, int numVars) {
    super(model, Framework.OLS);
    R = new OLSMultipleLinearRegression();
    // Commons-math's flag is inverted relative to our constructor parameter.
    R.setNoIntercept(!intercept);
    numObs = 0;
    this.numVars = numVars;
}
@Override
void setSampleData() {
    // Fill in the x values
    // `values` appears packed as [y0, x0, y1, x1, ...]; the odd slots get the
    // observation index as the single regressor — TODO confirm layout against caller.
    for (int x = 0; x < values.length/2; x++)
        values[(x*2)+1] = x;
    // newSampleData(data, nobs, nvars): values.length/2 observations, 1 regressor each.
    ols.newSampleData(values, values.length/2, 1);
}
@Override
/** * Predict using the built model * * @param regression * @param x * @return */ static double predict(OLSMultipleLinearRegression regression, double[] x) { if (regression == null) { throw new IllegalArgumentException("regression must not be null."); } double[] beta = regression.estimateRegressionParameters(); // intercept at beta[0] double prediction = beta[0]; for (int i = 1; i < beta.length; i++) { prediction += beta[i] * x[i - 1]; } // return prediction; }
/** * Gets the correlation coefficients. * * @param data the data * @return the correlation coefficients */ protected List<Double> getCorrelationCoefficients(final double[][] data) { int n = data.length; int m = data[0].length; List<Double> correlationCoefficients = new LinkedList<Double>(); for (int i = 0; i < n; i++) { double[][] x = new double[n - 1][m]; int k = 0; for (int j = 0; j < n; j++) { if (j != i) { x[k++] = data[j]; } } // Transpose the matrix so that it fits the linear model double[][] xT = new Array2DRowRealMatrix(x).transpose().getData(); // RSquare is the "coefficient of determination" correlationCoefficients.add(MathUtil.createLinearRegression(xT, data[i]).calculateRSquared()); } return correlationCoefficients; }
/**
 * <p>Returns the sum of squared deviations of Y from its mean.</p>
 *
 * <p>If the model has no intercept term, <code>0</code> is used for the
 * mean of Y - i.e., what is returned is the sum of the squared Y values.</p>
 *
 * <p>The value returned by this method is the SSTO value used in
 * the {@link #calculateRSquared() R-squared} computation.</p>
 *
 * @return SSTO - the total sum of squares
 * @throws NullPointerException if the sample has not been set
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateTotalSumOfSquares() {
    final double[] yValues = getY().toArray();
    // No intercept: deviations are taken about zero, so SSTO is just sum(y^2).
    return isNoIntercept()
            ? StatUtils.sumSq(yValues)
            : new SecondMoment().evaluate(yValues);
}
/** Reports whether the underlying regression estimates a constant (intercept) term. */
@Override
boolean hasConstant() {
    final boolean interceptSuppressed = R.isNoIntercept();
    return !interceptSuppressed;
}
/**
 * Fits a no-intercept OLS model over predictor vectors derived from the raw x values;
 * subclasses define the expansion via xVector() and an optional log-y transform via logY().
 * Stores the fitted coefficient column vector and the estimated error variance.
 *
 * @param y response values, one per x
 * @param x raw regressor values
 * @throws IllegalArgumentException if the arrays differ in length
 */
@Override
public void setValues(double[] y, double[] x) {
    if (x.length != y.length) {
        throw new IllegalArgumentException(String.format("The numbers of y and x values must be equal (%d != %d)", y.length, x.length));
    }
    double[][] xData = new double[x.length][];
    for (int i = 0; i < x.length; i++) {
        // the implementation determines how to produce a vector of predictors from a single x
        xData[i] = xVector(x[i]);
    }
    if (logY()) {
        // in some models we are predicting ln y, so we replace each y with ln y
        y = Arrays.copyOf(y, y.length); // user might not be finished with the array we were given
        for (int i = 0; i < x.length; i++) {
            y[i] = Math.log(y[i]);
        }
    }
    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.setNoIntercept(true); // let the implementation include a constant in xVector if desired
    ols.newSampleData(y, xData); // provide the data to the model
    coef = MatrixUtils.createColumnRealMatrix(ols.estimateRegressionParameters()); // get our coefs
    last_error_rate = ols.estimateErrorVariance();
    Log.d(TAG, getClass().getSimpleName() + " Forecast Error rate: errorvar:" + JoH.qs(last_error_rate, 4) + " regssionvar:" + JoH.qs(ols.estimateRegressandVariance(), 4) + " stderror:" + JoH.qs(ols.estimateRegressionStandardError(), 4));
}
/**
 * Trains ensemble coefficients by regressing each example's interpolated metric
 * scores against its known similarity, using ordinary least squares.
 *
 * @param simList training examples; must be non-empty
 * @throws IllegalArgumentException if simList is empty
 */
@Override
public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
        throw new IllegalArgumentException("no examples to train on!");
    }
    // Train the interpolator first so missing metric scores can be filled in below.
    similarityInterpolator.trainSimilarity(simList);
    double[][] X = new double[simList.size()][numMetrics];
    double[] Y = new double[simList.size()];
    for (int i = 0; i<simList.size(); i++){
        Y[i]=simList.get(i).knownSim.similarity;
        EnsembleSim es = similarityInterpolator.interpolate(simList.get(i));
        for (int j=0; j<numMetrics; j++){
            X[i][j]=es.getScores().get(j);
        }
    }
    OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.newSampleData(Y, X);
    simlarityCoefficients = new TDoubleArrayList(regression.estimateRegressionParameters());
    // sqrt(R^2) is the multiple correlation coefficient.
    double pearson = Math.sqrt(regression.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + pearson);
}
/**
 * Fits an ordinary least squares model of the outcomes on the observations.
 *
 * @param observations design matrix, one row per observation
 * @param outcomes     response values, one per observation
 * @return the fitted regression model
 */
protected MultipleLinearRegression regress(double[][] observations, double[] outcomes) {
    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(outcomes, observations);
    return ols;
}