/**
 * Allocate an array of {@code Mean} accumulators and fill every slot with a
 * freshly constructed instance.
 *
 * @param dimensionality Number of accumulators to create (length of the array)
 * @return Array of length {@code dimensionality} whose entries are all new
 *         {@code Mean} objects
 */
public static Mean[] newArray(int dimensionality) {
  final Mean[] means = new Mean[dimensionality];
  int pos = 0;
  while(pos < means.length) {
    means[pos] = new Mean();
    pos++;
  }
  return means;
}
/**
 * Allocate an array of {@code Mean} accumulators and fill every slot with a
 * freshly constructed instance.
 *
 * @param dimensionality Number of accumulators to create (length of the array)
 * @return Array of length {@code dimensionality} whose entries are all new
 *         {@code Mean} objects
 */
public static Mean[] newArray(int dimensionality) {
  final Mean[] means = new Mean[dimensionality];
  int pos = 0;
  while(pos < means.length) {
    means[pos] = new Mean();
    pos++;
  }
  return means;
}
/**
 * Allocate an array of {@code Mean} accumulators and fill every slot with a
 * freshly constructed instance.
 *
 * @param dimensionality Number of accumulators to create (length of the array)
 * @return Array of length {@code dimensionality} whose entries are all new
 *         {@code Mean} objects
 */
public static Mean[] newArray(int dimensionality) {
  final Mean[] means = new Mean[dimensionality];
  int pos = 0;
  while(pos < means.length) {
    means[pos] = new Mean();
    pos++;
  }
  return means;
}
/**
 * Compute the mean squared residue over the selected cells and cache it in
 * the {@code residue} field.
 * <p>
 * For every cell visited with {@code CellVisitor.SELECTED}, the residue is
 * {@code val - rowM[row] - colM[col] + allM}; its square is averaged.
 *
 * @param mat Data matrix
 * @return mean squared residue (the value also stored in {@code residue})
 */
protected double computeMeanSquaredDeviation(final double[][] mat) {
  final Mean squares = new Mean();
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selrow && selcol);
      final double dev = val - rowM[r] - colM[c] + allM;
      squares.put(dev * dev);
      return false;
    }
  };
  visitAll(mat, CellVisitor.SELECTED, collector);
  final double msr = squares.getMean();
  residue = msr;
  return msr;
}
/**
 * Compute the mean squared residue over the selected cells and cache it in
 * the {@code residue} field.
 * <p>
 * For every cell visited with {@code CellVisitor.SELECTED}, the residue is
 * {@code val - rowM[row] - colM[col] + allM}; its square is averaged.
 *
 * @param mat Data matrix
 * @return mean squared residue (the value also stored in {@code residue})
 */
protected double computeMeanSquaredDeviation(final double[][] mat) {
  final Mean squares = new Mean();
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selrow && selcol);
      final double dev = val - rowM[r] - colM[c] + allM;
      squares.put(dev * dev);
      return false;
    }
  };
  visitAll(mat, CellVisitor.SELECTED, collector);
  final double msr = squares.getMean();
  residue = msr;
  return msr;
}
/**
 * Computes the <b>mean row residue</b> of the given <code>row</code>.
 * <p>
 * Each cell visited in the row contributes the square of its residue. The
 * row-centered value is {@code val - rowM[row]}, or its negation when the row
 * is treated as inverted; {@code colM[col]} is then subtracted and
 * {@code allM} added.
 *
 * @param mat Data matrix
 * @param row The row whose residue should be computed.
 * @param rowinverted Indicates if the row should be considered inverted.
 * @return The row residue of the given <code>row</code>.
 */
protected double computeRowResidue(final double[][] mat, int row, final boolean rowinverted) {
  final Mean squares = new Mean();
  // Visitor parameters are named r/c so they do not shadow the method's own row.
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selcol);
      final double rowMean = rowM[r];
      final double colMean = colM[c];
      final double centered = rowinverted ? (rowMean - val) : (val - rowMean);
      final double dev = centered - colMean + allM;
      squares.put(dev * dev);
      return false;
    }
  };
  visitRow(mat, row, CellVisitor.SELECTED, collector);
  return squares.getMean();
}
/**
 * Compute the mean squared residue over the selected cells and cache it in
 * the {@code residue} field.
 * <p>
 * For every cell visited with {@code CellVisitor.SELECTED}, the residue is
 * {@code val - rowM[row] - colM[col] + allM}; its square is averaged.
 *
 * @param mat Data matrix
 * @return mean squared residue (the value also stored in {@code residue})
 */
protected double computeMeanSquaredDeviation(final double[][] mat) {
  final Mean squares = new Mean();
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selrow && selcol);
      final double dev = val - rowM[r] - colM[c] + allM;
      squares.put(dev * dev);
      return false;
    }
  };
  visitAll(mat, CellVisitor.SELECTED, collector);
  final double msr = squares.getMean();
  residue = msr;
  return msr;
}
/**
 * Computes the <b>mean column residue</b> of the given <code>col</code>.
 * <p>
 * The column offset {@code colM[col] - allM} is computed once up front; each
 * cell visited in the column then contributes the square of
 * {@code val - rowM[row] - offset}.
 *
 * @param mat Data matrix
 * @param col The column whose residue should be computed.
 * @return The column residue of the given <code>col</code>.
 */
protected double computeColResidue(final double[][] mat, final int col) {
  final double offset = colM[col] - allM;
  final Mean squares = new Mean();
  // Visitor parameters are named r/c so they do not shadow the method's own col.
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selrow);
      final double rowMean = rowM[r];
      final double dev = val - rowMean - offset;
      squares.put(dev * dev);
      return false;
    }
  };
  visitColumn(mat, col, CellVisitor.SELECTED, collector);
  return squares.getMean();
}
/**
 * Computes the <b>mean column residue</b> of the given <code>col</code>.
 * <p>
 * The column offset {@code colM[col] - allM} is computed once up front; each
 * cell visited in the column then contributes the square of
 * {@code val - rowM[row] - offset}.
 *
 * @param mat Data matrix
 * @param col The column whose residue should be computed.
 * @return The column residue of the given <code>col</code>.
 */
protected double computeColResidue(final double[][] mat, final int col) {
  final double offset = colM[col] - allM;
  final Mean squares = new Mean();
  // Visitor parameters are named r/c so they do not shadow the method's own col.
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selrow);
      final double rowMean = rowM[r];
      final double dev = val - rowMean - offset;
      squares.put(dev * dev);
      return false;
    }
  };
  visitColumn(mat, col, CellVisitor.SELECTED, collector);
  return squares.getMean();
}
/**
 * Computes the <b>mean row residue</b> of the given <code>row</code>.
 * <p>
 * Each cell visited in the row contributes the square of its residue. The
 * row-centered value is {@code val - rowM[row]}, or its negation when the row
 * is treated as inverted; {@code colM[col]} is then subtracted and
 * {@code allM} added.
 *
 * @param mat Data matrix
 * @param row The row whose residue should be computed.
 * @param rowinverted Indicates if the row should be considered inverted.
 * @return The row residue of the given <code>row</code>.
 */
protected double computeRowResidue(final double[][] mat, int row, final boolean rowinverted) {
  final Mean squares = new Mean();
  // Visitor parameters are named r/c so they do not shadow the method's own row.
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selcol);
      final double rowMean = rowM[r];
      final double colMean = colM[c];
      final double centered = rowinverted ? (rowMean - val) : (val - rowMean);
      final double dev = centered - colMean + allM;
      squares.put(dev * dev);
      return false;
    }
  };
  visitRow(mat, row, CellVisitor.SELECTED, collector);
  return squares.getMean();
}
/**
 * Computes the <b>mean column residue</b> of the given <code>col</code>.
 * <p>
 * The column offset {@code colM[col] - allM} is computed once up front; each
 * cell visited in the column then contributes the square of
 * {@code val - rowM[row] - offset}.
 *
 * @param mat Data matrix
 * @param col The column whose residue should be computed.
 * @return The column residue of the given <code>col</code>.
 */
protected double computeColResidue(final double[][] mat, final int col) {
  final double offset = colM[col] - allM;
  final Mean squares = new Mean();
  // Visitor parameters are named r/c so they do not shadow the method's own col.
  final CellVisitor collector = new CellVisitor() {
    @Override
    public boolean visit(double val, int r, int c, boolean selrow, boolean selcol) {
      assert (selrow);
      final double rowMean = rowM[r];
      final double dev = val - rowMean - offset;
      squares.put(dev * dev);
      return false;
    }
  };
  visitColumn(mat, col, CellVisitor.SELECTED, collector);
  return squares.getMean();
}
/**
 * Averages the absolute coordinate difference between the centroid and each
 * referenced object, restricted to a single dimension.
 *
 * @param centroid the centroid coordinates
 * @param objectIDs the ids of the objects to average over
 * @param database the relation from which the objects are fetched
 * @param dimension the dimension along which the distance is measured
 * @return the mean of {@code |centroid[dimension] - o[dimension]|} over all
 *         objects in {@code objectIDs}
 */
private double avgDistance(double[] centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
  final Mean distances = new Mean();
  DBIDIter it = objectIDs.iter();
  while(it.valid()) {
    final V obj = database.get(it);
    final double delta = centroid[dimension] - obj.doubleValue(dimension);
    distances.put(Math.abs(delta));
    it.advance();
  }
  return distances.getMean();
}
/**
 * Averages the absolute coordinate difference between the centroid and each
 * referenced object, restricted to a single dimension.
 *
 * @param centroid the centroid coordinates
 * @param objectIDs the ids of the objects to average over
 * @param database the relation from which the objects are fetched
 * @param dimension the dimension along which the distance is measured
 * @return the mean of {@code |centroid[dimension] - o[dimension]|} over all
 *         objects in {@code objectIDs}
 */
private double avgDistance(double[] centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
  final Mean distances = new Mean();
  DBIDIter it = objectIDs.iter();
  while(it.valid()) {
    final V obj = database.get(it);
    final double delta = centroid[dimension] - obj.doubleValue(dimension);
    distances.put(Math.abs(delta));
    it.advance();
  }
  return distances.getMean();
}
/**
 * Derives {@code mean} and {@code factor} from the values of the given array.
 * <p>
 * Infinite values are skipped. If {@code fixedmean} is NaN, the mean and the
 * sample standard deviation are estimated from the data; otherwise
 * {@code fixedmean} is used as the mean and the deviation is the root of the
 * mean squared difference to it. In both cases the factor is
 * {@code lambda * deviation * MathUtil.SQRT2}. A factor that is not strictly
 * positive (including NaN) is replaced by {@code Double.MIN_NORMAL}.
 *
 * @param array Data array
 * @param adapter Adapter used to read the size and the double values of the array
 * @param <A> Array type
 */
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
  if(!Double.isNaN(fixedmean)) {
    // Mean is given: only the spread around it needs to be estimated.
    mean = fixedmean;
    final Mean squaredDiffs = new Mean();
    final int len = adapter.size(array);
    for(int pos = 0; pos < len; pos++) {
      final double value = adapter.getDouble(array, pos);
      if(Double.isInfinite(value)) {
        continue;
      }
      squaredDiffs.put((value - mean) * (value - mean));
    }
    factor = lambda * FastMath.sqrt(squaredDiffs.getMean()) * MathUtil.SQRT2;
  }
  else {
    // No fixed mean: estimate both mean and standard deviation from the data.
    final MeanVariance stats = new MeanVariance();
    final int len = adapter.size(array);
    for(int pos = 0; pos < len; pos++) {
      final double value = adapter.getDouble(array, pos);
      if(Double.isInfinite(value)) {
        continue;
      }
      stats.put(value);
    }
    mean = stats.getMean();
    factor = lambda * stats.getSampleStddev() * MathUtil.SQRT2;
  }
  // Clamp to the smallest positive normal double; this also catches a NaN factor.
  if(!(factor > 0)) {
    factor = Double.MIN_NORMAL;
  }
}
/**
 * Averages the absolute coordinate difference between the centroid and each
 * referenced object, restricted to a single dimension.
 *
 * @param centroid the centroid
 * @param objectIDs the ids of the objects to average over
 * @param database the relation from which the objects are fetched
 * @param dimension the dimension along which the distance is measured
 * @return the mean of the absolute differences between the centroid's and
 *         each object's value in {@code dimension}
 */
private double avgDistance(Vector centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
  final Mean distances = new Mean();
  DBIDIter it = objectIDs.iter();
  while(it.valid()) {
    final V obj = database.get(it);
    final double delta = centroid.doubleValue(dimension) - obj.doubleValue(dimension);
    distances.put(Math.abs(delta));
    it.advance();
  }
  return distances.getMean();
}
/**
 * Derives {@code mean} and {@code factor} from the negative logarithm of the
 * outlier scores.
 * <p>
 * Each score is transformed to {@code -log(score)}; transformed values that
 * are NaN or infinite are skipped. If {@code fixedmean} is NaN, the mean and
 * the sample standard deviation are estimated from the transformed values;
 * otherwise {@code fixedmean} is used as the mean and the deviation is the
 * root of the mean squared difference to it. In both cases the factor is
 * {@code lambda * deviation * MathUtil.SQRT2}.
 *
 * @param or Outlier result providing the scores
 */
@Override
public void prepare(OutlierResult or) {
  if(!Double.isNaN(fixedmean)) {
    // Mean is given: only the spread around it needs to be estimated.
    mean = fixedmean;
    final Mean squaredDiffs = new Mean();
    final DoubleRelation scores = or.getScores();
    for(DBIDIter it = scores.iterDBIDs(); it.valid(); it.advance()) {
      final double value = -FastMath.log(scores.doubleValue(it));
      if(Double.isNaN(value) || Double.isInfinite(value)) {
        continue;
      }
      squaredDiffs.put((value - mean) * (value - mean));
    }
    factor = lambda * FastMath.sqrt(squaredDiffs.getMean()) * MathUtil.SQRT2;
  }
  else {
    // No fixed mean: estimate both mean and standard deviation from the data.
    final MeanVariance stats = new MeanVariance();
    final DoubleRelation scores = or.getScores();
    for(DBIDIter it = scores.iterDBIDs(); it.valid(); it.advance()) {
      final double value = -FastMath.log(scores.doubleValue(it));
      if(Double.isNaN(value) || Double.isInfinite(value)) {
        continue;
      }
      stats.put(value);
    }
    mean = stats.getMean();
    factor = lambda * stats.getSampleStddev() * MathUtil.SQRT2;
  }
}
/**
 * Checks that means can be copied and merged, and that the two-array
 * {@code put} updates the mean as expected.
 */
@Test
public void combine() {
  final Mean first = new Mean();
  final Mean second = new Mean();
  first.put(new double[] { 1, 2, 3 });
  second.put(new double[] { 4, 5, 6, 7 });
  // Copy the first accumulator, then merge the second one into the copy.
  final Mean merged = new Mean(first);
  merged.put(second);
  assertEquals("First mean", 2, first.getMean(), 0.);
  assertEquals("Second mean", 5.5, second.getMean(), 0.);
  assertEquals("Third mean", 4, merged.getMean(), 0.);
  // The expected 3.2 matches reading the second array as weights:
  // (22 + 1*3 + 2*2 + 3*1) / (4 + 3 + 2 + 1).
  second.put(new double[] { 1, 2, 3 }, new double[] { 3, 2, 1 });
  assertEquals("Fourth mean", 3.2, second.getMean(), 1e-15);
}
} // closes the enclosing class, whose header is outside this excerpt
/**
 * Checks that a single infinite value dominates the mean: both values are
 * counted, and the mean equals the infinity that was added.
 */
@Test
public void testInfinity() {
  // Positive infinity followed by zero.
  final Mean positive = new Mean();
  positive.put(Double.POSITIVE_INFINITY);
  positive.put(0.);
  assertEquals(2, positive.getCount(), 0);
  assertEquals("Sensitive to infinity", Double.POSITIVE_INFINITY, positive.getMean(), 0);

  // Negative infinity followed by zero, on a fresh accumulator.
  final Mean negative = new Mean();
  negative.put(Double.NEGATIVE_INFINITY);
  negative.put(0.);
  assertEquals(2, negative.getCount(), 0);
  assertEquals("Sensitive to infinity", Double.NEGATIVE_INFINITY, negative.getMean(), 0);
}
/**
 * Regression test for an earlier bug with tiny arrays: feeds a single value
 * and arrays of length 0 to 3, then checks count, mean, the string
 * representation and the static helpers. Keep.
 */
@Test
public void basic() {
  final Mean acc = new Mean();
  acc.put(0);
  acc.put(new double[] {});
  acc.put(new double[] { 0 });
  acc.put(new double[] { 0, 0 });
  acc.put(new double[] { 0, 0, 0 });
  // 1 single value + 0 + 1 + 2 + 3 array entries = 7 observations.
  assertEquals("Count wrong.", 7, acc.getCount(), 0.);
  assertEquals("Mean wrong.", 0, acc.getMean(), 0.);
  // An '@' in the text would mean no '@'-free toString() was provided.
  final int atIndex = acc.toString().indexOf('@');
  assertEquals("No toString", -1, atIndex);
  assertEquals("Static helper", 2, Mean.of(1, 2, 3), 0.);
  assertEquals("Static helper", 2, Mean.highPrecision(1, 2, 3), 0.);
}
/**
 * Run the actual algorithm: the score of each object is the mean of its
 * vector components.
 *
 * @param relation Relation
 * @return Result holding the per-object scores and their observed minimum
 *         and maximum
 */
public OutlierResult run(Relation<? extends NumberVector> relation) {
  final WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
  final DoubleMinMax range = new DoubleMinMax();
  // A single accumulator is reused for all objects and reset for each one.
  final Mean componentMean = new Mean();
  DBIDIter it = relation.iterDBIDs();
  while(it.valid()) {
    componentMean.reset();
    final NumberVector vec = relation.get(it);
    for(int d = 0; d < vec.getDimensionality(); d++) {
      componentMean.put(vec.doubleValue(d));
    }
    final double score = componentMean.getMean();
    store.putDouble(it, score);
    range.put(score);
    it.advance();
  }
  final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Trivial mean score", "mean-outlier", store, relation.getDBIDs());
  final OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax());
  return new OutlierResult(scoreMeta, scoreRelation);
}