/**
 * Runs a chi-square test over the nominal values observed under each condition.
 *
 * <p>A contingency table is built with one row per distinct value and one column
 * per condition (in the order returned by {@code getConditionNames()}); each cell
 * holds how many times that value occurred under that condition. The table is
 * then handed to {@code chiSquareTest}, whose result is returned unchanged.
 *
 * <p>NOTE(review): a condition that yields no values never gets a count map, so
 * filling its column dereferences {@code null} as soon as the table has at least
 * one row. Confirm that callers never pass a condition without values.
 *
 * @param values the per-condition nominal observations
 * @return whatever {@code chiSquareTest} returns for the contingency table
 */
@Override
public double test(FeatureValues<String> values) {
    final String[] conditionNames = values.getConditionNames();

    // Tally occurrences per condition while collecting every distinct value seen.
    final Set<String> distinctValues = new HashSet<>();
    final Map<String, Map<String, Integer>> tallies = new HashMap<>();
    for (String name : conditionNames) {
        for (String observed : values.getConditionValues(name)) {
            distinctValues.add(observed);
            tallies.computeIfAbsent(name, key -> new HashMap<>())
                    .merge(observed, 1, Integer::sum);
        }
    }

    // Rows follow the set's iteration order; columns follow the condition order.
    final String[] rowLabels = distinctValues.toArray(new String[0]);
    final long[][] table = new long[rowLabels.length][conditionNames.length];
    for (int col = 0; col < conditionNames.length; col++) {
        final Map<String, Integer> tally = tallies.get(conditionNames[col]);
        for (int row = 0; row < rowLabels.length; row++) {
            // A value never seen under this condition counts as zero.
            table[row][col] = tally.getOrDefault(rowLabels[row], 0);
        }
    }

    return chiSquareTest(table);
}
/**
 * Compares observed counts with expected values using
 * {@code TestUtils.chiSquareTest} and checks the outcome against a threshold.
 *
 * <p>NOTE(review): {@code TestUtils} is presumably Apache Commons Math's, in which
 * case the compared quantity is the test's p-value - confirm.
 *
 * @param expectations expected values, converted to a {@code double[]}
 * @param observations observed counts, converted to a {@code long[]}
 * @param confidence   threshold the test result is compared against
 * @return {@code false} if the test result is strictly below {@code confidence};
 *         {@code true} otherwise (which includes a NaN result)
 */
boolean chiSquare(List<? extends Number> expectations,
        List<? extends Number> observations, double confidence) {
    final double[] expected = Doubles.toArray(expectations);
    final long[] observed = Longs.toArray(observations);
    final double pValue = TestUtils.chiSquareTest(expected, observed);

    // Only a result strictly below the threshold fails; NaN therefore passes.
    if (pValue < confidence) {
        return false;
    }
    return true;
}
/**
 * Checks whether the observations conform to a Poisson process with the
 * specified intensity. Uses a chi square test with the specified confidence.
 * The null hypothesis is that the observations are the result of a Poisson
 * process.
 *
 * <p>For every distinct observed value the expected count is the Poisson
 * probability of that value (mean {@code length * intensity}) multiplied by the
 * total number of observations.
 *
 * @param observations frequency table of the observed values; its values are
 *        cast to {@code Long}
 * @param intensity the intensity of the hypothesised Poisson process
 * @param length factor multiplied with {@code intensity} to obtain the mean of
 *        the Poisson distribution (presumably the length of the observation
 *        window - confirm against callers)
 * @param confidence threshold the chi square test result is compared against
 * @return <code>false</code> if any expected count is zero or the test result
 *         is strictly below {@code confidence}; <code>true</code> otherwise
 */
static boolean isPoissonProcess(Frequency observations, double intensity,
        double length, double confidence) {
    final PoissonDistribution poisson = new PoissonDistribution(length * intensity);
    final int uniqueCount = observations.getUniqueCount();
    final long total = observations.getSumFreq();
    final long[] observedCounts = new long[uniqueCount];
    final double[] expectedCounts = new double[uniqueCount];

    int slot = 0;
    for (final Iterator<?> values = observations.valuesIterator(); values.hasNext(); slot++) {
        final Long value = (Long) values.next();
        observedCounts[slot] = observations.getCount(value);
        expectedCounts[slot] = poisson.probability(value.intValue()) * total;
        // An observed value with zero expected count fails the check immediately.
        if (expectedCounts[slot] == 0) {
            return false;
        }
    }

    final double pValue = TestUtils.chiSquareTest(expectedCounts, observedCounts);
    // Only a result strictly below the threshold fails; NaN therefore passes.
    if (pValue < confidence) {
        return false;
    }
    return true;
}