@Test public void testDepth() { List<Integer> totals = Lists.newArrayList(); for (int i = 0; i < 1000; i++) { ChineseRestaurant x = new ChineseRestaurant(10); Multiset<Integer> counts = HashMultiset.create(); for (int j = 0; j < 100; j++) { counts.add(x.sample()); } List<Integer> tmp = Lists.newArrayList(); for (Integer k : counts.elementSet()) { tmp.add(counts.count(k)); } Collections.sort(tmp, Collections.reverseOrder()); while (totals.size() < tmp.size()) { totals.add(0); } int j = 0; for (Integer k : tmp) { totals.set(j, totals.get(j) + k); j++; } } // these are empirically derived values, not principled ones assertEquals(25000.0, (double) totals.get(0), 1000); assertEquals(24000.0, (double) totals.get(1), 1000); assertEquals(8000.0, (double) totals.get(2), 200); assertEquals(1000.0, (double) totals.get(15), 50); assertEquals(1000.0, (double) totals.get(20), 40); }
double predict5 = predictSize(m5.viewPart(0, k, 0, 3), i, 0.5); assertEquals(predict5, Math.log(s5.size()), 1); double predict9 = predictSize(m9.viewPart(0, k, 0, 3), i, 0.9); assertEquals(predict9, Math.log(s9.size()), 1); assertEquals(0.0, (double) hapaxCount(s0) / s0.size(), 0.25); assertEquals(0.5, (double) hapaxCount(s5) / s5.size(), 0.1); assertEquals(0.9, (double) hapaxCount(s9) / s9.size(), 0.05);
/**
 * Checks the degenerate case of a restaurant constructed with arguments (100, 1): after
 * 10000 draws the restaurant must report 10000 tables, each holding exactly one sample.
 */
@Test
public void testExtremeDiscount() {
  int samples = 10000;
  ChineseRestaurant x = new ChineseRestaurant(100, 1);

  // Only the sampler's internal state is inspected below, so the drawn values themselves
  // are not collected; sample() is called purely for its side effect on x.
  for (int i = 0; i < samples; i++) {
    x.sample();
  }

  assertEquals(samples, x.size());
  for (int i = 0; i < samples; i++) {
    assertEquals(1, x.count(i));
  }
}
/**
 * Fits a straight line to the supplied observations by least squares (normal equations solved
 * with a QR decomposition) and evaluates that line at {@code log(currentIndex)}. Along the way
 * it asserts that the fitted slope is within 0.2 of {@code expectedCoefficient}.
 *
 * @param m Observations, one per row. Column 0 is the value being fitted; columns 1 and 2 are
 *          the regressors. The prediction pairs them with {@code log(currentIndex)} and
 *          {@code 1}, so they are presumably log(index) and a constant term -- confirm
 *          against the code that fills the matrix.
 * @param currentIndex Total data points seen so far; the line is evaluated at its logarithm.
 * @param expectedCoefficient The slope the fit is expected to produce.
 * @return The fitted line's value at {@code log(currentIndex)}.
 */
private static double predictSize(Matrix m, int currentIndex, double expectedCoefficient) {
  int observations = m.rowSize();
  Matrix response = m.viewPart(0, observations, 0, 1);
  Matrix design = m.viewPart(0, observations, 1, 2);
  Matrix designT = design.transpose();

  // normal equations: (A'A) x = A'b
  Matrix gram = designT.times(design);
  Matrix rhs = designT.times(response);
  Matrix coefficients = new QRDecomposition(gram).solve(rhs).transpose();

  assertEquals(expectedCoefficient, coefficients.get(0, 0), 0.2);

  // slope * log(currentIndex) + intercept * 1
  DenseVector query = new DenseVector(new double[]{Math.log(currentIndex), 1});
  return coefficients.times(query).get(0);
}