@Test public void testDeleteAndUpdate() { Random rand = RandomUtils.getRandom(); Multinomial<Integer> table = new Multinomial<>(); assertEquals(0, table.getWeight(), 1.0e-9); for (int i = 0; i < 10; i++) { p[i] = rand.nextDouble(); table.add(i, p[i]); total += p[i]; assertEquals(total, table.getWeight(), 1.0e-9); assertEquals(total, table.getWeight(), 1.0e-9); table.delete(7); p[7] = 0; table.set(8, 0); p[8] = 0; total -= delta; assertEquals(total, table.getWeight(), 1.0e-9); for (int i = 0; i < 10; i++) { assertEquals(p[i], table.getWeight(i), 0); assertEquals(p[i] / total, table.getProbability(i), 1.0e-10); table.set(9, 5.1); total -= p[9]; p[9] = 5.1; total += 5.1;
/**
 * Maps the variate {@code u} to a sample by delegating to the cached {@code partial} multinomial.
 *
 * When {@code u} is below the current {@code limit}, the table is rebuilt first: {@code limit} is
 * reset to 1 and {@code pd.probability(i)} is subtracted from it for i = 0, 1, 2, ... for as long as
 * {@code u / 20 < limit}. Each step is recorded as a weighted entry, and whatever is left in
 * {@code limit} is appended as one final entry keyed by the number of steps taken.
 *
 * NOTE(review): u is presumably a uniform draw in [0, 1) and pd a discrete distribution; confirm
 * against the enclosing class, which is not visible here.
 *
 * @param u the variate passed through to {@code partial.sample(u)}
 * @return whatever {@code partial.sample(u)} returns for the (possibly rebuilt) table
 */
double sample(double u) { if (u < limit) { List<WeightedThing<Integer>> steps = Lists.newArrayList(); limit = 1; int i = 0; while (u / 20 < limit) { double pdf = pd.probability(i); limit -= pdf; steps.add(new WeightedThing<>(i, pdf)); i++; } steps.add(new WeightedThing<>(steps.size(), limit)); partial = new Multinomial<>(steps); } return partial.sample(u); } }
@Test public void testSetZeroWhileIterating() { Multinomial<Integer> table = new Multinomial<>(); for (int i = 0; i < 10000; ++i) { table.add(i, i); } // Setting a sample's weight to 0 removes from the items map. // If that map is used when iterating (it used to be), it will // trigger a ConcurrentModificationException. for (Integer sample : table) { table.set(sample, 0); } }
@Test(expected=NullPointerException.class) public void testNoNullValuesAllowed() { Multinomial<Integer> table = new Multinomial<>(); // No null values should be allowed. table.add(null, 1); }
Multinomial<Integer> seedSelector = new Multinomial<Integer>(); for (int i = 0; i < datapoints.size(); ++i) { double selectionProbability = deltaX + datapoints.size() * distanceMeasure.distance(datapoints.get(i), center); seedSelector.add(i, selectionProbability); WeightedVector row = datapoints.get(i); double w = distanceMeasure.distance(c_1, row) * 2 * Math.log(1 + row.getWeight()); seedSelector.set(i, w); while (centroids.size() < numClusters) { int seedIndex = seedSelector.sample(); Centroid nextSeed = new Centroid(datapoints.get(seedIndex)); nextSeed.setIndex(clusterIndex++); centroids.add(nextSeed); seedSelector.delete(seedIndex); if (newWeight < seedSelector.getWeight(currSeedIndex)) { seedSelector.set(currSeedIndex, newWeight);
/**
 * Selects some of the original points randomly, without replacement, with probability proportional
 * to their weights. This is much less sophisticated than the kmeans++ approach, but it is faster.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * NOTE(review): one point is removed from the selector per cluster, so this presumably relies on
 * {@code numClusters} not exceeding the number of datapoints; confirm with callers.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
  int numDatapoints = datapoints.size();
  double totalWeight = 0;
  for (WeightedVector datapoint : datapoints) {
    totalWeight += datapoint.getWeight();
  }
  // Selector over datapoint indices, each weighted by its share of the total weight.
  Multinomial<Integer> seedSelector = new Multinomial<>();
  for (int i = 0; i < numDatapoints; ++i) {
    seedSelector.add(i, datapoints.get(i).getWeight() / totalWeight);
  }
  for (int i = 0; i < numClusters; ++i) {
    int sample = seedSelector.sample();
    // Remove the chosen index so the same datapoint cannot seed two clusters.
    seedSelector.delete(sample);
    Centroid centroid = new Centroid(datapoints.get(sample));
    centroid.setIndex(i);
    centroids.add(centroid);
  }
}
Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) { basisMatrix.set(i, j, choice.sample());
public NameSampler() { try { if (first.compareAndSet(null, new Multinomial<>())) { Preconditions.checkState(last.getAndSet(new Multinomial<>()) == null); String name = initialCap(parts.next()); double weight = Double.parseDouble(parts.next()); if (first.get().getWeight(name) == 0) { first.get().add(name, weight); } else { first.get().set(name, first.get().getWeight(name) + weight); String name = initialCap(parts.next()); double weight = Double.parseDouble(parts.next()); last.get().add(name, weight);
/**
 * Adds ten randomly weighted entries, then checks that the table is self-consistent and that each
 * entry reports exactly the weight it was added with.
 */
@Test
public void testInsert() {
  Random rand = RandomUtils.getRandom();
  Multinomial<Integer> table = new Multinomial<>();

  double[] expected = new double[10];
  for (int key = 0; key < 10; key++) {
    double w = rand.nextDouble();
    expected[key] = w;
    table.add(key, expected[key]);
  }

  checkSelfConsistent(table);

  // Stored weights must match exactly, hence the zero tolerance.
  for (int key = 0; key < 10; key++) {
    assertEquals(expected[key], table.getWeight(key), 0);
  }
}
/**
 * Draws one value using a fresh {@code rand.nextDouble()} variate.
 *
 * @return the value chosen by {@code sample(double)} for that variate
 * @throws IllegalArgumentException if the weight list is empty
 */
@Override
public T sample() {
  Preconditions.checkArgument(!weight.isEmpty());
  final double u = rand.nextDouble();
  return sample(u);
}
/**
 * Probes the table just either side of each cumulative-probability boundary and asserts that
 * every positive-weight entry is hit exactly twice and every zero-weight entry never.
 *
 * Sampled values are used as array indexes, so the table's values must lie in
 * {@code [0, table.getWeights().size())}.
 *
 * @param table the multinomial to verify
 */
private static void checkSelfConsistent(Multinomial<Integer> table) {
  List<Double> weights = table.getWeights();
  double totalWeight = table.getWeight();

  int[] hits = new int[weights.size()];
  double cumulative = 0;
  for (double w : weights) {
    if (w > 0) {
      // Just below the boundary should still land on the previous positive-weight entry ...
      if (cumulative > 0) {
        hits[table.sample(cumulative - 1.0e-9)]++;
      }
      // ... and just above it on the current one.
      hits[table.sample(cumulative + 1.0e-9)]++;
    }
    cumulative += w / totalWeight;
  }
  // Upper edge of the last positive-weight entry.
  hits[table.sample(cumulative - 1.0e-9)]++;
  assertEquals(1, cumulative, 1.0e-9);

  for (int i = 0; i < weights.size(); i++) {
    int expectedHits = table.getWeight(i) > 0 ? 2 : 0;
    assertEquals(expectedHits, hits[i]);
  }
}
}
/**
 * Builds a multinomial from pre-weighted items, adding each one in iteration order.
 *
 * @param things the items to add; each contributes its value and its weight
 */
public Multinomial(Iterable<WeightedThing<T>> things) {
  this();
  for (WeightedThing<T> entry : things) {
    add(entry.getValue(), entry.getWeight());
  }
}
/**
 * Removes {@code value} from the distribution by setting its weight to zero.
 *
 * @param value the value to remove
 */
public void delete(T value) { set(value, 0); }
/**
 * Building a multinomial from an empty multiset is expected to fail with an
 * IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testNoValues() {
  Multiset<String> noCounts = HashMultiset.create();
  new Multinomial<>(noCounts);
}
Multinomial<Integer> seedSelector = new Multinomial<>(); for (int i = 0; i < datapoints.size(); ++i) { double selectionProbability = deltaX + datapoints.size() * distanceMeasure.distance(datapoints.get(i), center); seedSelector.add(i, selectionProbability); WeightedVector row = datapoints.get(i); double w = distanceMeasure.distance(c_1, row) * 2 * Math.log(1 + row.getWeight()); seedSelector.set(i, w); while (centroids.size() < numClusters) { int seedIndex = seedSelector.sample(); Centroid nextSeed = new Centroid(datapoints.get(seedIndex)); nextSeed.setIndex(clusterIndex++); centroids.add(nextSeed); seedSelector.delete(seedIndex); if (newWeight < seedSelector.getWeight(currSeedIndex)) { seedSelector.set(currSeedIndex, newWeight);
/**
 * Seeds the clustering by drawing datapoints at random, each with probability proportional to its
 * weight, removing every chosen point from the pool before the next draw. Far simpler than the
 * kmeans++ seeding, but faster.
 *
 * As a side effect the chosen points are appended to the centroids structure.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
  int pointCount = datapoints.size();

  double weightSum = 0;
  for (WeightedVector point : datapoints) {
    weightSum += point.getWeight();
  }

  // Index-valued selector; each index carries its point's fraction of the total weight.
  Multinomial<Integer> seedSelector = new Multinomial<>();
  for (int index = 0; index < pointCount; ++index) {
    double normalizedWeight = datapoints.get(index).getWeight() / weightSum;
    seedSelector.add(index, normalizedWeight);
  }

  for (int clusterId = 0; clusterId < numClusters; ++clusterId) {
    int chosen = seedSelector.sample();
    seedSelector.delete(chosen);

    Centroid seed = new Centroid(datapoints.get(chosen));
    seed.setIndex(clusterId);
    centroids.add(seed);
  }
}
Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) { basisMatrix.set(i, j, choice.sample());
/**
 * Replaces {@code base} with a fresh multinomial over the values 0..size-1, where value i gets
 * weight (i + 1)^(-skew).
 */
private void setup() {
  base = new Multinomial<>();
  int index = 0;
  while (index < size) {
    double weight = Math.pow(index + 1.0, -skew);
    base.add(index, weight);
    index++;
  }
}
/**
 * Builds a multinomial from a multiset, weighting each distinct element by its count.
 *
 * @param counts the element counts to sample from; must not be empty
 * @throws IllegalArgumentException if {@code counts} is empty
 */
public Multinomial(Multiset<T> counts) {
  this();
  Preconditions.checkArgument(!counts.isEmpty(), "Need some data to build sampler");
  rand = RandomUtils.getRandom();
  for (T element : counts.elementSet()) {
    int occurrences = counts.count(element);
    add(element, occurrences);
  }
}