/**
 * Samples from the cached partial distribution, rebuilding that cache first
 * whenever {@code u} is below the current {@code limit}.
 *
 * <p>On a rebuild, {@code limit} is reset to 1 and reduced by
 * {@code pd.probability(k)} for k = 0, 1, 2, ... for as long as
 * {@code u / 20 < limit}. Each k is recorded with its probability, and one
 * final entry carries whatever is left in {@code limit}.
 *
 * @param u the value handed to {@code partial.sample}
 * @return the result of {@code partial.sample(u)}
 */
double sample(double u) {
  if (u < limit) {
    List<WeightedThing<Integer>> buckets = Lists.newArrayList();
    limit = 1;
    for (int k = 0; u / 20 < limit; k++) {
      double mass = pd.probability(k);
      limit -= mass;
      buckets.add(new WeightedThing<>(k, mass));
    }
    // Catch-all entry: its value is the number of entries so far, its weight the leftover limit.
    WeightedThing<Integer> remainder = new WeightedThing<>(buckets.size(), limit);
    buckets.add(remainder);
    partial = new Multinomial<>(buckets);
  }
  return partial.sample(u);
}
} // closes an enclosing scope whose opening lies outside this excerpt
/** Constructing a Multinomial from an empty multiset must raise IllegalArgumentException. */
@Test(expected = IllegalArgumentException.class)
public void testNoValues() {
  Multiset<String> nothing = HashMultiset.create();
  // The constructor call itself is the assertion; the instance is never used.
  new Multinomial<>(nothing);
}
@Test(expected=NullPointerException.class) public void testNoNullValuesAllowed() { Multinomial<Integer> table = new Multinomial<>(); // No null values should be allowed. table.add(null, 1); }
@Test public void testSetZeroWhileIterating() { Multinomial<Integer> table = new Multinomial<>(); for (int i = 0; i < 10000; ++i) { table.add(i, i); } // Setting a sample's weight to 0 removes from the items map. // If that map is used when iterating (it used to be), it will // trigger a ConcurrentModificationException. for (Integer sample : table) { table.set(sample, 0); } }
/** A distribution holding a single item must return that item for u = 0, 0.1 and 1. */
@Test
public void testSingleton() {
  Multiset<String> single = HashMultiset.create();
  single.add("one");
  Multinomial<String> sampler = new Multinomial<>(single);
  for (double u : new double[] {0, 0.1, 1}) {
    assertEquals("one", sampler.sample(u));
  }
}
/**
 * Adds ten randomly weighted integers, runs the self-consistency check, and
 * verifies that each stored weight equals the inserted one exactly.
 */
@Test
public void testInsert() {
  Random gen = RandomUtils.getRandom();
  Multinomial<Integer> table = new Multinomial<>();
  double[] weights = new double[10];
  for (int slot = 0; slot < 10; slot++) {
    double w = gen.nextDouble();
    weights[slot] = w;
    table.add(slot, w);
  }

  checkSelfConsistent(table);

  // Zero tolerance: the stored weight must be bit-for-bit what was inserted.
  for (int slot = 0; slot < 10; slot++) {
    double expected = weights[slot];
    assertEquals(expected, table.getWeight(slot), 0);
  }
}
/**
 * Builds a distribution over five equally counted strings and probes it at,
 * just above, and just below each multiple of 0.2. All five strings must be
 * seen, each counted within 1.01 of three times, and sampling at exactly 1
 * must agree with sampling just below 1.
 */
@Test
public void testEvenSplit() {
  Multiset<String> items = HashMultiset.create();
  for (int k = 0; k < 5; k++) {
    items.add(Integer.toString(k));
  }
  Multinomial<String> sampler = new Multinomial<>(items);
  final double epsilon = 1.0e-15;

  Multiset<String> seen = HashMultiset.create();
  for (int k = 0; k < 5; k++) {
    double lower = k * 0.2;
    double upper = (k + 1) * 0.2;
    seen.add(sampler.sample(lower));
    seen.add(sampler.sample(lower + epsilon));
    seen.add(sampler.sample(upper - epsilon));
  }

  assertEquals(5, seen.elementSet().size());
  for (String label : seen.elementSet()) {
    // Tolerance of 1.01 lets a boundary probe fall into either neighbouring bucket.
    assertEquals(3, seen.count(label), 1.01);
  }
  assertTrue(seen.contains(sampler.sample(1)));
  assertEquals(sampler.sample(1 - epsilon), sampler.sample(1));
}
Multinomial<String> s0 = new Multinomial<>(stuff); Multinomial<String> s1 = new Multinomial<>(stuff); Multinomial<String> s2 = new Multinomial<>(stuff); double EPSILON = 1.0e-15;
@Test public void testDeleteAndUpdate() { Random rand = RandomUtils.getRandom(); Multinomial<Integer> table = new Multinomial<>(); assertEquals(0, table.getWeight(), 1.0e-9);
/**
 * Adds every entry of {@code dist} to the shared distribution, installing a
 * fresh empty {@code Multinomial} first if none has been set yet.
 *
 * <p>Each map value is turned into a weight by parsing its {@code toString()}
 * form with {@link Double#parseDouble(String)}.
 *
 * @param dist non-empty map from item name to weight; the emptiness check is
 *             delegated to {@code Preconditions.checkArgument}
 */
public void setDist(Map<String, ?> dist) {
  Preconditions.checkArgument(!dist.isEmpty());
  distribution.compareAndSet(null, new Multinomial<>());
  for (Map.Entry<String, ?> entry : dist.entrySet()) {
    double weight = Double.parseDouble(entry.getValue().toString());
    distribution.get().add(entry.getKey(), weight);
  }
}
/**
 * Rebuilds {@code base} as a fresh Multinomial over the integers
 * 0 .. size-1, where integer i gets weight (i + 1)^(-skew).
 */
private void setup() {
  base = new Multinomial<>();
  int rank = 0;
  while (rank < size) {
    double weight = Math.pow(rank + 1.0, -skew);
    base.add(rank, weight);
    rank++;
  }
}
/**
 * Populates the shared distribution from a tab-separated resource, but only
 * if no distribution has been installed yet (the compare-and-set decides).
 *
 * <p>Lines starting with {@code #} are skipped. For every other line the first
 * field is passed through {@code translate} and used as the item; an optional
 * second field is parsed as its weight. When the weight is missing it defaults
 * to {@code 1.0 / c}, where {@code c} starts at 20 and grows by one for every
 * line read, skipped lines included.
 *
 * @param resourceName name of the resource to read
 * @throws RuntimeException wrapping the {@link IOException} if reading fails
 */
protected void readDistribution(String resourceName) {
  try {
    if (!distribution.compareAndSet(null, new Multinomial<>())) {
      return;
    }
    Splitter tabSplitter = Splitter.on("\t").trimResults();
    double lineCounter = 20;
    for (String line : Resources.readLines(Resources.getResource(resourceName), Charsets.UTF_8)) {
      if (!line.startsWith("#")) {
        Iterator<String> fields = tabSplitter.split(line).iterator();
        String name = translate(fields.next());
        double weight = fields.hasNext() ? Double.parseDouble(fields.next()) : 1.0 / lineCounter;
        distribution.get().add(name, weight);
      }
      // Advances on every line, comment lines too, so default weights depend on file position.
      lineCounter++;
    }
  } catch (IOException e) {
    throw new RuntimeException("Couldn't read built-in resource file", e);
  }
}
/**
 * Sets the distribution to be used. The format is a flat list of number pairs:
 * the first element of each pair is the value to return, the second is the
 * (unnormalized) weight for that value. For instance [1, 50, 2, 30, 3, 1]
 * assigns weights 50, 30 and 1 out of a total of 81, so 1 is returned about 62%
 * of the time, 2 about 37% of the time and 3 just over 1% of the time.
 *
 * <p>If {@code dist} is not a JSON array the call does nothing and any existing
 * distribution is left in place.
 *
 * @param dist A JSON list describing the distribution of numbers.
 * @throws IllegalArgumentException if the list has odd length, or if a pair is
 *         not a (long-convertible value, numeric weight) pair
 */
public void setDist(JsonNode dist) {
  if (!dist.isArray()) {
    return;
  }
  if (dist.size() % 2 != 0) {
    throw new IllegalArgumentException("Need distribution to be an even sized list of numbers");
  }

  // The field is replaced before validation, so a bad pair leaves a partly filled distribution.
  this.dist = new Multinomial<>();
  Iterator<JsonNode> cursor = dist.iterator();
  while (cursor.hasNext()) {
    JsonNode value = cursor.next();
    JsonNode weight = cursor.next();
    boolean wellFormed = value.canConvertToLong() && weight.isNumber();
    if (!wellFormed) {
      throw new IllegalArgumentException(String.format("Need distribution to be a list of value, probability pairs, got %s (%s,%s)", dist, value.getClass(), weight.getClass()));
    }
    this.dist.add(value.asLong(), weight.asDouble());
  }
}
/**
 * Samples from the cached partial distribution, rebuilding that cache first
 * whenever {@code u} is below the current {@code limit}.
 *
 * <p>On a rebuild, {@code limit} is reset to 1 and reduced by
 * {@code pd.probability(k)} for k = 0, 1, 2, ... for as long as
 * {@code u / 20 < limit}. Each k is recorded with its probability, and one
 * final entry carries whatever is left in {@code limit}.
 *
 * @param u the value handed to {@code partial.sample}
 * @return the result of {@code partial.sample(u)}
 */
double sample(double u) {
  if (u < limit) {
    List<WeightedThing<Integer>> buckets = Lists.newArrayList();
    limit = 1;
    for (int k = 0; u / 20 < limit; k++) {
      double mass = pd.probability(k);
      limit -= mass;
      buckets.add(new WeightedThing<>(k, mass));
    }
    // Catch-all entry: its value is the number of entries so far, its weight the leftover limit.
    WeightedThing<Integer> remainder = new WeightedThing<>(buckets.size(), limit);
    buckets.add(remainder);
    partial = new Multinomial<>(buckets);
  }
  return partial.sample(u);
}
} // closes an enclosing scope whose opening lies outside this excerpt
/**
 * Picks {@code numClusters} of the original points at random, with probability
 * proportional to their weights, and appends one centroid per pick to
 * {@code centroids}. This is much less sophisticated than the kmeans++ approach,
 * but it is faster.
 *
 * <p>Each chosen index is deleted from the selector before the next draw.
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
  double weightSum = 0;
  for (WeightedVector point : datapoints) {
    weightSum += point.getWeight();
  }

  // Register every datapoint index with its share of the total weight.
  int pointCount = datapoints.size();
  Multinomial<Integer> picker = new Multinomial<Integer>();
  for (int index = 0; index < pointCount; ++index) {
    double share = datapoints.get(index).getWeight() / weightSum;
    picker.add(index, share);
  }

  for (int cluster = 0; cluster < numClusters; ++cluster) {
    int chosen = picker.sample();
    picker.delete(chosen);
    Centroid seed = new Centroid(datapoints.get(chosen));
    seed.setIndex(cluster);
    centroids.add(seed);
  }
}
/**
 * Picks {@code numClusters} of the original points at random, with probability
 * proportional to their weights, and appends one centroid per pick to
 * {@code centroids}. This is much less sophisticated than the kmeans++ approach,
 * but it is faster.
 *
 * <p>Each chosen index is deleted from the selector before the next draw.
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
  double weightSum = 0;
  for (WeightedVector point : datapoints) {
    weightSum += point.getWeight();
  }

  // Register every datapoint index with its share of the total weight.
  int pointCount = datapoints.size();
  Multinomial<Integer> picker = new Multinomial<Integer>();
  for (int index = 0; index < pointCount; ++index) {
    double share = datapoints.get(index).getWeight() / weightSum;
    picker.add(index, share);
  }

  for (int cluster = 0; cluster < numClusters; ++cluster) {
    int chosen = picker.sample();
    picker.delete(chosen);
    Centroid seed = new Centroid(datapoints.get(chosen));
    seed.setIndex(cluster);
    centroids.add(seed);
  }
}
/**
 * Picks {@code numClusters} of the original points at random, with probability
 * proportional to their weights, and appends one centroid per pick to
 * {@code centroids}. This is much less sophisticated than the kmeans++ approach,
 * but it is faster.
 *
 * <p>Each chosen index is deleted from the selector before the next draw.
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
  double weightSum = 0;
  for (WeightedVector point : datapoints) {
    weightSum += point.getWeight();
  }

  // Register every datapoint index with its share of the total weight.
  int pointCount = datapoints.size();
  Multinomial<Integer> picker = new Multinomial<>();
  for (int index = 0; index < pointCount; ++index) {
    double share = datapoints.get(index).getWeight() / weightSum;
    picker.add(index, share);
  }

  for (int cluster = 0; cluster < numClusters; ++cluster) {
    int chosen = picker.sample();
    picker.delete(chosen);
    Centroid seed = new Centroid(datapoints.get(chosen));
    seed.setIndex(cluster);
    centroids.add(seed);
  }
}
Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0);
Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0);
Multinomial<Double> choice = new Multinomial<>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0);