@Override
public T sample() {
  Preconditions.checkArgument(!weight.isEmpty());
  return sample(rand.nextDouble());
}
double sample(double u) {
  if (u < limit) {
    // Rebuild the cached table: accumulate Poisson probabilities until the remaining
    // tail mass drops below u / 20, then lump that tail into a single final entry.
    List<WeightedThing<Integer>> steps = Lists.newArrayList();
    limit = 1;
    int i = 0;
    while (u / 20 < limit) {
      double pdf = pd.probability(i);
      limit -= pdf;
      steps.add(new WeightedThing<>(i, pdf));
      i++;
    }
    steps.add(new WeightedThing<>(steps.size(), limit));
    partial = new Multinomial<>(steps);
  }
  return partial.sample(u);
}
}
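// A minimal, self-contained sketch of the lazy-table idea used in sample(double) above:
// the cumulative distribution of a Poisson(lambda) is materialized only as far as a draw
// requires and then cached for later draws. The class and method names below are
// illustrative only; they are not part of Mahout or of this project.
class LazyPoissonSketch {
  private final double lambda;
  private final java.util.List<Double> cdf = new java.util.ArrayList<>(); // cdf.get(k) = P(X <= k)
  private final java.util.Random rand = new java.util.Random();

  LazyPoissonSketch(double lambda) {
    this.lambda = lambda;
  }

  int sample() {
    return sample(rand.nextDouble());
  }

  int sample(double u) {
    // Extend the cached table until its total mass covers u or the pmf underflows.
    while (cdf.isEmpty() || cdf.get(cdf.size() - 1) < u) {
      int k = cdf.size();
      double pmf = pmfAt(k);
      double prev = k == 0 ? 0 : cdf.get(k - 1);
      cdf.add(prev + pmf);
      if (pmf == 0) {
        break; // remaining tail mass is below double precision
      }
    }
    // Linear scan is enough for a sketch; a binary search over cdf would be the obvious refinement.
    for (int k = 0; k < cdf.size(); k++) {
      if (u <= cdf.get(k)) {
        return k;
      }
    }
    return cdf.size() - 1;
  }

  // P(X = k) via the recurrence p(k) = p(k - 1) * lambda / k, starting from p(0) = exp(-lambda);
  // recomputed from scratch here for clarity rather than cached.
  private double pmfAt(int k) {
    double p = Math.exp(-lambda);
    for (int i = 1; i <= k; i++) {
      p = p * lambda / i;
    }
    return p;
  }
}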
@Test
public void testEvenSplit() {
  Multiset<String> stuff = HashMultiset.create();
  for (int i = 0; i < 5; i++) {
    stuff.add(String.valueOf(i));
  }
  Multinomial<String> s = new Multinomial<>(stuff);

  double EPSILON = 1.0e-15;
  Multiset<String> cnt = HashMultiset.create();
  for (int i = 0; i < 5; i++) {
    // Probe just inside both ends of each item's share of [0, 1].
    cnt.add(s.sample(i * 0.2));
    cnt.add(s.sample(i * 0.2 + EPSILON));
    cnt.add(s.sample((i + 1) * 0.2 - EPSILON));
  }
  assertEquals(5, cnt.elementSet().size());
  for (String v : cnt.elementSet()) {
    assertEquals(3, cnt.count(v), 1.01);
  }
  assertTrue(cnt.contains(s.sample(1)));
  assertEquals(s.sample(1 - EPSILON), s.sample(1));
}
double p0 = i * 0.02;
double p1 = (i + 1) * 0.02;
cnt.add(s0.sample(p0));
cnt.add(s0.sample(p0 + EPSILON));
cnt.add(s0.sample(p1 - EPSILON));
assertEquals(s0.sample(p0), s1.sample(p0));
assertEquals(s0.sample(p0 + EPSILON), s1.sample(p0 + EPSILON));
assertEquals(s0.sample(p1 - EPSILON), s1.sample(p1 - EPSILON));
assertEquals(s0.sample(p0), s2.sample(p0));
assertEquals(s0.sample(p0 + EPSILON), s2.sample(p0 + EPSILON));
assertEquals(s0.sample(p1 - EPSILON), s2.sample(p1 - EPSILON));
assertEquals(s0.sample(0), s1.sample(0));
assertEquals(s0.sample(0 + EPSILON), s1.sample(0 + EPSILON));
assertEquals(s0.sample(1 - EPSILON), s1.sample(1 - EPSILON));
assertEquals(s0.sample(1), s1.sample(1));
assertEquals(s0.sample(0), s2.sample(0));
assertEquals(s0.sample(0 + EPSILON), s2.sample(0 + EPSILON));
assertEquals(s0.sample(1 - EPSILON), s2.sample(1 - EPSILON));
assertEquals(s0.sample(1), s2.sample(1));
assertTrue(cnt.contains(s0.sample(1)));
assertEquals(s0.sample(1 - EPSILON), s0.sample(1));
private static void checkSelfConsistent(Multinomial<Integer> table) {
  List<Double> weights = table.getWeights();
  double totalWeight = table.getWeight();
  double p = 0;
  int[] k = new int[weights.size()];
  // Probe just below and just above each cumulative boundary: every positive-weight item
  // should be sampled exactly twice and zero-weight items never.
  for (double weight : weights) {
    if (weight > 0) {
      if (p > 0) {
        k[table.sample(p - 1.0e-9)]++;
      }
      k[table.sample(p + 1.0e-9)]++;
    }
    p += weight / totalWeight;
  }
  k[table.sample(p - 1.0e-9)]++;
  assertEquals(1, p, 1.0e-9);
  for (int i = 0; i < weights.size(); i++) {
    if (table.getWeight(i) > 0) {
      assertEquals(2, k[i]);
    } else {
      assertEquals(0, k[i]);
    }
  }
}
}
@Override
public JsonNode sample() {
  synchronized (this) {
    return new TextNode(distribution.get().sample());
  }
}
}
@Override
public JsonNode sample() {
  synchronized (this) {
    return new IntNode(base.sample());
  }
}
}
@Override
public JsonNode sample() {
  synchronized (this) {
    switch (type) {
      case FIRST:
        return new TextNode(first.get().sample());
      case LAST:
        return new TextNode(last.get().sample());
      case FIRST_LAST:
        return new TextNode(first.get().sample() + " " + last.get().sample());
      case LAST_FIRST:
        return new TextNode(last.get().sample() + ", " + first.get().sample());
    }
  }
  // can't happen
  return null;
}
@Override
public JsonNode sample() {
  synchronized (this) {
    if (dist == null) {
      // Taking the min of several uniform draws skews the result toward min;
      // a negative power takes the max instead and skews toward max.
      int r = power >= 0 ? Integer.MAX_VALUE : Integer.MIN_VALUE;
      if (power >= 0) {
        for (int i = 0; i <= power; i++) {
          r = Math.min(r, min + base.nextInt(max - min));
        }
      } else {
        int n = -power;
        for (int i = 0; i <= n; i++) {
          r = Math.max(r, min + base.nextInt(max - min));
        }
      }
      if (format == null) {
        return new IntNode(r);
      } else {
        return new TextNode(String.format(format, r));
      }
    } else {
      return new LongNode(dist.sample());
    }
  }
}
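// Hedged side note on the skew used above: for a non-negative power the result is the
// minimum of (power + 1) independent uniform draws on [min, max), so
//   P(result >= k) = ((max - k) / (max - min)) ^ (power + 1),
// and the probability mass piles up near min (the negative-power branch mirrors this toward max).
// The tiny standalone check below is illustrative only and not part of the sampler.
class SkewCheckSketch {
  public static void main(String[] args) {
    java.util.Random rand = new java.util.Random(1);
    int min = 0;
    int max = 10;
    int power = 2;
    int trials = 100000;
    long sum = 0;
    for (int t = 0; t < trials; t++) {
      int r = Integer.MAX_VALUE;
      for (int i = 0; i <= power; i++) {
        r = Math.min(r, min + rand.nextInt(max - min));
      }
      sum += r;
    }
    // The exact mean here is 2.025, well below the uniform mean of 4.5.
    System.out.println("mean of min-of-3 draws: " + (double) sum / trials);
  }
}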
/**
 * Selects some of the original points randomly with probability proportional to their weights. This is much
 * less sophisticated than the kmeans++ approach, but it is faster and, coupled with multiple runs, can still
 * produce reasonable seeds.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
  int numDatapoints = datapoints.size();
  double totalWeight = 0;
  for (WeightedVector datapoint : datapoints) {
    totalWeight += datapoint.getWeight();
  }
  Multinomial<Integer> seedSelector = new Multinomial<>();
  for (int i = 0; i < numDatapoints; ++i) {
    seedSelector.add(i, datapoints.get(i).getWeight() / totalWeight);
  }
  for (int i = 0; i < numClusters; ++i) {
    int sample = seedSelector.sample();
    seedSelector.delete(sample);
    Centroid centroid = new Centroid(datapoints.get(sample));
    centroid.setIndex(i);
    centroids.add(centroid);
  }
}
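// Hedged sketch of the selection pattern used above: weighted sampling without replacement,
// using only the Multinomial calls that appear in the method (add, sample, delete). The
// helper name pickSeeds is illustrative; the caller is assumed to ask for no more items
// than the table holds.
static java.util.List<Integer> pickSeeds(Multinomial<Integer> picker, int howMany) {
  java.util.List<Integer> seeds = new java.util.ArrayList<>();
  for (int k = 0; k < howMany; k++) {
    int chosen = picker.sample();   // drawn proportionally to the remaining weights
    picker.delete(chosen);          // remove it so the same point is never chosen twice
    seeds.add(chosen);
  }
  return seeds;
}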
while (centroids.size() < numClusters) {
  int seedIndex = seedSelector.sample();
  Centroid nextSeed = new Centroid(datapoints.get(seedIndex));
  nextSeed.setIndex(clusterIndex++);
for (int i = 0; i < projectedVectorSize; ++i) {
  for (int j = 0; j < vectorSize; ++j) {
    basisMatrix.set(i, j, choice.sample());