/**
 * Builds a multinomial sampler from pre-weighted values.
 *
 * @param things Values paired with their (unnormalized) sampling weights.
 */
public Multinomial(Iterable<WeightedThing<T>> things) {
    this();
    for (WeightedThing<T> weighted : things) {
        add(weighted.getValue(), weighted.getWeight());
    }
}
/**
 * Builds a multinomial sampler whose weights are occurrence counts taken
 * from a multiset.
 *
 * @param counts Non-empty multiset; each distinct element is weighted by its count.
 * @throws IllegalArgumentException if counts is empty.
 */
public Multinomial(Multiset<T> counts) {
    this();
    Preconditions.checkArgument(!counts.isEmpty(), "Need some data to build sampler");
    rand = RandomUtils.getRandom();
    // entrySet() yields each distinct element with its count in one pass;
    // equivalent to iterating elementSet() and calling count() per element.
    for (Multiset.Entry<T> entry : counts.entrySet()) {
        add(entry.getElement(), entry.getCount());
    }
}
@Test(expected=NullPointerException.class) public void testNoNullValuesAllowed() { Multinomial<Integer> table = new Multinomial<>(); // No null values should be allowed. table.add(null, 1); }
@Test public void testSetZeroWhileIterating() { Multinomial<Integer> table = new Multinomial<>(); for (int i = 0; i < 10000; ++i) { table.add(i, i); } // Setting a sample's weight to 0 removes from the items map. // If that map is used when iterating (it used to be), it will // trigger a ConcurrentModificationException. for (Integer sample : table) { table.set(sample, 0); } }
/** Inserted weights must read back exactly and the table must stay self-consistent. */
@Test
public void testInsert() {
    Random gen = RandomUtils.getRandom();
    Multinomial<Integer> multinomial = new Multinomial<>();
    double[] weights = new double[10];
    for (int i = 0; i < weights.length; i++) {
        weights[i] = gen.nextDouble();
        multinomial.add(i, weights[i]);
    }
    checkSelfConsistent(multinomial);
    for (int i = 0; i < weights.length; i++) {
        assertEquals(weights[i], multinomial.getWeight(i), 0);
    }
}
// Add 10 random weights; after each insertion the table's total weight must
// track the running sum to within 1e-9. (Fragment: the enclosing method's
// header and closing braces are outside this view.)
for (int i = 0; i < 10; i++) { p[i] = rand.nextDouble(); table.add(i, p[i]); total += p[i]; assertEquals(total, table.getWeight(), 1.0e-9);
/**
 * Sets the sampling distribution from a map of value name to (unnormalized) weight.
 * The shared Multinomial is created only on the first call; subsequent calls add
 * entries to the existing one.
 *
 * @param dist Non-empty map; each value's string form is parsed as a double weight.
 * @throws IllegalArgumentException if dist is empty.
 * @throws NumberFormatException if a weight cannot be parsed as a double.
 */
public void setDist(Map<String, ?> dist) {
    Preconditions.checkArgument(!dist.isEmpty(), "Need a non-empty distribution");
    distribution.compareAndSet(null, new Multinomial<>());
    // Iterate entries directly instead of keySet() followed by per-key get() lookups.
    for (Map.Entry<String, ?> entry : dist.entrySet()) {
        distribution.get().add(entry.getKey(), Double.parseDouble(entry.getValue().toString()));
    }
}
/** Builds the base Zipf-like distribution: item i gets weight (i + 1)^(-skew). */
private void setup() {
    base = new Multinomial<>();
    for (int item = 0; item < size; item++) {
        base.add(item, Math.pow(item + 1.0, -skew));
    }
}
/**
 * Loads the built-in street-name seed data. Each non-comment line of the
 * resource is tab-separated; the k-th field on a line is added with weight 1
 * to the k-th sampler.
 *
 * @throws RuntimeException wrapping any IOException raised while reading the resource.
 */
public StreetNameSampler() {
    Splitter onTabs = Splitter.on("\t");
    try {
        for (String line : Resources.readLines(Resources.getResource("street-name-seeds"), Charsets.UTF_8)) {
            if (line.startsWith("#")) {
                continue;  // skip comment lines
            }
            Iterator<Multinomial<String>> samplers = sampler.iterator();
            for (String name : onTabs.split(line)) {
                samplers.next().add(name, 1);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Couldn't read built-in resource", e);
    }
}
/**
 * Creates a sampler seeded with the given weighted values.
 *
 * @param things values together with their unnormalized weights
 */
public Multinomial(Iterable<WeightedThing<T>> things) {
    this();
    for (WeightedThing<T> entry : things) {
        add(entry.getValue(), entry.getWeight());
    }
}
/**
 * Lazily reads a tab-separated resource into the shared distribution. Each
 * non-comment line holds a name and an optional numeric weight; names without
 * an explicit weight get a synthesized weight of 1/i, where i starts at 20 and
 * grows by one per input line.
 *
 * @param resourceName Classpath resource to read, one entry per line.
 * @throws RuntimeException wrapping any IOException from reading the resource.
 */
protected void readDistribution(String resourceName) {
    try {
        // Only the first caller initializes the distribution; later calls are no-ops.
        if (distribution.compareAndSet(null, new Multinomial<>())) {
            Splitter onTab = Splitter.on("\t").trimResults();
            // Rank counter used to synthesize weights for lines without one.
            double i = 20;
            for (String line : Resources.readLines(Resources.getResource(resourceName), Charsets.UTF_8)) {
                if (!line.startsWith("#")) {
                    Iterator<String> parts = onTab.split(line).iterator();
                    String name = translate(parts.next());
                    double weight;
                    if (parts.hasNext()) {
                        // Explicit weight given in the second column.
                        weight = Double.parseDouble(parts.next());
                    } else {
                        // No explicit weight: later lines get progressively smaller weight.
                        weight = 1.0 / i;
                    }
                    distribution.get().add(name, weight);
                }
                // NOTE(review): i also advances on comment ("#") lines, so comments
                // shift the synthesized weights of following lines — confirm intended.
                i++;
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Couldn't read built-in resource file", e);
    }
}
/**
 * Sets the distribution to be used. The format is a list of number pairs:
 * the first value in each pair is the value to return, the second is the
 * (unnormalized) probability for that number. For instance [1, 50, 2, 30, 3, 1]
 * will cause the sampler to return 1 a bit less than 60% of the time, 2 a bit
 * less than 40% of the time and 3 just a bit over 1% of the time.
 *
 * @param dist A JSON list describing the distribution of numbers.
 */
public void setDist(JsonNode dist) {
    if (!dist.isArray()) {
        // Non-array input is ignored, matching prior behavior.
        return;
    }
    if (dist.size() % 2 != 0) {
        throw new IllegalArgumentException("Need distribution to be an even sized list of numbers");
    }
    this.dist = new Multinomial<>();
    Iterator<JsonNode> nodes = dist.iterator();
    while (nodes.hasNext()) {
        JsonNode value = nodes.next();
        JsonNode probability = nodes.next();
        if (!value.canConvertToLong() || !probability.isNumber()) {
            throw new IllegalArgumentException(String.format("Need distribution to be a list of value, probability pairs, got %s (%s,%s)", dist, value.getClass(), probability.getClass()));
        }
        this.dist.add(value.asLong(), probability.asDouble());
    }
}
/**
 * Creates a sampler where each distinct element of the multiset is weighted
 * by its occurrence count.
 *
 * @param counts Non-empty multiset of observations.
 * @throws IllegalArgumentException if counts is empty.
 */
public Multinomial(Multiset<T> counts) {
    this();
    Preconditions.checkArgument(!counts.isEmpty(), "Need some data to build sampler");
    rand = RandomUtils.getRandom();
    for (T element : counts.elementSet()) {
        add(element, counts.count(element));
    }
}
// Sparse random projection basis: each entry is drawn as 0 with probability 2/3,
// or +/-sqrt(3) each with probability 1/6. (Fragment: the enclosing method and
// the loop bodies are outside this view.)
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) {
// Builds the random projection matrix entry distribution: 0 with probability 2/3,
// +/-sqrt(3) with probability 1/6 each. (Fragment: enclosing method and loop
// bodies are outside this view.)
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) {
// Distribution over projection-matrix entries: 0 (prob 2/3) or +/-sqrt(3)
// (prob 1/6 each). (Fragment: enclosing method and loop bodies are outside
// this view.)
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); Multinomial<Double> choice = new Multinomial<>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) {
/**
 * Selects numClusters of the original points at random, each chosen with
 * probability proportional to its weight and without replacement. This is
 * much less sophisticated than the kmeans++ approach, but faster.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
    double totalWeight = 0;
    for (WeightedVector datapoint : datapoints) {
        totalWeight += datapoint.getWeight();
    }
    Multinomial<Integer> seedSelector = new Multinomial<>();
    for (int index = 0; index < datapoints.size(); ++index) {
        seedSelector.add(index, datapoints.get(index).getWeight() / totalWeight);
    }
    for (int cluster = 0; cluster < numClusters; ++cluster) {
        int sample = seedSelector.sample();
        seedSelector.delete(sample);  // sample without replacement
        Centroid centroid = new Centroid(datapoints.get(sample));
        centroid.setIndex(cluster);
        centroids.add(centroid);
    }
}
/**
 * Picks numClusters seed points by weighted sampling without replacement:
 * each datapoint's selection probability is proportional to its weight.
 * Simpler and faster than kmeans++ initialization.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
    double weightSum = 0;
    for (WeightedVector point : datapoints) {
        weightSum += point.getWeight();
    }
    Multinomial<Integer> selector = new Multinomial<>();
    for (int idx = 0; idx < datapoints.size(); ++idx) {
        selector.add(idx, datapoints.get(idx).getWeight() / weightSum);
    }
    for (int clusterIndex = 0; clusterIndex < numClusters; ++clusterIndex) {
        int chosen = selector.sample();
        selector.delete(chosen);  // no datapoint seeds two clusters
        Centroid seed = new Centroid(datapoints.get(chosen));
        seed.setIndex(clusterIndex);
        centroids.add(seed);
    }
}
/**
 * Seeds clustering by drawing numClusters distinct datapoints, each with
 * probability proportional to its weight. Less sophisticated than kmeans++
 * but considerably faster.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
    int numDatapoints = datapoints.size();
    double totalWeight = 0;
    for (WeightedVector datapoint : datapoints) {
        totalWeight += datapoint.getWeight();
    }
    Multinomial<Integer> seedSelector = new Multinomial<>();
    for (int i = 0; i < numDatapoints; ++i) {
        seedSelector.add(i, datapoints.get(i).getWeight() / totalWeight);
    }
    for (int i = 0; i < numClusters; ++i) {
        int sample = seedSelector.sample();
        // Delete so the same datapoint cannot seed two clusters.
        seedSelector.delete(sample);
        Centroid centroid = new Centroid(datapoints.get(sample));
        centroid.setIndex(i);
        centroids.add(centroid);
    }
}