/**
 * Builds a multinomial sampler from pre-weighted values.
 *
 * @param things Values paired with their (unnormalized) sampling weights.
 */
public Multinomial(Iterable<WeightedThing<T>> things) {
    this();
    for (WeightedThing<T> weighted : things) {
        add(weighted.getValue(), weighted.getWeight());
    }
}
/**
 * Builds a multinomial sampler whose weights are occurrence counts taken
 * from a multiset.
 *
 * @param counts Non-empty multiset; each distinct element is weighted by its count.
 * @throws IllegalArgumentException if counts is empty.
 */
public Multinomial(Multiset<T> counts) {
    this();
    Preconditions.checkArgument(!counts.isEmpty(), "Need some data to build sampler");
    rand = RandomUtils.getRandom();
    // entrySet() yields each distinct element with its count in one pass;
    // equivalent to iterating elementSet() and calling count() per element.
    for (Multiset.Entry<T> entry : counts.entrySet()) {
        add(entry.getElement(), entry.getCount());
    }
}
@Test(expected=NullPointerException.class) public void testNoNullValuesAllowed() { Multinomial<Integer> table = new Multinomial<>(); // No null values should be allowed. table.add(null, 1); }
@Test public void testSetZeroWhileIterating() { Multinomial<Integer> table = new Multinomial<>(); for (int i = 0; i < 10000; ++i) { table.add(i, i); } // Setting a sample's weight to 0 removes from the items map. // If that map is used when iterating (it used to be), it will // trigger a ConcurrentModificationException. for (Integer sample : table) { table.set(sample, 0); } }
/** Inserted weights must read back exactly and the table must stay self-consistent. */
@Test
public void testInsert() {
    Random gen = RandomUtils.getRandom();
    Multinomial<Integer> multinomial = new Multinomial<>();
    double[] weights = new double[10];
    for (int i = 0; i < weights.length; i++) {
        weights[i] = gen.nextDouble();
        multinomial.add(i, weights[i]);
    }
    checkSelfConsistent(multinomial);
    for (int i = 0; i < weights.length; i++) {
        assertEquals(weights[i], multinomial.getWeight(i), 0);
    }
}
// Add 10 random weights; after each insertion the table's total weight must
// track the running sum to within 1e-9. (Fragment: the enclosing method's
// header and closing braces are outside this view.)
for (int i = 0; i < 10; i++) { p[i] = rand.nextDouble(); table.add(i, p[i]); total += p[i]; assertEquals(total, table.getWeight(), 1.0e-9);
/**
 * Sets the sampling distribution from a map of value name to (unnormalized) weight.
 * The shared Multinomial is created only on the first call; subsequent calls add
 * entries to the existing one.
 *
 * @param dist Non-empty map; each value's string form is parsed as a double weight.
 * @throws IllegalArgumentException if dist is empty.
 * @throws NumberFormatException if a weight cannot be parsed as a double.
 */
public void setDist(Map<String, ?> dist) {
    Preconditions.checkArgument(!dist.isEmpty(), "Need a non-empty distribution");
    distribution.compareAndSet(null, new Multinomial<>());
    // Iterate entries directly instead of keySet() followed by per-key get() lookups.
    for (Map.Entry<String, ?> entry : dist.entrySet()) {
        distribution.get().add(entry.getKey(), Double.parseDouble(entry.getValue().toString()));
    }
}
/** Builds the base Zipf-like distribution: item i gets weight (i + 1)^(-skew). */
private void setup() {
    base = new Multinomial<>();
    for (int item = 0; item < size; item++) {
        base.add(item, Math.pow(item + 1.0, -skew));
    }
}
/**
 * Loads the built-in street-name seed data. Each non-comment line of the
 * resource is tab-separated; the k-th field on a line is added with weight 1
 * to the k-th sampler.
 *
 * @throws RuntimeException wrapping any IOException raised while reading the resource.
 */
public StreetNameSampler() {
    Splitter onTabs = Splitter.on("\t");
    try {
        for (String line : Resources.readLines(Resources.getResource("street-name-seeds"), Charsets.UTF_8)) {
            if (line.startsWith("#")) {
                continue;  // skip comment lines
            }
            Iterator<Multinomial<String>> samplers = sampler.iterator();
            for (String name : onTabs.split(line)) {
                samplers.next().add(name, 1);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Couldn't read built-in resource", e);
    }
}
/**
 * Creates a sampler seeded with the given weighted values.
 *
 * @param things values together with their unnormalized weights
 */
public Multinomial(Iterable<WeightedThing<T>> things) {
    this();
    for (WeightedThing<T> entry : things) {
        add(entry.getValue(), entry.getWeight());
    }
}
/**
 * Lazily reads a tab-separated resource into the shared distribution. Each
 * non-comment line holds a name and an optional numeric weight; names without
 * an explicit weight get a synthesized weight of 1/i, where i starts at 20 and
 * grows by one per input line.
 *
 * @param resourceName Classpath resource to read, one entry per line.
 * @throws RuntimeException wrapping any IOException from reading the resource.
 */
protected void readDistribution(String resourceName) {
    try {
        // Only the first caller initializes the distribution; later calls are no-ops.
        if (distribution.compareAndSet(null, new Multinomial<>())) {
            Splitter onTab = Splitter.on("\t").trimResults();
            // Rank counter used to synthesize weights for lines without one.
            double i = 20;
            for (String line : Resources.readLines(Resources.getResource(resourceName), Charsets.UTF_8)) {
                if (!line.startsWith("#")) {
                    Iterator<String> parts = onTab.split(line).iterator();
                    String name = translate(parts.next());
                    double weight;
                    if (parts.hasNext()) {
                        // Explicit weight given in the second column.
                        weight = Double.parseDouble(parts.next());
                    } else {
                        // No explicit weight: later lines get progressively smaller weight.
                        weight = 1.0 / i;
                    }
                    distribution.get().add(name, weight);
                }
                // NOTE(review): i also advances on comment ("#") lines, so comments
                // shift the synthesized weights of following lines — confirm intended.
                i++;
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Couldn't read built-in resource file", e);
    }
}
/**
 * Sets the distribution to be used. The format is a list of number pairs:
 * the first value in each pair is the value to return, the second is the
 * (unnormalized) probability for that number. For instance [1, 50, 2, 30, 3, 1]
 * will cause the sampler to return 1 a bit less than 60% of the time, 2 a bit
 * less than 40% of the time and 3 just a bit over 1% of the time.
 *
 * @param dist A JSON list describing the distribution of numbers.
 */
public void setDist(JsonNode dist) {
    if (!dist.isArray()) {
        // Non-array input is ignored, matching prior behavior.
        return;
    }
    if (dist.size() % 2 != 0) {
        throw new IllegalArgumentException("Need distribution to be an even sized list of numbers");
    }
    this.dist = new Multinomial<>();
    Iterator<JsonNode> nodes = dist.iterator();
    while (nodes.hasNext()) {
        JsonNode value = nodes.next();
        JsonNode probability = nodes.next();
        if (!value.canConvertToLong() || !probability.isNumber()) {
            throw new IllegalArgumentException(String.format("Need distribution to be a list of value, probability pairs, got %s (%s,%s)", dist, value.getClass(), probability.getClass()));
        }
        this.dist.add(value.asLong(), probability.asDouble());
    }
}
/**
 * Creates a sampler where each distinct element of the multiset is weighted
 * by its occurrence count.
 *
 * @param counts Non-empty multiset of observations.
 * @throws IllegalArgumentException if counts is empty.
 */
public Multinomial(Multiset<T> counts) {
    this();
    Preconditions.checkArgument(!counts.isEmpty(), "Need some data to build sampler");
    rand = RandomUtils.getRandom();
    for (T element : counts.elementSet()) {
        add(element, counts.count(element));
    }
}
// Sparse random projection basis: each entry is drawn as 0 with probability 2/3,
// or +/-sqrt(3) each with probability 1/6. (Fragment: the enclosing method and
// the loop bodies are outside this view.)
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) {
// Builds the random projection matrix entry distribution: 0 with probability 2/3,
// +/-sqrt(3) with probability 1/6 each. (Fragment: enclosing method and loop
// bodies are outside this view.)
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); Multinomial<Double> choice = new Multinomial<Double>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) {
// Distribution over projection-matrix entries: 0 (prob 2/3) or +/-sqrt(3)
// (prob 1/6 each). (Fragment: enclosing method and loop bodies are outside
// this view.)
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize); Multinomial<Double> choice = new Multinomial<>(); choice.add(0.0, 2 / 3.0); choice.add(Math.sqrt(3.0), 1 / 6.0); choice.add(-Math.sqrt(3.0), 1 / 6.0); for (int i = 0; i < projectedVectorSize; ++i) { for (int j = 0; j < vectorSize; ++j) {
/**
 * Selects numClusters of the original points at random, each chosen with
 * probability proportional to its weight and without replacement. This is
 * much less sophisticated than the kmeans++ approach, but faster.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
    double totalWeight = 0;
    for (WeightedVector datapoint : datapoints) {
        totalWeight += datapoint.getWeight();
    }
    Multinomial<Integer> seedSelector = new Multinomial<>();
    for (int index = 0; index < datapoints.size(); ++index) {
        seedSelector.add(index, datapoints.get(index).getWeight() / totalWeight);
    }
    for (int cluster = 0; cluster < numClusters; ++cluster) {
        int sample = seedSelector.sample();
        seedSelector.delete(sample);  // sample without replacement
        Centroid centroid = new Centroid(datapoints.get(sample));
        centroid.setIndex(cluster);
        centroids.add(centroid);
    }
}
/**
 * Picks numClusters seed points by weighted sampling without replacement:
 * each datapoint's selection probability is proportional to its weight.
 * Simpler and faster than kmeans++ initialization.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
    double weightSum = 0;
    for (WeightedVector point : datapoints) {
        weightSum += point.getWeight();
    }
    Multinomial<Integer> selector = new Multinomial<>();
    for (int idx = 0; idx < datapoints.size(); ++idx) {
        selector.add(idx, datapoints.get(idx).getWeight() / weightSum);
    }
    for (int clusterIndex = 0; clusterIndex < numClusters; ++clusterIndex) {
        int chosen = selector.sample();
        selector.delete(chosen);  // no datapoint seeds two clusters
        Centroid seed = new Centroid(datapoints.get(chosen));
        seed.setIndex(clusterIndex);
        centroids.add(seed);
    }
}
/**
 * Seeds clustering by drawing numClusters distinct datapoints, each with
 * probability proportional to its weight. Less sophisticated than kmeans++
 * but considerably faster.
 *
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
    int numDatapoints = datapoints.size();
    double totalWeight = 0;
    for (WeightedVector datapoint : datapoints) {
        totalWeight += datapoint.getWeight();
    }
    Multinomial<Integer> seedSelector = new Multinomial<>();
    for (int i = 0; i < numDatapoints; ++i) {
        seedSelector.add(i, datapoints.get(i).getWeight() / totalWeight);
    }
    for (int i = 0; i < numClusters; ++i) {
        int sample = seedSelector.sample();
        // Delete so the same datapoint cannot seed two clusters.
        seedSelector.delete(sample);
        Centroid centroid = new Centroid(datapoints.get(sample));
        centroid.setIndex(i);
        centroids.add(centroid);
    }
}