weka.clusterers.EM Java code examples

/**
 * Builds the clusterer handed out by this class: a newly constructed EM with
 * no further configuration applied.
 *
 * @return a fresh EM instance
 */
public Clusterer getClusterer() {
 final Clusterer defaultEm = new EM();
 return defaultEm;
}

 // NOTE(review): elided excerpt of EM internals — statements (and closing
 // braces) between the lines shown are missing, so this does not compile as-is.
 System.out.println("Seed: " + getSeed());
// create the random generator from the configured seed
m_rr = new Random(getSeed());
  + "\nNumber of atts: " + m_num_attribs + "\n");
startExecutorPool();
  CVClusters();
  // re-create the generator from the seed, then draw and discard ten values
  m_rr = new Random(getSeed());
  for (int i = 0; i < 10; i++) {
   m_rr.nextDouble();
EM_Init(m_theInstances);
// keep the value returned by iterate(); it is reported below as the current log-likelihood
double loglikely = iterate(m_theInstances, m_verbose);
if (m_Debug) {
 System.err.println("Current log-likelihood: " + loglikely);

 EM_Report(inst);
int seed = getSeed();
int restartCount = 0;
m_iterationsPerformed = -1;
   // remember the previous value before launching the next E and M steps
   llkold = llk;
   llk = launchESteps(inst);
   launchMSteps(inst);
   restartCount = 0;
  EM_Init(m_theInstances);
  startExecutorPool();
 EM_Report(inst);

 /**
  * Command-line entry point for testing this class.
  * 
  * @param argv the command-line arguments, expected in the form:
  *          <p>
  *          -t training file [-T test file] [-N number of clusters] [-S random
  *          seed]
  */
 public static void main(String[] argv) {
  final EM clusterer = new EM();
  runClusterer(clusterer, argv);
 }
}

// Create an EM clusterer and build it on dataClusterer
// (dataClusterer is declared outside this excerpt).
EM clusterer = new EM();
 clusterer.buildClusterer(dataClusterer);

// NOTE(review): elided excerpt of option parsing. As shown, every setter below
// reads the single optionString fetched for 'I'; presumably the full source
// re-reads optionString for each option before its setter — verify.
resetOptions();
// 'V' flag drives setDebug
setDebug(Utils.getFlag('V', options));
String optionString = Utils.getOption('I', options);
 setMaxIterations(Integer.parseInt(optionString));
 setNumFolds(Integer.parseInt(optionString));
 setMinLogLikelihoodImprovementIterating(Double.parseDouble(optionString));
 setMinLogLikelihoodImprovementCV(Double.parseDouble(optionString));
 setNumClusters(Integer.parseInt(optionString));
 setMaximumNumberOfClusters(Integer.parseInt(optionString));
 setMinStdDev((new Double(optionString)).doubleValue());
 setNumKMeansRuns((new Integer(optionString)).intValue());
// 'O' flag drives setDisplayModelInOldFormat
setDisplayModelInOldFormat(Utils.getFlag('O', options));
 // slotsS is not declared in the visible lines
 setNumExecutionSlots(Integer.parseInt(slotsS));

// NOTE(review): elided excerpt — the loop over i, the catch clause and the
// closing braces are not visible here.
EM clusterer = new EM();
try {
  // fix the number of clusters, then build the model on the transposed data
  clusterer.setNumClusters(clusters);
  System.out.println("clustering");
  clusterer.buildClusterer(transposed);
    // add label i to the bucket indexed by the cluster assigned to instance i
    childrenLabels[clusterer.clusterInstance(transposed.instance(i))].add(labels.get(i));

/**
 * Creates the EM clusterer used here and applies an empty option array to it.
 *
 * @return the EM instance; it is returned even when applying the options failed
 */
protected Clusterer getClusterer() {
 final EM em = new EM();
 try {
  final String[] noOptions = new String[0];
  em.setOptions(noOptions);
 } catch (Exception e) {
  // the failure is only printed; the clusterer is still handed back
  e.printStackTrace();
 }
 return em;
}

 // NOTE(review): elided excerpt — the enclosing methods and the conditions
 // around these calls are not visible here.
 M(inst);
 return;
// same call sequence that opens and closes M(): fresh estimators, priors, re-estimation
new_estimators();
estimate_priors(inst);
M_reEstimate(inst);

// NOTE(review): elided excerpt of the cluster-count search — the fold loop,
// catch bodies and closing braces are missing from the lines shown.
int seed = getSeed();
int restartCount = 0;
// repeat for as long as CVincreased is set
CLUSTER_SEARCH: while (CVincreased) {
 // randomize a new Instances built from m_theInstances, using a generator seeded via getSeed()
 cvr = new Random(getSeed());
 trainCopy = new Instances(m_theInstances);
 trainCopy.randomize(cvr);
  EM_Init(cvTrain);
  try {
   iterate(cvTrain, false);
  } catch (Exception ex) {
   // value returned by E() on cvTest
   tll = E(cvTest, false);
  } catch (Exception ex) {
  seed = getSeed();
  // divide the accumulated templl by the number of folds
  templl /= numFolds;

/**
 * Maximisation step of the EM algorithm. Creates fresh estimators, estimates
 * the cluster priors, folds every instance into each cluster's per-attribute
 * model using the instance weight times its membership weight, and finally
 * re-estimates the Gaussian parameters.
 * 
 * @param inst the training instances
 * @throws Exception if something goes wrong
 */
private void M(Instances inst) throws Exception {
 new_estimators();
 estimate_priors(inst);

 // accumulate the weighted statistics, instance by instance
 for (int row = 0; row < inst.numInstances(); row++) {
  Instance current = inst.instance(row);
  for (int cluster = 0; cluster < m_num_clusters; cluster++) {
   for (int att = 0; att < m_num_attribs; att++) {
    if (!inst.attribute(att).isNominal()) {
     // slot 0: weighted sum of values
     m_modelNormal[cluster][att][0] +=
      (current.value(att) * current.weight() * m_weights[row][cluster]);
     // slot 2: sum of weights
     m_modelNormal[cluster][att][2] += current.weight() * m_weights[row][cluster];
     // slot 1: weighted sum of squared values
     m_modelNormal[cluster][att][1] +=
      (current.value(att) * current.value(att) * current.weight() * m_weights[row][cluster]);
    } else {
     m_model[cluster][att].addValue(current.value(att),
      current.weight() * m_weights[row][cluster]);
    }
   }
  }
 }

 // turn the accumulated sums into Gaussian parameters
 M_reEstimate(inst);
}

// Excerpt: hands back the result of E() called with the flag set to true
// (the enclosing method is not visible here).
return E(inst, true);

// NOTE(review): elided excerpt of option parsing. As shown, every setter below
// reads the single optionString fetched for 'I'; presumably the full source
// re-reads optionString for each option before its setter — verify.
resetOptions();
// 'V' flag drives setDebug
setDebug(Utils.getFlag('V', options));
String optionString = Utils.getOption('I', options);
 setMaxIterations(Integer.parseInt(optionString));
 setNumFolds(Integer.parseInt(optionString));
 setMinLogLikelihoodImprovementIterating(Double.parseDouble(optionString));
 setMinLogLikelihoodImprovementCV(Double.parseDouble(optionString));
 setNumClusters(Integer.parseInt(optionString));
 setMaximumNumberOfClusters(Integer.parseInt(optionString));
 setMinStdDev((new Double(optionString)).doubleValue());
 setNumKMeansRuns((new Integer(optionString)).intValue());
// 'O' flag drives setDisplayModelInOldFormat
setDisplayModelInOldFormat(Utils.getFlag('O', options));
 // slotsS is not declared in the visible lines
 setNumExecutionSlots(Integer.parseInt(slotsS));

 /**
  * Command-line entry point for testing this class.
  * 
  * @param argv the command-line arguments, expected in the form:
  *          <p>
  *          -t training file [-T test file] [-N number of clusters] [-S random
  *          seed]
  */
 public static void main(String[] argv) {
  final EM clusterer = new EM();
  runClusterer(clusterer, argv);
 }
}

/**
 * Creates the EM clusterer used here and applies an empty option array to it.
 *
 * @return the EM instance; it is returned even when applying the options failed
 */
protected Clusterer getClusterer() {
 final EM em = new EM();
 try {
  final String[] noOptions = new String[0];
  em.setOptions(noOptions);
 } catch (Exception e) {
  // the failure is only printed; the clusterer is still handed back
  e.printStackTrace();
 }
 return em;
}

// generate data for clusterer (w/o class)
// (data is declared outside this snippet)
 Remove filter = new Remove();
 // the index handed to the filter is classIndex() + 1
 // (presumably because the filter's indices are 1-based — confirm against the Remove docs)
 filter.setAttributeIndices("" + (data.classIndex() + 1));
 try {
   filter.setInputFormat(data);
 } catch (Exception e) {
   // NOTE(review): the failure is only printed; useFilter below still runs
   e.printStackTrace();
 }
 // apply the filter to obtain the data set passed to the clusterer
 Instances dataClusterer = Filter.useFilter(data, filter);
 // train clusterer
 EM clusterer = new EM();
 // set further options for EM, if necessary...
 // clusterer.setNumClusters(maxNumofClusters); //-1 for n number of clusters
 clusterer.buildClusterer(dataClusterer);

 // NOTE(review): elided excerpt — the enclosing methods and the conditions
 // around these calls are not visible here.
 M(inst);
 return;
// same call sequence that opens and closes M(): fresh estimators, priors, re-estimation
new_estimators();
estimate_priors(inst);
M_reEstimate(inst);

// NOTE(review): elided excerpt of the cluster-count search — the fold loop,
// catch bodies and closing braces are missing from the lines shown.
int seed = getSeed();
int restartCount = 0;
// repeat for as long as CVincreased is set
CLUSTER_SEARCH: while (CVincreased) {
 // randomize a new Instances built from m_theInstances, using a generator seeded via getSeed()
 cvr = new Random(getSeed());
 trainCopy = new Instances(m_theInstances);
 trainCopy.randomize(cvr);
  EM_Init(cvTrain);
  try {
   iterate(cvTrain, false);
  } catch (Exception ex) {
   // value returned by E() on cvTest
   tll = E(cvTest, false);
  } catch (Exception ex) {
  seed = getSeed();
  // divide the accumulated templl by the number of folds
  templl /= numFolds;

/**
 * Maximisation step of the EM algorithm. Creates fresh estimators, estimates
 * the cluster priors, folds every instance into each cluster's per-attribute
 * model using the instance weight times its membership weight, and finally
 * re-estimates the Gaussian parameters.
 * 
 * @param inst the training instances
 * @throws Exception if something goes wrong
 */
private void M(Instances inst) throws Exception {
 new_estimators();
 estimate_priors(inst);

 // accumulate the weighted statistics, instance by instance
 for (int row = 0; row < inst.numInstances(); row++) {
  Instance current = inst.instance(row);
  for (int cluster = 0; cluster < m_num_clusters; cluster++) {
   for (int att = 0; att < m_num_attribs; att++) {
    if (!inst.attribute(att).isNominal()) {
     // slot 0: weighted sum of values
     m_modelNormal[cluster][att][0] +=
      (current.value(att) * current.weight() * m_weights[row][cluster]);
     // slot 2: sum of weights
     m_modelNormal[cluster][att][2] += current.weight() * m_weights[row][cluster];
     // slot 1: weighted sum of squared values
     m_modelNormal[cluster][att][1] +=
      (current.value(att) * current.value(att) * current.weight() * m_weights[row][cluster]);
    } else {
     m_model[cluster][att].addValue(current.value(att),
      current.weight() * m_weights[row][cluster]);
    }
   }
  }
 }

 // turn the accumulated sums into Gaussian parameters
 M_reEstimate(inst);
}

// Excerpt: hands back the result of E() called with the flag set to true
// (the enclosing method is not visible here).
return E(inst, true);

Javadoc

Simple EM (expectation maximisation) class.

EM assigns a probability distribution to each instance which indicates the probability of it belonging to each of the clusters. EM can decide how many clusters to create by cross validation, or you may specify a priori how many clusters to generate.

The cross validation performed to determine the number of clusters is done in the following steps:
1. the number of clusters is set to 1
2. the training set is split randomly into 10 folds.
3. EM is performed 10 times using the 10 folds the usual CV way.
4. the loglikelihood is averaged over all 10 results.
5. if loglikelihood has increased the number of clusters is increased by 1 and the program continues at step 2.

The number of folds is fixed to 10, as long as the number of instances in the training set is not smaller than 10. If this is the case the number of folds is set equal to the number of instances.

Missing values are globally replaced with ReplaceMissingValues.

Valid options are:

 -N <num> 
number of clusters. If omitted or -1 specified, then  
cross validation is used to select the number of clusters.

 -X <num> 
Number of folds to use when cross-validating to find the best number of clusters.

 -K <num> 
Number of runs of k-means to perform. 
(default 10)

 -max <num> 
Maximum number of clusters to consider during cross-validation. If omitted or -1 specified, then  
there is no upper limit on the number of clusters.

 -ll-cv <num> 
Minimum improvement in cross-validated log likelihood required 
to consider increasing the number of clusters. 
(default 1e-6)

 -I <num> 
max iterations. 
(default 100)

 -ll-iter <num> 
Minimum improvement in log likelihood required 
to perform another iteration of the E and M steps. 
(default 1e-6)

 -V 
verbose.

 -M <num> 
minimum allowable standard deviation for normal density 
computation 
(default 1e-6)

 -O 
Display model in old format (good when there are many clusters)

 -num-slots <num> 
Number of execution slots. 
(default 1 - i.e. no parallelism)

 -S <num> 
Random number seed. 
(default 100)

 -output-debug-info 
If set, clusterer is run in debug mode and 
may output additional info to the console

 -do-not-check-capabilities 
If set, clusterer capabilities are not checked before clusterer is built 
(use with caution).

Most used methods

<init>
Constructor.
setNumClusters
Set the number of clusters (-1 to select by CV).
CVClusters
estimate the number of clusters by cross validation on the training data.
E
The E step of the EM algorithm. Estimate cluster membership probabilities.
EM_Init
Initialize the global aggregated estimators and storage.
EM_Report
verbose output for debugging
M
The M step of the EM algorithm.
M_reEstimate
buildClusterer
Generates a clusterer. Has to initialize all fields of the clusterer that are not being set via options.
distributionForInstance
doEM
Perform the EM algorithm
estimate_priors
calculate prior probabilities for the clusters

Popular in Java

Finding current android device location
getSystemService (Context)
scheduleAtFixedRate (ScheduledExecutorService)
getExternalFilesDir (Context)
InputStreamReader (java.io)
A class for turning a byte stream into a character stream. Data read from the source input stream is
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
SecureRandom (java.security)
This class generates cryptographically secure pseudo-random numbers. It is best to invoke SecureRand
HttpServletRequest (javax.servlet.http)
Extends the javax.servlet.ServletRequest interface to provide request information for HTTP servlets.
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in the Public Domain.
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
Github Copilot alternatives

How to use EM in weka.clusterers

Best Java code snippets using weka.clusterers.EM (Showing top 20 results out of 315)

How to use
EM
in
weka.clusterers