public class demo { public demo() throws Exception { // TODO Auto-generated constructor stub BufferedReader breader = null; breader = new BufferedReader(new FileReader( "D:/logiciels/weka-3-7-12/weka-3-7-12/data/iris.arff")); Instances Train = new Instances(breader); //Train.setClassIndex(Train.numAttributes() - 1); // comment out this line SimpleKMeans kMeans = new SimpleKMeans(); kMeans.setSeed(10); kMeans.setPreserveInstancesOrder(true); kMeans.setNumClusters(3); kMeans.buildClusterer(Train); int[] assignments = kMeans.getAssignments(); int i = 0; for (int clusterNum : assignments) { System.out.printf("Instance %d -> Cluster %d", i, clusterNum); i++; } breader.close(); } public static void main(String[] args) throws Exception { // TODO Auto-generated method stub new demo(); } }
SimpleKMeans kmeans = ... // configure and build the clusterer first (setNumClusters, buildClusterer, ...)
Instances instances = kmeans.getClusterCentroids(); // one instance per cluster centroid
/**
 * Main method for executing this class from the command line.
 *
 * Delegates to the shared clusterer runner, which handles option parsing,
 * evaluation and output.
 *
 * @param args use -h to list all parameters
 */
public static void main(String[] args) {
    runClusterer(new SimpleKMeans(), args);
}
} // closes the enclosing class (its declaration is outside this view)
/**
 * Returns default capabilities of the clusterer, i.e. the ones of a plain
 * SimpleKMeans, re-owned by this wrapper.
 *
 * @return the capabilities of this clusterer
 */
@Override
public Capabilities getCapabilities() {
    final Capabilities caps = new SimpleKMeans().getCapabilities();
    caps.setOwner(this);
    return caps;
}
// Run k-means m_NumKMeansRuns times with different random seeds and keep the
// run with the lowest squared error.
double bestSqE = Double.MAX_VALUE;
for (i = 0; i < m_NumKMeansRuns; i++) {
    SimpleKMeans sk = new SimpleKMeans();
    sk.setSeed(m_rr.nextInt()); // fresh seed per restart
    sk.setNumClusters(m_num_clusters);
    sk.setNumExecutionSlots(m_executionSlots);
    sk.setDisplayStdDevs(true); // needed so getClusterStandardDevs() below works
    sk.setDoNotCheckCapabilities(true);
    sk.setDontReplaceMissingValues(true);
    sk.buildClusterer(inst);
    if (sk.getSquaredError() < bestSqE) {
        bestSqE = sk.getSquaredError();
        bestK = sk; // remember the best run so far
        m_num_clusters = bestK.numberOfClusters();
        // Re-dimension per-cluster state for the (possibly changed) k.
        m_weights = new double[inst.numInstances()][m_num_clusters];
        m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs];
        m_priorsPrev = new double[m_num_clusters];
        Instances centers = bestK.getClusterCentroids();
        Instances stdD = bestK.getClusterStandardDevs();
        double[][][] nominalCounts = bestK.getClusterNominalCounts();
        double[] clusterSizes = bestK.getClusterSizes();
        // NOTE(review): fragment is truncated here — the closing braces and
        // the code consuming centers/stdD/nominalCounts are not visible.
SimpleKMeans clusterer = new SimpleKMeans();
// k-means++ seeding of the initial centroids
clusterer.setInitializationMethod(new SelectedTag(SimpleKMeans.KMEANS_PLUS_PLUS,
    SimpleKMeans.TAGS_SELECTION));
clusterer.setNumExecutionSlots(numSlots);
clusterer.setNumClusters(numProductCentroids);
clusterer.setMaxIterations(maxIterations);
clusterer.setSeed(j + 1); // seed derived from the (outer) restart index j
clusterer.buildClusterer(datasets[i]);
double SSE = clusterer.getSquaredError();
if (SSE < minSSE) {
    minSSE = SSE;
    // NOTE(review): bestClusterer is read below but never assigned from
    // `clusterer` in this fragment — a `bestClusterer = clusterer;` is
    // likely missing (or lives in the truncated part). Verify before reuse.
    System.out.println("Mininum SSE: " + minSSE + " Seed: " + bestClusterer.getSeed());
    System.out.println("Saving best sub-quantizer in file..");
    Instances clusterCentroids = bestClusterer.getClusterCentroids();
    // NOTE(review): this inner `j` clashes with the `j` used for the seed
    // above — confirm the enclosing scopes differ in the full source.
    for (int j = 0; j < clusterCentroids.numInstances(); j++) {
        Instance centroid = clusterCentroids.instance(j);
        // fragment truncated here
long start = System.currentTimeMillis(); // wall-clock start for timing the build
SimpleKMeans clusterer = new SimpleKMeans();
if (kMeansPlusPlus) {
    // optional k-means++ centroid initialization
    clusterer.setInitializationMethod(new SelectedTag(SimpleKMeans.KMEANS_PLUS_PLUS,
        SimpleKMeans.TAGS_SELECTION));
    // NOTE(review): no closing brace for this `if` is visible in the
    // fragment; as rendered every call below sits inside it — confirm
    // against the full source.
    clusterer.setDebug(true);
    clusterer.setSeed(seed);
    clusterer.setNumClusters(numClusters);
    clusterer.setMaxIterations(maxIterations);
    clusterer.setNumExecutionSlots(numSlots);
    clusterer.setFastDistanceCalc(true); // skip some distance work where possible
    clusterer.buildClusterer(data);
    Instances clusterCentroids = clusterer.getClusterCentroids();
    for (int j = 0; j < clusterCentroids.numInstances(); j++) {
        Instance centroid = clusterCentroids.instance(j);
        // fragment truncated here
private void trainModel(Map<Long, Double> metricData) throws Exception { //Model has a single metric_value attribute Attribute value = new Attribute("metric_value"); FastVector attributes = new FastVector(); attributes.addElement(value); trainingData = new Instances("metric_value_data", attributes, 0); for (Double val : metricData.values()) { double[] valArray = new double[] { val }; Instance instance = new Instance(1.0, valArray); trainingData.add(instance); } //Create and train the model model = new SimpleKMeans(); model.setNumClusters(k); model.setMaxIterations(20); model.setPreserveInstancesOrder(true); model.buildClusterer(trainingData); clusterCentroids = model.getClusterCentroids(); centroidAssignments = model.getAssignments(); setMeanDistancesToCentroids(); }
// Validate that the data matches the clusterer's capabilities before building.
getCapabilities().testWithFail(data);
m_FullMeansOrMediansOrModes = moveCentroid(0, instances, true, false);
Random RandomO = new Random(getSeed());
int instIndex;
// NOTE(review): this fragment is garbled — the HashMap declaration below has
// no right-hand side and runs straight into the Canopy assignment, and the
// three *Init calls appear unconditionally although they look like
// alternative initialization strategies. Compare with the original source.
HashMap<DecisionTableHashKey, Integer> initC =
m_canopyClusters = new Canopy();
m_canopyClusters.setNumClusters(m_NumClusters);
m_canopyClusters.setSeed(getSeed());
m_canopyClusters.setT2(getCanopyT2());
m_canopyClusters.setT1(getCanopyT1());
m_canopyClusters
    .setMaxNumCandidateCanopiesToHoldInMemory(getCanopyMaxNumCanopiesToHoldInMemory());
m_canopyClusters.setPeriodicPruningRate(getCanopyPeriodicPruningRate());
m_canopyClusters.setMinimumCanopyDensity(getCanopyMinimumCanopyDensity());
m_canopyClusters.setDebug(getDebug());
m_canopyClusters.buildClusterer(initInstances);
kMeansPlusPlusInit(initInstances);
canopyInit(initInstances);
farthestFirstInit(initInstances);
// Per-cluster, per-attribute bookkeeping for nominal and missing counts.
m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0];
m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()];
startExecutorPool();
// Option-parsing fragment (setOptions-style). NOTE(review): as rendered,
// `temp` is reused for four different options and `optionString` for two,
// and the `} else {` below has no visible opening `if` — in the full source
// each setter is presumably guarded by its own Utils.getOption lookup.
setInitializationMethod(new SelectedTag(Integer.parseInt(initM), TAGS_SELECTION));
setCanopyMaxNumCanopiesToHoldInMemory(Integer.parseInt(temp));
setCanopyPeriodicPruningRate(Integer.parseInt(temp));
setCanopyMinimumCanopyDensity(Double.parseDouble(temp));
setCanopyT2(Double.parseDouble(temp));
setCanopyT1(Double.parseDouble(temp));
setNumClusters(Integer.parseInt(optionString));
setMaxIterations(Integer.parseInt(optionString));
distFunctionClassSpec[0] = ""; // strip the class name, keep the remaining options
setDistanceFunction((DistanceFunction) Utils.forName(
    DistanceFunction.class, className, distFunctionClassSpec));
} else {
    // no distance-function option given: fall back to Euclidean distance
    setDistanceFunction(new EuclideanDistance());
setNumExecutionSlots(Integer.parseInt(slotsS));
// Build a local k-means++ run on this sketch to produce one candidate set of
// start points for a full distributed k-means run.
SimpleKMeans localKMeans = new SimpleKMeans();
try {
    localKMeans.setNumClusters(numClusters);
    localKMeans.setInitializationMethod(new SelectedTag(
        SimpleKMeans.KMEANS_PLUS_PLUS, SimpleKMeans.TAGS_SELECTION));
    localKMeans.buildClusterer(sketchForRun);
    // the centroids of this local run become the start points for one run
    finalStartPointsForRuns.add(localKMeans.getClusterCentroids());
} catch (Exception ex) {
    // wrap in the distributed-Weka exception type expected by callers
    throw new DistributedWekaException(ex);
    // fragment truncated: closing braces not visible
You can implement the k-means algorithm as follows:

SimpleKMeans kmeans = new SimpleKMeans();

kmeans.setSeed(10); // the seed is the important parameter to set for reproducible runs
kmeans.setPreserveInstancesOrder(true);
kmeans.setNumClusters(numberOfClusters);
kmeans.buildClusterer(instances);

// getAssignments() returns the cluster number (starting at 0) for each
// instance; the array has as many elements as there are instances.
int[] assignments = kmeans.getAssignments();

int i = 0;
for (int clusterNum : assignments) {
    System.out.printf("Instance %d -> Cluster %d", i, clusterNum);
    i++;
}
Instances instances = new Instances("iris.arff"); SimpleKMeans simpleKMeans = new SimpleKMeans(); // build clusterer simpleKMeans.setPreservationOrder(true); simpleKMeans.setNumClusters(2); simpleKMeans.buildClusterer(instances); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(simpleKMeans); eval.evaluateClusterer(instances); System.out.println("Cluster Evaluation: "+eval.clusterResultsToString());
/**
 * Default constructor: a LabelPowerset-over-J48 base learner combined with
 * a 5-cluster k-means clusterer using Euclidean distance.
 */
public ClusteringBased() {
    super(new LabelPowerset(new J48()));
    try {
        SimpleKMeans km = new SimpleKMeans();
        km.setNumClusters(5);
        km.setDistanceFunction(new EuclideanDistance());
        clusterer = km;
    } catch (Exception ex) {
        // setters may throw; log and leave `clusterer` unset
        Logger.getLogger(ClusteringBased.class.getName()).log(Level.SEVERE, null, ex);
    }
}
/**
 * Initialize with the canopy centers of the Canopy clustering method.
 *
 * Builds the canopy clusterer lazily (only when it has not been built yet)
 * and then uses its canopies as the initial cluster centroids.
 *
 * @param data the training data
 * @throws Exception if a problem occurs
 */
protected void canopyInit(Instances data) throws Exception {
    if (m_canopyClusters == null) {
        // Forward this clusterer's configuration (k, seed, T1/T2 thresholds,
        // memory/pruning limits) to the canopy pre-clusterer.
        m_canopyClusters = new Canopy();
        m_canopyClusters.setNumClusters(m_NumClusters);
        m_canopyClusters.setSeed(getSeed());
        m_canopyClusters.setT2(getCanopyT2());
        m_canopyClusters.setT1(getCanopyT1());
        m_canopyClusters
            .setMaxNumCandidateCanopiesToHoldInMemory(getCanopyMaxNumCanopiesToHoldInMemory());
        m_canopyClusters.setPeriodicPruningRate(getCanopyPeriodicPruningRate());
        m_canopyClusters.setMinimumCanopyDensity(getCanopyMinimumCanopyDensity());
        m_canopyClusters.setDebug(getDebug());
        m_canopyClusters.buildClusterer(data);
    }
    // The canopy centers become the k-means starting centroids.
    m_ClusterCentroids = m_canopyClusters.getCanopies();
}
/**
 * The default constructor.
 *
 * Overrides the inherited default seed with 10 and applies it immediately.
 */
public SimpleKMeans() {
    super();
    // presumably the field the superclass consults for the default seed
    // option value — TODO confirm against RandomizableClusterer
    m_SeedDefault = 10;
    setSeed(m_SeedDefault);
}
// RNG seeded from the clusterer's seed option so runs are reproducible.
Random randomO = new Random(getSeed());
// Tracks already-chosen initial centers keyed by a hashable view of the
// instance. NOTE(review): the value type is String here but Integer in the
// buildClusterer fragment elsewhere in this file — confirm which is intended.
HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>();
// Duplicate of the restart loop earlier in this file: run k-means
// m_NumKMeansRuns times with different seeds and keep the lowest-SSE run.
double bestSqE = Double.MAX_VALUE;
for (i = 0; i < m_NumKMeansRuns; i++) {
    SimpleKMeans sk = new SimpleKMeans();
    sk.setSeed(m_rr.nextInt()); // fresh seed per restart
    sk.setNumClusters(m_num_clusters);
    sk.setNumExecutionSlots(m_executionSlots);
    sk.setDisplayStdDevs(true); // needed so getClusterStandardDevs() below works
    sk.setDoNotCheckCapabilities(true);
    sk.setDontReplaceMissingValues(true);
    sk.buildClusterer(inst);
    if (sk.getSquaredError() < bestSqE) {
        bestSqE = sk.getSquaredError();
        bestK = sk; // remember the best run so far
        m_num_clusters = bestK.numberOfClusters();
        // Re-dimension per-cluster state for the (possibly changed) k.
        m_weights = new double[inst.numInstances()][m_num_clusters];
        m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs];
        m_priorsPrev = new double[m_num_clusters];
        Instances centers = bestK.getClusterCentroids();
        Instances stdD = bestK.getClusterStandardDevs();
        double[][][] nominalCounts = bestK.getClusterNominalCounts();
        double[] clusterSizes = bestK.getClusterSizes();
        // NOTE(review): fragment is truncated here — closing braces and the
        // code consuming centers/stdD/nominalCounts are not visible.
private void trainModel(Map<Long, Double> metricData) throws Exception { //Model has a single metric_value attribute Attribute value = new Attribute("metric_value"); FastVector attributes = new FastVector(); attributes.addElement(value); trainingData = new Instances("metric_value_data", attributes, 0); for (Double val : metricData.values()) { double[] valArray = new double[] { val }; Instance instance = new Instance(1.0, valArray); trainingData.add(instance); } //Create and train the model model = new SimpleKMeans(); model.setNumClusters(k); model.setMaxIterations(20); model.setPreserveInstancesOrder(true); model.buildClusterer(trainingData); clusterCentroids = model.getClusterCentroids(); centroidAssignments = model.getAssignments(); setMeanDistancesToCentroids(); }
// Duplicate of the garbled buildClusterer fragment earlier in this file.
// Validate that the data matches the clusterer's capabilities before building.
getCapabilities().testWithFail(data);
m_FullMeansOrMediansOrModes = moveCentroid(0, instances, true, false);
Random RandomO = new Random(getSeed());
int instIndex;
// NOTE(review): the HashMap declaration below has no right-hand side and
// runs straight into the Canopy assignment, and the three *Init calls appear
// unconditionally although they look like alternative initialization
// strategies. Compare with the original source before trusting this.
HashMap<DecisionTableHashKey, Integer> initC =
m_canopyClusters = new Canopy();
m_canopyClusters.setNumClusters(m_NumClusters);
m_canopyClusters.setSeed(getSeed());
m_canopyClusters.setT2(getCanopyT2());
m_canopyClusters.setT1(getCanopyT1());
m_canopyClusters
    .setMaxNumCandidateCanopiesToHoldInMemory(getCanopyMaxNumCanopiesToHoldInMemory());
m_canopyClusters.setPeriodicPruningRate(getCanopyPeriodicPruningRate());
m_canopyClusters.setMinimumCanopyDensity(getCanopyMinimumCanopyDensity());
m_canopyClusters.setDebug(getDebug());
m_canopyClusters.buildClusterer(initInstances);
kMeansPlusPlusInit(initInstances);
canopyInit(initInstances);
farthestFirstInit(initInstances);
// Per-cluster, per-attribute bookkeeping for nominal and missing counts.
m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0];
m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()];
startExecutorPool();