/**
 * Create the X-means instance from the values referenced here
 * (distance function, k bounds, iteration limit, inner k-means, initializer,
 * information criterion and random generator).
 *
 * @return New X-means instance
 */
@Override
protected XMeans<V, M> makeInstance() {
  final XMeans<V, M> instance = new XMeans<>(distanceFunction, k_min, k_max, maxiter, innerKMeans, initializer, informationCriterion, random);
  return instance;
}
}
splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation)); innerKMeans.setK(2); Clustering<M> childClustering = innerKMeans.run(proxyDB); double parentEvaluation = informationCriterion.quality(parentClustering, getDistanceFunction(), relation); double childrenEvaluation = informationCriterion.quality(childClustering, getDistanceFunction(), relation);
splitInitializer.setInitialMeans(initializer.chooseInitialMeans(database, relation, k_min, getDistanceFunction())); Clustering<M> clustering = innerKMeans.run(database, relation); for(Cluster<M> cluster : clusters) { List<Cluster<M>> childClusterList = splitCluster(cluster, database, relation); nextClusters.addAll(childClusterList); if(childClusterList.size() > 1) {
/** * Split an existing centroid into two initial centers. * * @param parentCluster Existing cluster * @param relation Data relation * @return List of new centroids */ protected List<? extends NumberVector> splitCentroid(Cluster<? extends MeanModel> parentCluster, Relation<V> relation) { Vector parentCentroid = parentCluster.getModel().getMean(); // Compute size of cluster/region double radius = 0.; for(DBIDIter it = parentCluster.getIDs().iter(); it.valid(); it.advance()) { double d = getDistanceFunction().distance(relation.get(it), parentCentroid); radius = (d > radius) ? d : radius; } // Choose random vector Random random = rnd.getSingleThreadedRandom(); final int dim = RelationUtil.dimensionality(relation); Vector randomVector = VectorUtil.randomVector(Vector.FACTORY, dim, random).normalize(); randomVector.timesEquals((.4 + random.nextDouble() * .5) * radius); // Get the new centroids ArrayList<Vector> vecs = new ArrayList<>(2); vecs.add(parentCentroid.minus(randomVector)); vecs.add(randomVector.plusEquals(parentCentroid)); return vecs; }
/** * A very basic X-means test run. */ @Test public void testXMeans() { Database db = makeSimpleDatabase(UNITTEST + "3clusters-and-noise-2d.csv", 330); Clustering<?> result = new ELKIBuilder<XMeans<DoubleVector, ?>>(XMeans.class) // .with(XMeans.Parameterizer.K_MIN_ID, 2) // .with(KMeans.K_ID, 20) // .with(XMeans.Parameterizer.INNER_KMEANS_ID, KMeansLloyd.class) // .with(XMeans.Parameterizer.INFORMATION_CRITERION_ID, BayesianInformationCriterion.class) // .with(KMeans.SEED_ID, 0) // // Initializer seed .with(XMeans.Parameterizer.SEED_ID, 0) // // X-means seed .build().run(db); testFMeasure(db, result, 0.95927231008); testClusterSizes(result, new int[] { 1, 2, 2, 2, 3, 5, 5, 51, 106, 153 }); } }
splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation)); innerKMeans.setK(2); Clustering<M> childClustering = innerKMeans.run(proxyDB); double parentEvaluation = informationCriterion.quality(parentClustering, getDistanceFunction(), relation); double childrenEvaluation = informationCriterion.quality(childClustering, getDistanceFunction(), relation);
splitInitializer.setInitialMeans(initializer.chooseInitialMeans(database, relation, k_min, getDistanceFunction())); Clustering<M> clustering = innerKMeans.run(database, relation); for(Cluster<M> cluster : clusters) { List<Cluster<M>> childClusterList = splitCluster(cluster, database, relation); nextClusters.addAll(childClusterList); if(childClusterList.size() > 1) {
/** * Split an existing centroid into two initial centers. * * @param parentCluster Existing cluster * @param relation Data relation * @return List of new centroids */ protected double[][] splitCentroid(Cluster<? extends MeanModel> parentCluster, Relation<V> relation) { double[] parentCentroid = parentCluster.getModel().getMean(); // Compute size of cluster/region double radius = 0.; for(DBIDIter it = parentCluster.getIDs().iter(); it.valid(); it.advance()) { double d = getDistanceFunction().distance(relation.get(it), DoubleVector.wrap(parentCentroid)); radius = (d > radius) ? d : radius; } // Choose random vector Random random = rnd.getSingleThreadedRandom(); final int dim = RelationUtil.dimensionality(relation); double[] randomVector = normalize(MathUtil.randomDoubleArray(dim, random)); timesEquals(randomVector, (.4 + random.nextDouble() * .5) * radius); // Get the new centroids for(int d = 0; d < dim; d++) { double a = parentCentroid[d], b = randomVector[d]; parentCentroid[d] = a - b; randomVector[d] = a + b; } return new double[][] { parentCentroid, randomVector }; }
splitInitializer.setInitialMeans(splitCentroid(parentCluster, relation)); innerKMeans.setK(2); Clustering<M> childClustering = innerKMeans.run(proxyDB); double parentEvaluation = informationCriterion.quality(parentClustering, getDistanceFunction(), relation); double childrenEvaluation = informationCriterion.quality(childClustering, getDistanceFunction(), relation);
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); splitInitializer.setInitialMeans(initializer.chooseInitialMeans(database, relation, k_min, getDistanceFunction(), Vector.FACTORY)); Clustering<M> clustering = innerKMeans.run(database, relation); for(Cluster<M> cluster : clusters) { List<Cluster<M>> childClusterList = splitCluster(cluster, database, relation); nextClusters.addAll(childClusterList); if(childClusterList.size() > 1) {
/**
 * Create the X-means instance from the values referenced here
 * (distance function, k bounds, iteration limit, inner k-means, initializer,
 * information criterion and random generator).
 *
 * @return New X-means instance
 */
@Override
protected XMeans<V, M> makeInstance() {
  final XMeans<V, M> instance = new XMeans<>(distanceFunction, k_min, k_max, maxiter, innerKMeans, initializer, informationCriterion, random);
  return instance;
}
}
/** * Split an existing centroid into two initial centers. * * @param parentCluster Existing cluster * @param relation Data relation * @return List of new centroids */ protected double[][] splitCentroid(Cluster<? extends MeanModel> parentCluster, Relation<V> relation) { double[] parentCentroid = parentCluster.getModel().getMean(); // Compute size of cluster/region double radius = 0.; for(DBIDIter it = parentCluster.getIDs().iter(); it.valid(); it.advance()) { double d = getDistanceFunction().distance(relation.get(it), DoubleVector.wrap(parentCentroid)); radius = (d > radius) ? d : radius; } // Choose random vector Random random = rnd.getSingleThreadedRandom(); final int dim = RelationUtil.dimensionality(relation); double[] randomVector = normalize(MathUtil.randomDoubleArray(dim, random)); timesEquals(randomVector, (.4 + random.nextDouble() * .5) * radius); // Get the new centroids for(int d = 0; d < dim; d++) { double a = parentCentroid[d], b = randomVector[d]; parentCentroid[d] = a - b; randomVector[d] = a + b; } return new double[][] { parentCentroid, randomVector }; }
/**
 * Create the X-means instance from the values referenced here
 * (distance function, k bounds, iteration limit, inner k-means, initializer,
 * split initializer, information criterion and random generator).
 *
 * @return New X-means instance
 */
@Override
protected XMeans<V, M> makeInstance() {
  final XMeans<V, M> instance = new XMeans<>(distanceFunction, k_min, k_max, maxiter, innerKMeans, initializer, splitInitializer, informationCriterion, random);
  return instance;
}
}