/**
 * Build the algorithm object from the {@code distanceFunction}, {@code k}
 * and {@code initializer} values in scope.
 *
 * @return a newly constructed {@code SingleAssignmentKMeans}
 */
@Override
protected SingleAssignmentKMeans<V> makeInstance() {
  final SingleAssignmentKMeans<V> algorithm = new SingleAssignmentKMeans<>(distanceFunction, k, initializer);
  return algorithm;
}
}
/**
 * Cluster the relation by constructing an {@code Instance} from the
 * relation, the distance function and the initial means, running it once
 * with the argument 1, and returning the result it builds.
 *
 * @param database Database, passed on to {@code initialMeans}
 * @param relation Relation to process
 * @return the clustering produced by {@code Instance#buildResult()}
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final Instance assigner = new Instance(relation, getDistanceFunction(), initialMeans(database, relation));
  // NOTE(review): the argument 1 is presumably an iteration limit; confirm
  // against Instance.run.
  assigner.run(1);
  final Clustering<KMeansModel> clustering = assigner.buildResult();
  return clustering;
}
@Override public Clustering<KMeansModel> run(Database database, Relation<V> relation) { if(relation.size() <= 0) { return new Clustering<>("k-Means Assignment", "kmeans-assignment"); } // Choose initial means if(LOG.isStatistics()) { LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); } List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY); // Setup cluster assignment store List<ModifiableDBIDs> clusters = new ArrayList<>(); for(int i = 0; i < k; i++) { clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k))); } WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1); double[] varsum = new double[k]; assignToNearestCluster(relation, means, clusters, assignment, varsum); // Wrap result Clustering<KMeansModel> result = new Clustering<>("Nearest Centroid Clustering", "nearest-center-clustering"); for(int i = 0; i < clusters.size(); i++) { KMeansModel model = new KMeansModel(means.get(i), varsum[i]); result.addToplevelCluster(new Cluster<>(clusters.get(i), model)); } return result; }
@Test public void testSingleAssignmentKMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 7) // .build().run(db); // Unsurprisingly, these results are much worse than normal k-means testFMeasure(db, result, 0.702733122); testClusterSizes(result, new int[] { 64, 95, 202, 306, 333 }); } }
@Test public void testRandomNormalGeneratedInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 0) // .with(KMeans.INIT_ID, RandomNormalGeneratedInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.7697589); testClusterSizes(result, new int[] { 0, 158, 200, 242, 400 }); } }
/**
 * Cluster the relation by constructing an {@code Instance} from the
 * relation, the distance function and the initial means, running it once
 * with the argument 1, and returning the result it builds.
 *
 * @param database Database, passed on to {@code initialMeans}
 * @param relation Relation to process
 * @return the clustering produced by {@code Instance#buildResult()}
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final Instance assigner = new Instance(relation, getDistanceFunction(), initialMeans(database, relation));
  // NOTE(review): the argument 1 is presumably an iteration limit; confirm
  // against Instance.run.
  assigner.run(1);
  final Clustering<KMeansModel> clustering = assigner.buildResult();
  return clustering;
}
/**
 * Build the algorithm object from the {@code distanceFunction}, {@code k}
 * and {@code initializer} values in scope.
 *
 * @return a newly constructed {@code SingleAssignmentKMeans}
 */
@Override
protected SingleAssignmentKMeans<V> makeInstance() {
  final SingleAssignmentKMeans<V> algorithm = new SingleAssignmentKMeans<>(distanceFunction, k, initializer);
  return algorithm;
}
}
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testParkInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.INIT_ID, ParkInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.5872566); testClusterSizes(result, new int[] { 2, 9, 110, 422, 457 }); }
/**
 * Build the algorithm object from the {@code distanceFunction}, {@code k}
 * and {@code initializer} values in scope.
 *
 * @return a newly constructed {@code SingleAssignmentKMeans}
 */
@Override
protected SingleAssignmentKMeans<V> makeInstance() {
  final SingleAssignmentKMeans<V> algorithm = new SingleAssignmentKMeans<>(distanceFunction, k, initializer);
  return algorithm;
}
}
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testPAMInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.INIT_ID, PAMInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.99800500); testClusterSizes(result, new int[] { 199, 200, 200, 200, 201 }); }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testFirstKInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.INIT_ID, FirstKInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.62025907); testClusterSizes(result, new int[] { 23, 38, 226, 258, 455 }); }
@Test public void testLinearBUILDInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 5) // .with(KMeans.INIT_ID, LABInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.9901); testClusterSizes(result, new int[] { 195, 200, 200, 200, 205 }); }
@Test public void testSingleAssignmentOstrovsky() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 3) // .with(KMeans.INIT_ID, OstrovskyInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.9980); testClusterSizes(result, new int[] { 199, 200, 200, 200, 201 }); } }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testRandomlyChosenInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 7) // .with(KMeans.INIT_ID, RandomlyChosenInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.702733); testClusterSizes(result, new int[] { 64, 95, 202, 306, 333 }); }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testSingleAssignmentKMeansPlusPlus() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 3) // .with(KMeans.INIT_ID, KMeansPlusPlusInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.99205); testClusterSizes(result, new int[] { 197, 199, 200, 201, 203 }); }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testFarthestSumPointsInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 3) // .with(KMeans.INIT_ID, FarthestSumPointsInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.80399668933); testClusterSizes(result, new int[] { 32, 169, 199, 201, 399 }); }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testFarthestPointsInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 3) // .with(KMeans.INIT_ID, FarthestPointsInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.88132453); testClusterSizes(result, new int[] { 128, 199, 201, 201, 271 }); }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testRandomlyGeneratedInitialMeans() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 0) // .with(KMeans.INIT_ID, RandomUniformGeneratedInitialMeans.class) // .build().run(db); testFMeasure(db, result, 0.74344789); testClusterSizes(result, new int[] { 1, 145, 208, 246, 400 }); } }
/** * Run KMeans with fixed parameters and compare the result to a golden * standard. */ @Test public void testSampleKMeansInitialization() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<?> result = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) // .with(KMeans.K_ID, 5) // .with(KMeans.SEED_ID, 8) // .with(KMeans.INIT_ID, SampleKMeansInitialization.class) // .with(SampleKMeansInitialization.Parameterizer.KMEANS_ID, KMeansHamerly.class) // .with(KMeans.SEED_ID, 8) // .with(SampleKMeansInitialization.Parameterizer.SAMPLE_ID, 100) // .build().run(db); testFMeasure(db, result, 0.99601); testClusterSizes(result, new int[] { 199, 199, 200, 201, 201 }); } }