/** Returns a freshly constructed {@link InitializePlusPlus} as the {@code InitializeKMeans_F64} implementation. */
@Override public InitializeKMeans_F64 createAlg() { return new InitializePlusPlus(); } }
@Override public void selectSeeds(List<double[]> points, List<double[]> seeds) { if( seeds.size() > points.size() ) throw new IllegalArgumentException("More seeds requested than points!"); distance.resize(points.size()); // the first seed is randomly selected from the list of points double[] seed = points.get( rand.nextInt(points.size()) ); copyInto(seed,seeds.get(0)); // compute the distance each points is from the seed totalDistance = 0; for (int i = 0; i < points.size(); i++) { double[] p = points.get(i); double d = StandardKMeans_F64.distanceSq(p,seed); distance.data[i] = d; totalDistance += d; } // iteratively select the next seed and update the list of point distances for (int i = 1; i < seeds.size(); i++) { if( totalDistance == 0 ) { // if the total distance is zero that means there are duplicate points and that // all the unique points have already been added as seeds. just select a point // and copy it into rest of the seeds copyInto(seed, seeds.get(i)); } else { double target = rand.nextDouble(); copyInto(selectNextSeed(points, target), seeds.get(i)); updateDistances(points, seeds.get(i)); } } }
/**
 * Verifies seed selection by checking that the points are picked with the
 * expected distribution.
 */
@Test
public void selectNextSeed() {
	InitializePlusPlus alg = new InitializePlusPlus();
	alg.init(1,123);

	// distances 3, 6 and 1 with a total of 10; expected pick rates are 0.3, 0.6 and 0.1
	alg.distance.resize(3);
	alg.distance.data = new double[]{3,6,1};
	alg.totalDistance = 10.0;

	List<double[]> points = new ArrayList<double[]>();
	while (points.size() < 3) {
		points.add(new double[1]);
	}

	double[] histogram = new double[3];
	for (int trial = 0; trial < 1000; trial++) {
		double[] picked = alg.selectNextSeed(points, rand.nextDouble());

		// arrays compare by identity, so indexOf locates the exact instance returned
		// (or -1 when it is not one of the points)
		int which = points.indexOf(picked);
		histogram[which]++;
	}

	assertEquals(0.3, histogram[0]/1000.0, 0.02);
	assertEquals(0.6, histogram[1]/1000.0, 0.02);
	assertEquals(0.1, histogram[2]/1000.0, 0.02);
}
/**
 * Checks that updateDistances() leaves the expected per-point distances and
 * total distance after a new seed at -1 is introduced.
 */
@Test
public void updateDistances() {
	InitializePlusPlus alg = new InitializePlusPlus();
	alg.init(1,123);

	// distances stored before the new seed is taken into account
	alg.distance.resize(3);
	alg.distance.data = new double[]{3,6,1};

	// one dimensional points located at 0, 1 and 4
	List<double[]> points = new ArrayList<double[]>();
	for (int k = 0; k < 3; k++) {
		double[] p = new double[]{k*k};
		points.add(p);
	}

	double[] newSeed = new double[]{-1};
	alg.updateDistances(points, newSeed);

	assertEquals(1, alg.distance.get(0), 1e-8);
	assertEquals(4, alg.distance.get(1), 1e-8);
	assertEquals(1, alg.distance.get(2), 1e-8);
	assertEquals(6, alg.totalDistance, 1e-8);
}
@Override public void selectSeeds(List<double[]> points, List<double[]> seeds) { if( seeds.size() > points.size() ) throw new IllegalArgumentException("More seeds requested than points!"); distance.resize(points.size()); // the first seed is randomly selected from the list of points double[] seed = points.get( rand.nextInt(points.size()) ); copyInto(seed,seeds.get(0)); // compute the distance each points is from the seed totalDistance = 0; for (int i = 0; i < points.size(); i++) { double[] p = points.get(i); double d = StandardKMeans_F64.distanceSq(p,seed); distance.data[i] = d; totalDistance += d; } // iteratively select the next seed and update the list of point distances for (int i = 1; i < seeds.size(); i++) { if( totalDistance == 0 ) { // if the total distance is zero that means there are duplicate points and that // all the unique points have already been added as seeds. just select a point // and copy it into rest of the seeds copyInto(seed, seeds.get(i)); } else { double target = rand.nextDouble(); copyInto(selectNextSeed(points, target), seeds.get(i)); updateDistances(points, seeds.get(i)); } } }
seed = new InitializePlusPlus(); } else { switch (initializer) { case PLUS_PLUS: seed = new InitializePlusPlus(); break;
seed = new InitializePlusPlus(); } else { switch (initializer) { case PLUS_PLUS: seed = new InitializePlusPlus(); break;