/**
 * Returns the distance function of the wrapped k-means algorithm.
 *
 * @return the distance function reported by {@code innerkMeans}
 */
@Override
public DistanceFunction<? super V> getDistanceFunction() {
  final DistanceFunction<? super V> delegateDistance = innerkMeans.getDistanceFunction();
  return delegateDistance;
}
/**
 * Returns the input type restriction of the wrapped k-means algorithm.
 *
 * @return the type restriction reported by {@code innerKMeans}
 */
@Override
public TypeInformation[] getInputTypeRestriction() {
  final TypeInformation[] delegateRestriction = innerKMeans.getInputTypeRestriction();
  return delegateRestriction;
}
/**
 * Forwards the distance function to the wrapped k-means algorithm.
 *
 * @param distanceFunction distance function to set on {@code innerkMeans}
 */
@Override
public void setDistanceFunction(NumberVectorDistanceFunction<? super V> distanceFunction) {
  // This wrapper keeps no copy; the inner algorithm owns the setting.
  innerkMeans.setDistanceFunction(distanceFunction);
}
/**
 * Runs the inner k-means algorithm {@code trials} times and returns the
 * candidate that the quality measure rates best.
 *
 * @param database Database passed through to the inner algorithm
 * @param relation Relation to cluster
 * @return the best clustering found, or {@code null} if {@code trials} is not
 *         positive (the loop body never executes)
 * @throws AbortException if the inner distance function is not a
 *         {@code PrimitiveDistanceFunction}
 */
@Override
public Clustering<M> run(Database database, Relation<V> relation) {
  if(!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
    throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
  }
  @SuppressWarnings("unchecked")
  final NumberVectorDistanceFunction<? super NumberVector> df = (NumberVectorDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction();

  Clustering<M> bestResult = null;
  double bestCost = Double.NaN;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
  for(int i = 0; i < trials; i++) {
    Clustering<M> currentCandidate = innerkMeans.run(database, relation);
    double currentCost = qualityMeasure.quality(currentCandidate, df, relation);
    // Only build the message when it will actually be logged.
    if(LOG.isVerbose()) {
      LOG.verbose("Cost of candidate " + i + ": " + currentCost);
    }
    // Always accept the first candidate: bestCost starts as NaN, and every
    // ordinary comparison against NaN is false, so relying on isBetter alone
    // could leave bestResult null.
    if(bestResult == null || qualityMeasure.isBetter(currentCost, bestCost)) {
      bestResult = currentCandidate;
      bestCost = currentCost;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return bestResult;
}
@Override public <T extends V, O extends NumberVector> List<O> chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction, NumberVector.Factory<O> factory) { final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), rate, rnd); // Ugly cast, sorry @SuppressWarnings("unchecked") Relation<V> rel = (Relation<V>) relation; // FIXME: This does not necessarily hold. Check and fail! if(!distanceFunction.getInputTypeRestriction().isAssignableFromType(TypeUtil.NUMBER_VECTOR_FIELD)) { LoggingUtil.warning("Initializing k-means with k-means using specialized distance functions MAY fail, if the initialization method does require a distance defined on arbitrary number vectors."); } @SuppressWarnings("unchecked") NumberVectorDistanceFunction<? super V> pdf = (NumberVectorDistanceFunction<? super V>) distanceFunction; ProxyView<V> proxyv = new ProxyView<>(sample, rel); ProxyDatabase proxydb = new ProxyDatabase(sample, proxyv); innerkMeans.setK(k); innerkMeans.setDistanceFunction(pdf); Clustering<?> clusters = innerkMeans.run(proxydb, proxyv); List<O> means = new ArrayList<>(); for(Cluster<?> cluster : clusters.getAllClusters()) { means.add(factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation))); } return means; }
// Excerpt: sets the inner k-means to k_min clusters, logs the initializer as a statistic, seeds splitInitializer with the means chosen by initializer, and runs the inner k-means.
// The inner k-means is then re-run with k = nextClusters.size(), and the contents of clusters are replaced by the resulting clusters.
// NOTE(review): nextClusters and clusters are not declared in this excerpt - presumably intermediate code is elided; confirm against the full method.
innerKMeans.setK(k_min); LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); splitInitializer.setInitialMeans(initializer.chooseInitialMeans(database, relation, k_min, getDistanceFunction())); Clustering<M> clustering = innerKMeans.run(database, relation); innerKMeans.setK(nextClusters.size()); clustering = innerKMeans.run(database, relation); clusters.clear(); clusters.addAll(clustering.getAllClusters());
/** * Constructor. * * @param distanceFunction Distance function * @param k_min k_min parameter - minimum number of result clusters * @param k_max k_max parameter - maximum number of result clusters * @param maxiter Maximum number of iterations each. * @param innerKMeans K-Means variant to use inside. * @param informationCriterion The information criterion used for the * splitting step * @param random Random factory */ public XMeans(NumberVectorDistanceFunction<? super V> distanceFunction, int k_min, int k_max, int maxiter, KMeans<V, M> innerKMeans, KMeansInitialization initializer, KMeansQualityMeasure<V> informationCriterion, RandomFactory random) { super(distanceFunction, k_min, maxiter, initializer); this.k_min = k_min; this.k_max = k_max; this.k = k_min; this.innerKMeans = innerKMeans; this.splitInitializer = new PredefinedInitialMeans((double[][]) null); this.innerKMeans.setInitializer(this.splitInitializer); this.innerKMeans.setDistanceFunction(distanceFunction); this.informationCriterion = informationCriterion; this.rnd = random; }
// Excerpt: runs the inner k-means on proxyDB and keeps the resulting clustering in innerResult.
Clustering<M> innerResult = innerkMeans.run(proxyDB);
/**
 * Forwards the number of clusters to the wrapped k-means algorithm.
 *
 * @param k number of clusters to set on {@code innerkMeans}
 */
@Override
public void setK(int k) {
  // This wrapper keeps no copy; the inner algorithm owns the setting.
  innerkMeans.setK(k);
}
/**
 * Forwards the initialization method to the wrapped k-means algorithm.
 *
 * @param init initialization method to set on {@code innerkMeans}
 */
@Override
public void setInitializer(KMeansInitialization init) {
  // This wrapper keeps no copy; the inner algorithm owns the setting.
  innerkMeans.setInitializer(init);
}
/**
 * Runs the inner k-means algorithm {@code trials} times and returns the
 * candidate that the quality measure rates best.
 *
 * @param database Database passed through to the inner algorithm
 * @param relation Relation to cluster
 * @return the best clustering found, or {@code null} if {@code trials} is not
 *         positive (the loop body never executes)
 * @throws AbortException if the inner distance function is not a
 *         {@code PrimitiveDistanceFunction}
 */
@Override
public Clustering<M> run(Database database, Relation<V> relation) {
  if(!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
    throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
  }
  @SuppressWarnings("unchecked")
  final NumberVectorDistanceFunction<? super NumberVector> df = (NumberVectorDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction();

  Clustering<M> bestResult = null;
  double bestCost = Double.NaN;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
  for(int i = 0; i < trials; i++) {
    Clustering<M> currentCandidate = innerkMeans.run(database, relation);
    double currentCost = qualityMeasure.quality(currentCandidate, df, relation);
    // Only build the message when it will actually be logged.
    if(LOG.isVerbose()) {
      LOG.verbose("Cost of candidate " + i + ": " + currentCost);
    }
    // Always accept the first candidate: bestCost starts as NaN, and every
    // ordinary comparison against NaN is false, so relying on isBetter alone
    // could leave bestResult null.
    if(bestResult == null || qualityMeasure.isBetter(currentCost, bestCost)) {
      bestResult = currentCandidate;
      bestCost = currentCost;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return bestResult;
}
@Override public double[][] chooseInitialMeans(Database database, Relation<? extends NumberVector> relation, int k, NumberVectorDistanceFunction<?> distanceFunction) { if(relation.size() < k) { throw new IllegalArgumentException("Cannot choose k=" + k + " means from N=" + relation.size() + " < k objects."); } final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), rate, rnd); if(sample.size() < k) { throw new IllegalArgumentException("Sampling rate=" + rate + " from N=" + relation.size() + " yields only " + sample.size() + " < k objects."); } // Ugly cast, sorry @SuppressWarnings("unchecked") Relation<V> rel = (Relation<V>) relation; // FIXME: This does not necessarily hold. Check and fail! if(!distanceFunction.getInputTypeRestriction().isAssignableFromType(TypeUtil.NUMBER_VECTOR_FIELD)) { LoggingUtil.warning("Initializing k-means with k-means using specialized distance functions MAY fail, if the initialization method does require a distance defined on arbitrary number vectors."); } @SuppressWarnings("unchecked") NumberVectorDistanceFunction<? super V> pdf = (NumberVectorDistanceFunction<? super V>) distanceFunction; ProxyView<V> proxyv = new ProxyView<>(sample, rel); ProxyDatabase proxydb = new ProxyDatabase(sample, proxyv); innerkMeans.setK(k); innerkMeans.setDistanceFunction(pdf); Clustering<?> clusters = innerkMeans.run(proxydb, proxyv); double[][] means = new double[clusters.getAllClusters().size()][]; int i = 0; for(Cluster<?> cluster : clusters.getAllClusters()) { means[i++] = ModelUtil.getPrototype(cluster.getModel(), relation).toArray(); } return means; }
// Excerpt: sets the inner k-means to k_min clusters, logs the initializer as a statistic, seeds splitInitializer with the means chosen by initializer, and runs the inner k-means.
// The inner k-means is then re-run with k = nextClusters.size(), and the contents of clusters are replaced by the resulting clusters.
// NOTE(review): nextClusters and clusters are not declared in this excerpt - presumably intermediate code is elided; confirm against the full method.
innerKMeans.setK(k_min); LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); splitInitializer.setInitialMeans(initializer.chooseInitialMeans(database, relation, k_min, getDistanceFunction())); Clustering<M> clustering = innerKMeans.run(database, relation); innerKMeans.setK(nextClusters.size()); clustering = innerKMeans.run(database, relation); clusters.clear(); clusters.addAll(clustering.getAllClusters());
/** * Constructor. * * @param distanceFunction Distance function * @param k_min k_min parameter - minimum number of result clusters * @param k_max k_max parameter - maximum number of result clusters * @param maxiter Maximum number of iterations each. * @param innerKMeans K-Means variant to use inside. * @param informationCriterion The information criterion used for the * splitting step * @param random Random factory */ public XMeans(NumberVectorDistanceFunction<? super V> distanceFunction, int k_min, int k_max, int maxiter, KMeans<V, M> innerKMeans, KMeansInitialization initializer, KMeansQualityMeasure<V> informationCriterion, RandomFactory random) { super(distanceFunction, k_min, maxiter, initializer); this.k_min = k_min; this.k_max = k_max; this.k = k_min; this.innerKMeans = innerKMeans; this.splitInitializer = new PredefinedInitialMeans((double[][]) null); this.innerKMeans.setInitializer(this.splitInitializer); this.innerKMeans.setDistanceFunction(distanceFunction); this.informationCriterion = informationCriterion; this.rnd = random; }
// Excerpt: runs the inner k-means on proxyDB and keeps the resulting clustering in innerResult.
Clustering<M> innerResult = innerkMeans.run(proxyDB);
/**
 * Forwards the number of clusters to the wrapped k-means algorithm.
 *
 * @param k number of clusters to set on {@code innerkMeans}
 */
@Override
public void setK(int k) {
  // This wrapper keeps no copy; the inner algorithm owns the setting.
  innerkMeans.setK(k);
}
/**
 * Forwards the initialization method to the wrapped k-means algorithm.
 *
 * @param init initialization method to set on {@code innerkMeans}
 */
@Override
public void setInitializer(KMeansInitialization init) {
  // This wrapper keeps no copy; the inner algorithm owns the setting.
  innerkMeans.setInitializer(init);
}
/**
 * Runs the inner k-means algorithm {@code trials} times and returns the
 * candidate that the quality measure rates best.
 *
 * @param database Database passed through to the inner algorithm
 * @param relation Relation to cluster
 * @return the best clustering found, or {@code null} if {@code trials} is not
 *         positive (the loop body never executes)
 * @throws AbortException if the inner distance function is not a
 *         {@code PrimitiveDistanceFunction}
 */
@Override
public Clustering<M> run(Database database, Relation<V> relation) {
  if(!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
    throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
  }
  @SuppressWarnings("unchecked")
  final NumberVectorDistanceFunction<? super NumberVector> df = (NumberVectorDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction();

  Clustering<M> bestResult = null;
  double bestCost = Double.NaN;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
  for(int i = 0; i < trials; i++) {
    Clustering<M> currentCandidate = innerkMeans.run(database, relation);
    double currentCost = qualityMeasure.quality(currentCandidate, df, relation);
    if(LOG.isVerbose()) {
      LOG.verbose("Cost of candidate " + i + ": " + currentCost);
    }
    // Always accept the first candidate: bestCost starts as NaN, and every
    // ordinary comparison against NaN is false, so relying on isBetter alone
    // could leave bestResult null.
    if(bestResult == null || qualityMeasure.isBetter(currentCost, bestCost)) {
      bestResult = currentCandidate;
      bestCost = currentCost;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return bestResult;
}
@Override public double[][] chooseInitialMeans(Database database, Relation<? extends NumberVector> relation, int k, NumberVectorDistanceFunction<?> distanceFunction) { if(relation.size() < k) { throw new IllegalArgumentException("Cannot choose k=" + k + " means from N=" + relation.size() + " < k objects."); } final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), rate, rnd); if(sample.size() < k) { throw new IllegalArgumentException("Sampling rate=" + rate + " from N=" + relation.size() + " yields only " + sample.size() + " < k objects."); } // Ugly cast, sorry @SuppressWarnings("unchecked") Relation<V> rel = (Relation<V>) relation; // FIXME: This does not necessarily hold. Check and fail! if(!distanceFunction.getInputTypeRestriction().isAssignableFromType(TypeUtil.NUMBER_VECTOR_FIELD)) { LoggingUtil.warning("Initializing k-means with k-means using specialized distance functions MAY fail, if the initialization method does require a distance defined on arbitrary number vectors."); } @SuppressWarnings("unchecked") NumberVectorDistanceFunction<? super V> pdf = (NumberVectorDistanceFunction<? super V>) distanceFunction; ProxyView<V> proxyv = new ProxyView<>(sample, rel); ProxyDatabase proxydb = new ProxyDatabase(sample, proxyv); innerkMeans.setK(k); innerkMeans.setDistanceFunction(pdf); Clustering<?> clusters = innerkMeans.run(proxydb, proxyv); double[][] means = new double[clusters.getAllClusters().size()][]; int i = 0; for(Cluster<?> cluster : clusters.getAllClusters()) { means[i++] = ModelUtil.getPrototype(cluster.getModel(), relation).toArray(); } return means; }
// Excerpt: sets the inner k-means to k_min clusters and, only when statistics logging is enabled, logs the initializer as a statistic; the inner k-means is run, then re-run with k = nextClusters.size(), and the contents of clusters are replaced by the resulting clusters.
// NOTE(review): the `if(LOG.isStatistics()) {` opened here is not closed within this excerpt, and nextClusters / clusters are not declared here - presumably code is elided; confirm against the full method.
innerKMeans.setK(k_min); if(LOG.isStatistics()) { LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); Clustering<M> clustering = innerKMeans.run(database, relation); innerKMeans.setK(nextClusters.size()); clustering = innerKMeans.run(database, relation); clusters.clear(); clusters.addAll(clustering.getAllClusters());