/**
 * Build a new {@link KMedoidsPAM} from the distance function, k, iteration
 * limit and initializer currently in scope.
 *
 * @return freshly constructed algorithm instance
 */
@Override
protected KMedoidsPAM<V> makeInstance() {
  final KMedoidsPAM<V> pam = new KMedoidsPAM<>(distanceFunction, k, maxiter, initializer);
  return pam;
}
} // closes the enclosing class, whose header lies outside this excerpt
/** * Run k-medoids * * @param database Database * @param relation relation to use * @return result */ public Clustering<MedoidModel> run(Database database, Relation<V> relation) { if(k > 0x7FFF) { throw new NotImplementedException("PAM supports at most " + 0x7FFF + " clusters."); } DistanceQuery<V> distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG); DBIDs ids = relation.getDBIDs(); ArrayModifiableDBIDs medoids = initialMedoids(distQ, ids); // Setup cluster assignment store WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1); Duration optd = getLogger().newDuration(getClass().getName() + ".optimization-time").begin(); run(distQ, ids, medoids, assignment); getLogger().statistics(optd.end()); ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k); // Wrap result Clustering<MedoidModel> result = new Clustering<>("PAM Clustering", "pam-clustering"); for(DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) { result.addToplevelCluster(new Cluster<>(clusters[it.getOffset()], new MedoidModel(DBIDUtil.deref(it)))); } return result; }
return new Clustering<>("PAM Clustering", "pam-clustering"); DistanceQuery<V> distQ = database.getDistanceQuery(relation, getDistanceFunction(), DatabaseQuery.HINT_OPTIMIZED_ONLY); DBIDs ids = relation.getDBIDs(); if(distQ == null && ids instanceof DBIDRange) { LOG.verbose("Adding a distance matrix index to accelerate PAM."); PrecomputedDistanceMatrix<V> idx = new PrecomputedDistanceMatrix<V>(relation, getDistanceFunction()); idx.initialize(); distQ = idx.getDistanceQuery(getDistanceFunction()); distQ = database.getDistanceQuery(relation, getDistanceFunction()); LOG.warning("PAM may be slow, because we do not have a precomputed distance matrix available."); runPAMOptimization(distQ, ids, medoids, assignment);
/**
 * Report the accepted input types: a one-element array holding the input
 * type restriction of the configured distance function.
 *
 * @return input type restriction
 */
@Override
public TypeInformation[] getInputTypeRestriction() {
  final TypeInformation distanceInput = getDistanceFunction().getInputTypeRestriction();
  return TypeUtil.array(distanceInput);
}
/** * Run KMedians PAM with fixed parameters and compare the result to a golden * standard. */ @Test public void testKMedoidsPAM() { Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); Clustering<MedoidModel> result = new ELKIBuilder<KMedoidsPAM<DoubleVector>>(KMedoidsPAM.class) // .with(KMeans.K_ID, 5) // .build().run(db); testFMeasure(db, result, 0.998005); testClusterSizes(result, new int[] { 199, 200, 200, 200, 201 }); } }
/**
 * Choose the initial medoids.
 * <p>
 * Logs the initializer in use (when statistics logging is enabled) and the
 * time spent in initialization, then verifies that exactly {@code k} medoids
 * were produced.
 *
 * @param distQ Distance query
 * @param ids IDs to choose from
 * @return Initial medoids
 * @throws AbortException if the initializer did not return exactly k medoids
 */
protected ArrayModifiableDBIDs initialMedoids(DistanceQuery<V> distQ, DBIDs ids) {
  if(getLogger().isStatistics()) {
    getLogger().statistics(new StringStatistic(getClass().getName() + ".initialization", initializer.toString()));
  }
  // Time only the call into the initializer.
  Duration initd = getLogger().newDuration(getClass().getName() + ".initialization-time").begin();
  ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, ids, distQ));
  getLogger().statistics(initd.end());
  if(medoids.size() != k) {
    // This method validates medoids, so the message must not say "means".
    throw new AbortException("Initializer " + initializer.toString() + " did not return " + k + " medoids, but " + medoids.size());
  }
  return medoids;
}
double tc = assignToNearestCluster(medoids, ids, nearest, second, assignment, distQ); if(LOG.isStatistics()) { LOG.statistics(new DoubleStatistic(KEY + ".iteration-" + 0 + ".cost", tc)); double nc = assignToNearestCluster(medoids, ids, nearest, second, assignment, distQ); if(LOG.isStatistics()) { LOG.statistics(new DoubleStatistic(KEY + ".iteration-" + iteration + ".cost", nc));
/** * Run k-medoids * * @param database Database * @param relation relation to use * @return result */ public Clustering<MedoidModel> run(Database database, Relation<V> relation) { if(k > 0x7FFF) { throw new NotImplementedException("PAM supports at most " + 0x7FFF + " clusters."); } DistanceQuery<V> distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG); DBIDs ids = relation.getDBIDs(); ArrayModifiableDBIDs medoids = initialMedoids(distQ, ids); // Setup cluster assignment store WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1); Duration optd = getLogger().newDuration(getClass().getName() + ".optimization-time").begin(); run(distQ, ids, medoids, assignment); getLogger().statistics(optd.end()); ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k); // Wrap result Clustering<MedoidModel> result = new Clustering<>("PAM Clustering", "pam-clustering"); for(DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) { result.addToplevelCluster(new Cluster<>(clusters[it.getOffset()], new MedoidModel(DBIDUtil.deref(it)))); } return result; }
/**
 * Report the accepted input types: a one-element array holding the input
 * type restriction of the configured distance function.
 *
 * @return input type restriction
 */
@Override
public TypeInformation[] getInputTypeRestriction() {
  final TypeInformation distanceInput = getDistanceFunction().getInputTypeRestriction();
  return TypeUtil.array(distanceInput);
}
/**
 * Choose the initial medoids.
 * <p>
 * Logs the initializer in use (when statistics logging is enabled) and the
 * time spent in initialization, then verifies that exactly {@code k} medoids
 * were produced.
 *
 * @param distQ Distance query
 * @param ids IDs to choose from
 * @return Initial medoids
 * @throws AbortException if the initializer did not return exactly k medoids
 */
protected ArrayModifiableDBIDs initialMedoids(DistanceQuery<V> distQ, DBIDs ids) {
  if(getLogger().isStatistics()) {
    getLogger().statistics(new StringStatistic(getClass().getName() + ".initialization", initializer.toString()));
  }
  // Time only the call into the initializer.
  Duration initd = getLogger().newDuration(getClass().getName() + ".initialization-time").begin();
  ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, ids, distQ));
  getLogger().statistics(initd.end());
  if(medoids.size() != k) {
    // This method validates medoids, so the message must not say "means".
    throw new AbortException("Initializer " + initializer.toString() + " did not return " + k + " medoids, but " + medoids.size());
  }
  return medoids;
}
/**
 * Build a new {@link KMedoidsPAM} from the distance function, k, iteration
 * limit and initializer currently in scope.
 *
 * @return freshly constructed algorithm instance
 */
@Override
protected KMedoidsPAM<V> makeInstance() {
  final KMedoidsPAM<V> pam = new KMedoidsPAM<>(distanceFunction, k, maxiter, initializer);
  return pam;
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Report the accepted input types: a one-element array holding the input
 * type restriction of the configured distance function.
 *
 * @return input type restriction
 */
@Override
public TypeInformation[] getInputTypeRestriction() {
  final TypeInformation distanceInput = getDistanceFunction().getInputTypeRestriction();
  return TypeUtil.array(distanceInput);
}
/**
 * Build a new {@link KMedoidsPAM} from the distance function, k, iteration
 * limit and initializer currently in scope.
 *
 * @return freshly constructed algorithm instance
 */
@Override
protected KMedoidsPAM<V> makeInstance() {
  final KMedoidsPAM<V> pam = new KMedoidsPAM<>(distanceFunction, k, maxiter, initializer);
  return pam;
}
} // closes the enclosing class, whose header lies outside this excerpt