/**
 * Creates a new cluster instance.
 *
 * <p>A {@code ClusterFactory} is built from this object's {@code typeDefs} and
 * {@code onlineUpdate} settings, and the cluster produced by its
 * {@code create()} method is returned.
 *
 * @return the new cluster instance
 */
public Cluster createCluster() {
    ClusterFactory factory = new ClusterFactory(this.typeDefs, onlineUpdate);
    return factory.create();
}
}
/**
 * Wraps a single instance in a freshly created cluster.
 *
 * @param instance a pair whose second element is the instance to add to the new cluster
 * @return a new map with exactly one entry: the new cluster, keyed by its id
 * @throws Exception declared by the overridden method's signature
 */
@Override
public Map<String, Instance> call(Tuple2<String, Instance> instance) throws Exception {
    // Build the cluster first, then publish it under its own id.
    Cluster singleton = clusterFactory.create();
    singleton.add(instance._2);

    Map<String, Instance> byId = new HashMap<String, Instance>();
    byId.put(singleton.getId(), singleton);
    return byId;
}
// Factory configured with this object's typeDefs and onlineUpdate settings.
ClusterFactory clusterFactory = new ClusterFactory(this.typeDefs, this.onlineUpdate);
@Override public Instance call(Instance inst1, Instance inst2) throws Exception { Cluster c = clusterFactory.create(); // aggregate inst1 and inst2 in a cluster updateCluster(inst1, c); updateCluster(inst2, c); return c; }
// Factory configured with this object's typeDefs and onlineUpdate settings.
ClusterFactory clusterFactory = new ClusterFactory(this.typeDefs, this.onlineUpdate);
/**
 * Assigns every instance from the iterator to a cluster, creating clusters on demand.
 *
 * <p>Each instance is handed to a {@code BestClusterFunction} that shares the
 * local cluster map. When the returned pair has a {@code null} first element,
 * a new cluster is created and registered in the map; otherwise the cluster
 * stored under that id is reused. The instance is then added to the chosen cluster.
 *
 * @param instances the pairs to process; the second element of each pair is the instance
 * @return the values view of the cluster map built while consuming the iterator
 * @throws Exception declared by the overridden method's signature
 */
@Override
public Iterable<Instance> call(Iterator<Tuple2<String, Instance>> instances) throws Exception {
    Map<String, Instance> clusters = new HashMap<String, Instance>();
    // The selector holds a reference to the same map, so it sees clusters added below.
    BestClusterFunction selector = new BestClusterFunction(distFunc, clusters, threshold);

    while (instances.hasNext()) {
        Tuple2<String, Instance> current = instances.next();
        Tuple2<String, Instance> match = selector.call(current);

        Cluster target;
        if (match._1 != null) {
            // An existing cluster was selected: look it up by id.
            target = (Cluster) clusters.get(match._1);
        } else {
            // No cluster was selected: start a new one and register it.
            target = clusterFactory.create();
            clusters.put(target.getId(), target);
        }
        target.add(current._2);
    }

    return clusters.values();
}
@Override public SparkClusterResult doCluster(DataSet ds) { distFunc = new DistanceFunction(this.typeDefs); clusterFactory = new ClusterFactory(this.typeDefs, this.onlineUpdate);
/**
 * Finds the known cluster closest to the given instance, or creates a new cluster for it.
 *
 * <p>A cluster qualifies only if its distance to the instance is strictly
 * below {@code threshold}; among qualifying clusters the one with the
 * smallest distance is chosen. If none qualifies, a new cluster containing
 * the instance is created.
 *
 * @param inst a pair whose second element is the instance to place
 * @return a new single-entry map holding the chosen or newly created cluster, keyed by its id
 * @throws Exception declared by the overridden method's signature
 */
@Override
public Map<String, Instance> call(Tuple2<String, Instance> inst) throws Exception {
    Instance nearest = null;
    double nearestDistance = Double.MAX_VALUE;

    for (Instance candidate : clusters.values()) {
        double distance = distFunc.distance(inst._2, candidate);
        if (distance < threshold && distance < nearestDistance) {
            nearestDistance = distance;
            nearest = candidate;
        }
    }

    Map<String, Instance> result = new HashMap<String, Instance>();
    if (nearest != null) {
        result.put(nearest.getId(), nearest);
        return result;
    }

    // Nothing within the threshold: wrap the instance in a cluster of its own.
    Cluster fresh = clusterFactory.create();
    fresh.add(inst._2);
    result.put(fresh.getId(), fresh);
    return result;
}
}