@Override
public Result cluster(DataSource<short[]> ds) {
    try {
        final Result result = cluster(ds, conf.K);
        result.nn = conf.factory.create(result.centroids);
        return result;
    } catch (final InterruptedException e) {
        // The underlying cluster(DataSource, int) declares InterruptedException;
        // restore the interrupt status so callers up the stack can observe it
        // instead of silently swallowing it inside the RuntimeException wrapper.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (final Exception e) {
        // Preserve the original cause for diagnosis.
        throw new RuntimeException(e);
    }
}
/**
 * Main clustering algorithm. A number of threads as specified are
 * started each containing an assignment job and a reference to
 * the same set of ShortNearestNeighbours object (i.e. Exact or KDTree).
 * Each thread is added to a job pool and started in parallel.
 * A single accumulator is shared between all threads and locked on update.
 * <br/>
 * This method expects that the initial centroids have already been set in
 * the <code>result</code> object and as such <strong>ignores</strong> the
 * init object. <strong>In normal operation you should call one of the other
 * <code>cluster</code> methods instead of this one.</strong> However, if you
 * wish to resume clustering iterations from a result that you've already
 * generated, this is the method to use.
 *
 * @param data the data to be clustered
 * @param result the results object to be populated
 * @throws InterruptedException if interrupted while waiting, in
 *             which case unfinished tasks are cancelled.
 */
public void cluster(short[][] data, Result result) throws InterruptedException {
    // Wrap the raw array in a DataSource and delegate to the DataSource variant.
    DataSource<short[]> ds = new ShortArrayBackedDataSource(data, rng);

    cluster(ds, result);
}
@Override
public Result cluster(short[][] data) {
    // Wrap the raw array in a DataSource and delegate to the DataSource variant.
    final DataSource<short[]> ds = new ShortArrayBackedDataSource(data, rng);

    try {
        final Result result = cluster(ds, conf.K);
        result.nn = conf.factory.create(result.centroids);
        return result;
    } catch (final InterruptedException e) {
        // The underlying cluster(DataSource, int) declares InterruptedException;
        // restore the interrupt status so callers up the stack can observe it
        // instead of silently swallowing it inside the RuntimeException wrapper.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (final Exception e) {
        // Preserve the original cause for diagnosis.
        throw new RuntimeException(e);
    }
}
/**
 * Initiate clustering with the given data and number of clusters.
 * Internally this method constructs the array to hold the centroids,
 * initialises them via the configured init object, and then calls
 * {@link #cluster(DataSource, Result)} to run the iterations.
 *
 * @param data data source to cluster with
 * @param K number of clusters to find
 * @return cluster centroids
 * @throws Exception if initialisation or clustering fails (including
 *             {@link InterruptedException} from the clustering iterations)
 */
protected Result cluster(DataSource<short[]> data, int K) throws Exception {
    int D = data.numDimensions();

    Result result = new Result();
    result.centroids = new short[K][D];

    // Seed the centroids before iterating; cluster(DataSource, Result)
    // deliberately ignores the init object.
    init.initKMeans(data, result.centroids);

    cluster(data, result);

    return result;
}
final ShortNearestNeighboursProvider centroids = (ShortNearestNeighboursProvider) kmeans.cluster(tmp);
@Override
public int[][] performClustering(short[][] data) {
    // Run k-means over the data, then hard-assign every point to its
    // nearest centroid and regroup the assignments as index clusters.
    final ShortCentroidsResult result = this.cluster(data);
    final int[] assignments = result.defaultHardAssigner().assign(data);

    return new IndexClusters(assignments).clusters();
}
/**
 * Compute HierarchicalShortKMeans clustering.
 *
 * @param data Data to cluster.
 * @param K Number of clusters for this node.
 * @param height Tree height.
 *
 * @return a new HierarchicalShortKMeans node representing a sub-clustering.
 **/
private Node trainLevel(final short[][] data, int K, int height) {
    final Node node = new Node();

    // Leaf nodes (height == 1) carry no children.
    node.children = (height > 1) ? new Node[K] : null;

    // Cluster this level's data into K centroids.
    final ShortKMeans kmeans = newShortKMeans(K);
    node.result = kmeans.cluster(data);

    final HardAssigner<short[], float[], IntFloatPair> assigner =
            node.result.defaultHardAssigner();

    if (height > 1) {
        // Partition the data by assigned centroid and recurse on each partition.
        final int[] assignments = assigner.assign(data);

        for (int child = 0; child < K; child++) {
            final short[][] subset = extractSubset(data, assignments, child);

            // A partition may hold fewer points than K; cap the child's K accordingly.
            final int childK = Math.min(K, subset.length);
            node.children[child] = trainLevel(subset, childK, height - 1);
        }
    }

    return node;
}
node.result = kmeans.cluster(data);