private Node readNodeB(HierarchicalDoubleKMeansResult hDoublekm, DataInput dis) throws IOException { Node node = new Node(); char type = (char) dis.readByte(); //read result data node.result = new DoubleCentroidsResult(); node.result.readBinary(dis); if (type == 'I') { node.children = new Node[node.result.numClusters()]; for (int i=0; i<node.result.numClusters(); i++) { node.children[i] = readNodeB(hDoublekm, dis); } } else { node.children = null; } return node ; }
/**
 * Looks up the centroid addressed by a path through the cluster tree.
 * <p>
 * Every element of {@code path} except the last selects a child while descending
 * from the root; the last element indexes into the centroids of the node that
 * was reached.
 *
 * @param path cluster indices, one per level, ending with the index of the wanted centroid
 * @return the centroid of a given path
 */
public double [] getClusterCentroid(int [] path) {
    Node current = root;
    final int last = path.length - 1;

    // Walk down the tree using all but the final path element.
    int depth = 0;
    while (depth < last) {
        current = current.children[path[depth]];
        depth++;
    }

    return current.result.getCentroids()[path[last]];
}
private Node readNode(HierarchicalDoubleKMeansResult hDoublekm, Scanner reader) throws IOException { String line; while ((line = reader.nextLine()).length()==0) {/*do nothing*/} char type = line.charAt(0); //read result data Node node = new Node(); node.result = new DoubleCentroidsResult(); node.result.readASCII(reader); if (type == 'I') { node.children = new Node[node.result.numClusters()]; for (int i=0; i<node.result.numClusters(); i++) { node.children[i] = readNode(hDoublekm,reader); } } else { node.children = null; } return node ; }
private void writeNodeB(DataOutput dos, Node node) throws IOException { //write node type char type; if (node.children == null) type='L'; //intermediate else type='I'; //leaf dos.writeByte(type); //write result data node.result.writeBinary(dos); //write children if (node.children != null) { for (int i=0; i<node.result.numClusters(); i++) { writeNodeB(dos, node.children[i]); } } }
private void writeNodeASCII(PrintWriter writer, final Node node) throws IOException { //write node type if (node.children == null) writer.write("L\n"); //intermediate else writer.write("I\n"); //leaf //write result data node.result.writeASCII(writer); // node.result.writeASCII(writer, false); writer.flush(); //write children if (node.children != null) { for (int i=0; i<node.result.numClusters(); i++) { writeNodeASCII(writer, node.children[i]); } } }
/**
 * Clusters the data and groups the data indices by their assigned cluster.
 * <p>
 * The centroids produced by {@link #cluster(double[][])} are used, through their
 * default hard assigner, to assign every row of {@code data} to a cluster.
 *
 * @param data the data to cluster, one item per row
 * @return the clusters built by {@code IndexClusters} from the per-item assignments
 */
@Override
public int[][] performClustering(double[][] data) {
    final int[] assignments = this.cluster(data).defaultHardAssigner().assign(data);
    final IndexClusters grouped = new IndexClusters(assignments);
    return grouped.clusters();
}
/**
 * Selects K elements from the provided data as the centroids of the clusters.
 * If K is -1, as many elements are drawn as there are data points. Each element
 * is drawn independently at random, so the same data point may be selected more
 * than once. Every selected row is copied into the result.
 *
 * @param data source of centroids
 * @return the selected centroids
 */
@Override
public DoubleCentroidsResult cluster(double[][] data) {
    final int count = (this.K == -1) ? data.length : this.K;

    final DoubleCentroidsResult picked = new DoubleCentroidsResult();
    picked.centroids = new double[count][];

    int slot = 0;
    while (slot < count) {
        // Draw with replacement and store a copy of the chosen row.
        final double[] source = data[this.random.nextInt(data.length)];
        picked.centroids[slot] = source.clone();
        slot++;
    }
    return picked;
}
/**
 * Counts the leaf clusters in the sub-tree rooted at the given node.
 * <p>
 * A node without children contributes its own number of clusters; any other
 * node contributes the sum of the counts of its children.
 *
 * @param node the root of the sub-tree to count
 * @return the total number of leaf clusters below (and including) {@code node}
 */
private int countLeaves(Node node) {
    final int clusters = node.result.numClusters();
    if (node.children == null) {
        return clusters;
    }

    int total = 0;
    for (int child = 0; child < clusters; child++) {
        total += countLeaves(node.children[child]);
    }
    return total;
}
/**
 * Clusters the data and groups the data indices by their assigned cluster.
 * <p>
 * The centroids produced by {@link #cluster(double[][])} are used, through their
 * default hard assigner, to assign every row of {@code data} to a cluster.
 *
 * @param data the data to cluster, one item per row
 * @return the clusters built by {@code IndexClusters} from the per-item assignments
 */
@Override
public int[][] performClustering(double[][] data) {
    return new IndexClusters(
            this.cluster(data).defaultHardAssigner().assign(data)
    ).clusters();
}
DoubleCentroidsResult result = new DoubleCentroidsResult();
/**
 * Compute HierarchicalDoubleKMeans clustering.
 * <p>
 * The data is clustered into {@code K} clusters for this node. While
 * {@code height} is greater than 1, the data is split by assigned cluster and
 * each partition is clustered recursively with {@code height - 1}. A node built
 * with {@code height <= 1} is a leaf and has {@code children == null}.
 *
 * @param data Data to cluster.
 * @param K Number of clusters for this node.
 * @param height Tree height.
 *
 * @return a new HierarchicalDoubleKMeans node representing a sub-clustering.
 **/
private Node trainLevel(final double[][] data, int K, int height) {
    // A single condition decides both whether the child array exists and whether
    // it gets filled, so a node can never keep an array of null children
    // (previously possible for height < 1).
    final boolean hasChildren = height > 1;

    Node node = new Node();
    node.children = hasChildren ? new Node[K] : null;

    DoubleKMeans kmeans = newDoubleKMeans(K);
    node.result = kmeans.cluster(data);

    if (hasChildren) {
        // The assigner is only needed to partition the data for the children.
        HardAssigner<double[], double[], IntDoublePair> assigner = node.result.defaultHardAssigner();
        int[] ids = assigner.assign(data);

        for (int k = 0; k < K; k++) {
            double[][] partition = extractSubset(data, ids, k);

            // Never ask for more clusters than the partition has points.
            // NOTE(review): an empty partition gives partitionK == 0 - confirm that
            // newDoubleKMeans(0) and cluster() on empty data are handled.
            int partitionK = Math.min(K, partition.length);

            node.children[k] = trainLevel(partition, partitionK, height - 1);
        }
    }

    return node;
}
/**
 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters.
 * If K is -1, as many elements are drawn as the data source has items. Each
 * element is drawn independently at random, so the same data point may be
 * selected more than once.
 *
 * @param data a data source object
 * @return the selected centroids
 */
@Override
public DoubleCentroidsResult cluster(DataSource<double[]> data) {
    final int count = (this.K == -1) ? data.size() : this.K;

    final DoubleCentroidsResult picked = new DoubleCentroidsResult();
    picked.centroids = new double[count][M];

    // Single-row holder that is pointed at the centroid slot being filled, so the
    // data source writes straight into the result array.
    final double[][] target = new double[1][];

    for (int slot = 0; slot < count; slot++) {
        final int row = this.random.nextInt(data.size());
        target[0] = picked.centroids[slot];
        // Presumably a half-open range selecting exactly one row - confirm
        // against DataSource#getData.
        data.getData(row, row + 1, target);
    }
    return picked;
}
}
node.result = kmeans.cluster(data); HardAssigner<double[], double[], IntDoublePair> assigner = node.result.defaultHardAssigner();
/**
 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters.
 * If K is -1, as many elements are selected as the data source has items.
 * <p>
 * The selection itself is delegated to {@code DataSource#getRandomRows}. The
 * intended contract is that the same data point is not selected more than once,
 * although two separate entries with identical values may both be selected.
 *
 * @param data a data source object
 * @return the selected centroids
 */
@Override
public DoubleCentroidsResult cluster(DataSource<double[]> data) {
    final DoubleCentroidsResult picked = new DoubleCentroidsResult();

    final int rows = (K == -1) ? data.size() : K;
    picked.centroids = new double[rows][data.numDimensions()];

    // The data source fills the pre-allocated rows in place.
    data.getRandomRows(picked.centroids);
    return picked;
}
}