private double cuScoreForBestTwoMerged(CNode merged, CNode a, CNode b, Instance newInstance) throws Exception { double mergedCU = -Double.MAX_VALUE; // consider merging the best and second // best. merged.m_clusterInstances = new Instances(m_clusterInstances, 1); merged.addChildNode(a); merged.addChildNode(b); merged.updateStats(newInstance, false); // add new instance to stats // remove the best and second best nodes m_children.remove(m_children.indexOf(a)); m_children.remove(m_children.indexOf(b)); m_children.add(merged); mergedCU = categoryUtility(); // restore the status quo merged.updateStats(newInstance, true); m_children.remove(m_children.indexOf(merged)); m_children.add(a); m_children.add(b); return mergedCU; }
/**
 * Numbers this node and then every node below it.
 *
 * <p>
 * The running counter is kept in <code>cl_num[0]</code>: this node takes the
 * current value, the counter is incremented, and the children are then
 * visited in list order with the same array.
 *
 * @param cl_num an <code>int[]</code> whose first element is the next
 *          number to hand out
 * @throws Exception if a node has a child list with fewer than two entries
 */
private void assignClusterNums(int[] cl_num) throws Exception {
  final boolean hasChildren = (m_children != null);

  // An internal node with fewer than two children is treated as malformed.
  if (hasChildren && m_children.size() < 2) {
    throw new Exception("assignClusterNums: tree not built correctly!");
  }

  // Take the current number, then advance the shared counter.
  m_clusterNum = cl_num[0]++;

  if (!hasChildren) {
    return;
  }
  for (CNode child : m_children) {
    child.assignClusterNums(cl_num);
  }
}
/**
 * Averages the per-child category utility over all children of this node.
 *
 * @return the sum of {@code categoryUtilityChild(child)} over the children,
 *         divided by the number of children
 * @throws Exception if this node has no child list
 */
protected double categoryUtility() throws Exception {
  if (m_children == null) {
    throw new Exception("categoryUtility: No children!");
  }

  double sum = 0;
  for (CNode child : m_children) {
    sum += categoryUtilityChild(child);
  }
  return sum / m_children.size();
}
updateStats(newInstance, false); double[] categoryUtils = cuScoresForChildren(newInstance); CNode newLeaf = new CNode(m_numAttributes, newInstance); m_children.add(newLeaf); double bestHostCU = categoryUtility(); CNode finalBestHost = newLeaf; CNode merged = new CNode(m_numAttributes); if (a != b) { mergedCU = cuScoreForBestTwoMerged(merged, a, b, newInstance); splitPlusNewLeafCU = categoryUtility(); // split + new leaf categoryUtils = cuScoresForChildren(newInstance); CNode mergedSplitChildren = new CNode(m_numAttributes); if (sa != sb) { splitPlusMergeBestTwoCU = cuScoreForBestTwoMerged( mergedSplitChildren, sa, sb, newInstance); finalBestHost = new CNode(m_numAttributes); m_children.add(finalBestHost); updateStats(newInstance, true);
updateStats(newInstance, false); double[] categoryUtils = cuScoresForChildren(newInstance); CNode newLeaf = new CNode(m_numAttributes, newInstance); m_children.add(newLeaf); double bestHostCU = categoryUtility(); CNode finalBestHost = newLeaf; CNode merged = new CNode(m_numAttributes); if (a != b) { mergedCU = cuScoreForBestTwoMerged(merged, a, b, newInstance); splitPlusNewLeafCU = categoryUtility(); // split + new leaf categoryUtils = cuScoresForChildren(newInstance); CNode mergedSplitChildren = new CNode(m_numAttributes); if (sa != sb) { splitPlusMergeBestTwoCU = cuScoreForBestTwoMerged( mergedSplitChildren, sa, sb, newInstance); finalBestHost = new CNode(m_numAttributes); m_children.add(finalBestHost); updateStats(newInstance, true);
m_clusterInstances = new Instances(newInstance.dataset(), 1); m_clusterInstances.add(newInstance); updateStats(newInstance, false); return; } else if (m_children == null) { CNode tempSubCluster = new CNode(m_numAttributes, m_clusterInstances.instance(0)); tempSubCluster.updateStats(m_clusterInstances.instance(i), false); m_children.add(new CNode(m_numAttributes, newInstance)); updateStats(newInstance, false); if (categoryUtility() < m_cutoff) { CNode bestHost = findHost(newInstance, false); if (bestHost != null) { bestHost.addInstance(newInstance);
m_clusterInstances = new Instances(newInstance.dataset(), 1); m_clusterInstances.add(newInstance); updateStats(newInstance, false); return; } else if (m_children == null) { CNode tempSubCluster = new CNode(m_numAttributes, m_clusterInstances.instance(0)); tempSubCluster.updateStats(m_clusterInstances.instance(i), false); m_children.add(new CNode(m_numAttributes, newInstance)); updateStats(newInstance, false); if (categoryUtility() < m_cutoff) { CNode bestHost = findHost(newInstance, false); if (bestHost != null) { bestHost.addInstance(newInstance);
CNode tempNode = new CNode(m_numAttributes); tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1); for (int i = 0; i < m_children.size(); i++) { tempNode.addChildNode(m_children.get(i));
CNode tempNode = new CNode(m_numAttributes); tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1); for (int i = 0; i < m_children.size(); i++) { tempNode.addChildNode(m_children.get(i));
/**
 * Appends a textual rendering of the subtree rooted at this node.
 *
 * <p>
 * A leaf contributes one line ("leaf" plus its cluster number and instance
 * count). An internal node contributes, for each child, one "node" line for
 * itself followed by that child's rendering one level deeper. Every line is
 * prefixed with one "| " per level of depth. When called with depth 0,
 * <code>determineNumberOfClusters()</code> is invoked first.
 *
 * @param depth depth of this node in the tree
 * @param text buffer that receives the rendering
 */
protected void dumpTree(int depth, StringBuffer text) {
  if (depth == 0) {
    determineNumberOfClusters();
  }

  if (m_children == null) {
    text.append("\n");
    int level = 0;
    while (level < depth) {
      text.append("| ");
      level++;
    }
    String leafLine = "leaf " + m_clusterNum + " ["
      + m_clusterInstances.numInstances() + "]";
    text.append(leafLine);
    return;
  }

  for (CNode child : m_children) {
    text.append("\n");
    int level = 0;
    while (level < depth) {
      text.append("| ");
      level++;
    }
    String nodeLine = "node " + m_clusterNum + " ["
      + m_clusterInstances.numInstances() + "]";
    text.append(nodeLine);
    child.dumpTree(depth + 1, text);
  }
}
/**
 * Appends the graph description of the subtree rooted at this node.
 *
 * <p>
 * Output order: this node's declaration, then one edge line per child, then
 * each child's own subtree. The declaration is assembled completely before
 * it is appended, so nothing is written for this node if
 * <code>dumpData()</code> fails.
 *
 * @param text buffer that receives the graph string
 * @throws Exception if generation fails
 */
protected void graphTree(StringBuffer text) throws Exception {
  final boolean isLeaf = (m_children == null);

  // Node declaration: N<num> [label="<leaf|node> <num>  (<count>)" ...]
  StringBuilder decl = new StringBuilder();
  decl.append("N").append(m_clusterNum).append(" [label=\"");
  decl.append(isLeaf ? "leaf " : "node ");
  decl.append(m_clusterNum).append(" ").append(" (");
  decl.append(m_clusterInstances.numInstances()).append(")\" ");
  if (isLeaf) {
    decl.append("shape=box style=filled ");
  }
  if (m_saveInstances) {
    decl.append("data =\n").append(dumpData()).append("\n,\n");
  }
  decl.append("]\n");
  text.append(decl.toString());

  if (isLeaf) {
    return;
  }

  // All outgoing edges first ...
  for (CNode child : m_children) {
    text.append("N" + m_clusterNum + "->" + "N" + child.m_clusterNum + "\n");
  }
  // ... then the subtrees themselves.
  for (CNode child : m_children) {
    child.graphTree(text);
  }
}
/**
 * Appends a textual rendering of the subtree rooted at this node.
 *
 * <p>
 * A leaf contributes one line ("leaf" plus its cluster number and instance
 * count). An internal node contributes, for each child, one "node" line for
 * itself followed by that child's rendering one level deeper. Every line is
 * prefixed with one "| " per level of depth. When called with depth 0,
 * <code>determineNumberOfClusters()</code> is invoked first.
 *
 * @param depth depth of this node in the tree
 * @param text buffer that receives the rendering
 */
protected void dumpTree(int depth, StringBuffer text) {
  if (depth == 0) {
    determineNumberOfClusters();
  }

  if (m_children == null) {
    text.append("\n");
    int level = 0;
    while (level < depth) {
      text.append("| ");
      level++;
    }
    String leafLine = "leaf " + m_clusterNum + " ["
      + m_clusterInstances.numInstances() + "]";
    text.append(leafLine);
    return;
  }

  for (CNode child : m_children) {
    text.append("\n");
    int level = 0;
    while (level < depth) {
      text.append("| ");
      level++;
    }
    String nodeLine = "node " + m_clusterNum + " ["
      + m_clusterInstances.numInstances() + "]";
    text.append(nodeLine);
    child.dumpTree(depth + 1, text);
  }
}
/** * Temporarily adds a new instance to each of this nodes children in turn * and computes the category utility. * * @param newInstance the new instance to evaluate * @return an array of category utility values---the result of considering * each child in turn as a host for the new instance * @throws Exception if an error occurs */ private double[] cuScoresForChildren(Instance newInstance) throws Exception { // look for a host in existing children double[] categoryUtils = new double[m_children.size()]; // look for a home for this instance in the existing children for (int i = 0; i < m_children.size(); i++) { CNode temp = m_children.get(i); // tentitively add the new instance to this child temp.updateStats(newInstance, false); categoryUtils[i] = categoryUtility(); // remove the new instance from this child temp.updateStats(newInstance, true); } return categoryUtils; }
/**
 * Appends the graph description of the subtree rooted at this node.
 *
 * <p>
 * Output order: this node's declaration, then one edge line per child, then
 * each child's own subtree. The declaration is assembled completely before
 * it is appended, so nothing is written for this node if
 * <code>dumpData()</code> fails.
 *
 * @param text buffer that receives the graph string
 * @throws Exception if generation fails
 */
protected void graphTree(StringBuffer text) throws Exception {
  final boolean isLeaf = (m_children == null);

  // Node declaration: N<num> [label="<leaf|node> <num>  (<count>)" ...]
  StringBuilder decl = new StringBuilder();
  decl.append("N").append(m_clusterNum).append(" [label=\"");
  decl.append(isLeaf ? "leaf " : "node ");
  decl.append(m_clusterNum).append(" ").append(" (");
  decl.append(m_clusterInstances.numInstances()).append(")\" ");
  if (isLeaf) {
    decl.append("shape=box style=filled ");
  }
  if (m_saveInstances) {
    decl.append("data =\n").append(dumpData()).append("\n,\n");
  }
  decl.append("]\n");
  text.append(decl.toString());

  if (isLeaf) {
    return;
  }

  // All outgoing edges first ...
  for (CNode child : m_children) {
    text.append("N" + m_clusterNum + "->" + "N" + child.m_clusterNum + "\n");
  }
  // ... then the subtrees themselves.
  for (CNode child : m_children) {
    child.graphTree(text);
  }
}
/** * Temporarily adds a new instance to each of this nodes children in turn * and computes the category utility. * * @param newInstance the new instance to evaluate * @return an array of category utility values---the result of considering * each child in turn as a host for the new instance * @throws Exception if an error occurs */ private double[] cuScoresForChildren(Instance newInstance) throws Exception { // look for a host in existing children double[] categoryUtils = new double[m_children.size()]; // look for a home for this instance in the existing children for (int i = 0; i < m_children.size(); i++) { CNode temp = m_children.get(i); // tentitively add the new instance to this child temp.updateStats(newInstance, false); categoryUtils[i] = categoryUtility(); // remove the new instance from this child temp.updateStats(newInstance, true); } return categoryUtils; }
/**
 * Averages the per-child category utility over all children of this node.
 *
 * @return the sum of {@code categoryUtilityChild(child)} over the children,
 *         divided by the number of children
 * @throws Exception if this node has no child list
 */
protected double categoryUtility() throws Exception {
  if (m_children == null) {
    throw new Exception("categoryUtility: No children!");
  }

  double sum = 0;
  for (CNode child : m_children) {
    sum += categoryUtilityChild(child);
  }
  return sum / m_children.size();
}
/**
 * Returns a description of the clusterer as a string.
 *
 * <p>
 * If no tree has been built, a fixed notice is returned. Otherwise the
 * result lists the merge, split and cluster counts followed by the tree
 * dump. The tree is dumped before <code>numberOfClusters()</code> is
 * queried.
 *
 * @return a string describing the clusterer.
 */
@Override
public String toString() {
  if (m_cobwebTree == null) {
    return "Cobweb hasn't been built yet!";
  }

  StringBuffer tree = new StringBuffer();
  m_cobwebTree.dumpTree(0, tree);

  StringBuilder summary = new StringBuilder();
  summary.append("Number of merges: ").append(m_numberMerges);
  summary.append("\nNumber of splits: ").append(m_numberSplits);
  summary.append("\nNumber of clusters: ").append(numberOfClusters());
  summary.append("\n").append(tree.toString()).append("\n\n");
  return summary.toString();
}
private double cuScoreForBestTwoMerged(CNode merged, CNode a, CNode b, Instance newInstance) throws Exception { double mergedCU = -Double.MAX_VALUE; // consider merging the best and second // best. merged.m_clusterInstances = new Instances(m_clusterInstances, 1); merged.addChildNode(a); merged.addChildNode(b); merged.updateStats(newInstance, false); // add new instance to stats // remove the best and second best nodes m_children.remove(m_children.indexOf(a)); m_children.remove(m_children.indexOf(b)); m_children.add(merged); mergedCU = categoryUtility(); // restore the status quo merged.updateStats(newInstance, true); m_children.remove(m_children.indexOf(merged)); m_children.add(a); m_children.add(b); return mergedCU; }
/**
 * Numbers this node and then every node below it.
 *
 * <p>
 * The running counter is kept in <code>cl_num[0]</code>: this node takes the
 * current value, the counter is incremented, and the children are then
 * visited in list order with the same array.
 *
 * @param cl_num an <code>int[]</code> whose first element is the next
 *          number to hand out
 * @throws Exception if a node has a child list with fewer than two entries
 */
private void assignClusterNums(int[] cl_num) throws Exception {
  final boolean hasChildren = (m_children != null);

  // An internal node with fewer than two children is treated as malformed.
  if (hasChildren && m_children.size() < 2) {
    throw new Exception("assignClusterNums: tree not built correctly!");
  }

  // Take the current number, then advance the shared counter.
  m_clusterNum = cl_num[0]++;

  if (!hasChildren) {
    return;
  }
  for (CNode child : m_children) {
    child.assignClusterNums(cl_num);
  }
}
/**
 * Returns a description of the clusterer as a string.
 *
 * <p>
 * If no tree has been built, a fixed notice is returned. Otherwise the
 * result lists the merge, split and cluster counts followed by the tree
 * dump. The tree is dumped before <code>numberOfClusters()</code> is
 * queried.
 *
 * @return a string describing the clusterer.
 */
@Override
public String toString() {
  if (m_cobwebTree == null) {
    return "Cobweb hasn't been built yet!";
  }

  StringBuffer tree = new StringBuffer();
  m_cobwebTree.dumpTree(0, tree);

  StringBuilder summary = new StringBuilder();
  summary.append("Number of merges: ").append(m_numberMerges);
  summary.append("\nNumber of splits: ").append(m_numberSplits);
  summary.append("\nNumber of clusters: ").append(numberOfClusters());
  summary.append("\n").append(tree.toString()).append("\n\n");
  return summary.toString();
}