/**
 * Builds a textual name for this node from the split conditions on the
 * path leading to it.
 *
 * <ul>
 *   <li>A node without a parent is named {@code root}.</li>
 *   <li>A direct child of the parentless node is named by a single
 *       condition, e.g. {@code ("feature" <= 0.5)}.</li>
 *   <li>Any deeper node is named by its parent's name followed by
 *       {@code &&} and its own condition.</li>
 * </ul>
 *
 * The condition uses the parent's split feature and threshold; the
 * operator is {@code <=} when this node is the parent's left child and
 * {@code >} otherwise.
 *
 * @return a newly created buffer holding the name
 */
public StringBuffer getStringBufferName() {
    StringBuffer name = new StringBuffer();

    if (m_parent == null) {
        return name.append("root");
    }

    // Only nodes below the first level carry their parent's name as a prefix.
    if (m_parent.getParent() == null) {
        name.append("(\"");
    } else {
        name.append(m_parent.getStringBufferName());
        name.append(" && (\"");
    }

    // The remainder of the condition is the same for every non-root node.
    name.append(m_dataDict.lookupObject(m_parent.getGainRatio().getMaxValuedIndex()).toString());
    name.append("\"");
    name.append(m_parent.getLeftChild() == this ? " <= " : " > ");
    name.append(m_parent.getGainRatio().getMaxValuedThreshold());
    return name.append(")");
}
/**
 * Builds a textual name for this node from the split conditions on the
 * path leading to it.
 *
 * <ul>
 *   <li>A node without a parent is named {@code root}.</li>
 *   <li>A direct child of the parentless node is named by a single
 *       condition, e.g. {@code ("feature" <= 0.5)}.</li>
 *   <li>Any deeper node is named by its parent's name followed by
 *       {@code &&} and its own condition.</li>
 * </ul>
 *
 * The condition uses the parent's split feature and threshold; the
 * operator is {@code <=} when this node is the parent's left child and
 * {@code >} otherwise.
 *
 * @return a newly created buffer holding the name
 */
public StringBuffer getStringBufferName() {
    StringBuffer name = new StringBuffer();

    if (m_parent == null) {
        return name.append("root");
    }

    // Only nodes below the first level carry their parent's name as a prefix.
    if (m_parent.getParent() == null) {
        name.append("(\"");
    } else {
        name.append(m_parent.getStringBufferName());
        name.append(" && (\"");
    }

    // The remainder of the condition is the same for every non-root node.
    name.append(m_dataDict.lookupObject(m_parent.getGainRatio().getMaxValuedIndex()).toString());
    name.append("\"");
    name.append(m_parent.getLeftChild() == this ? " <= " : " > ");
    name.append(m_parent.getGainRatio().getMaxValuedThreshold());
    return name.append(")");
}
/**
 * Builds a textual name for this node from the split conditions on the
 * path leading to it.
 *
 * <ul>
 *   <li>A node without a parent is named {@code root}.</li>
 *   <li>A direct child of the parentless node is named by a single
 *       condition, e.g. {@code ("feature" <= 0.5)}.</li>
 *   <li>Any deeper node is named by its parent's name followed by
 *       {@code &&} and its own condition.</li>
 * </ul>
 *
 * The condition uses the parent's split feature and threshold; the
 * operator is {@code <=} when this node is the parent's left child and
 * {@code >} otherwise.
 *
 * @return a newly created buffer holding the name
 */
public StringBuffer getStringBufferName() {
    StringBuffer name = new StringBuffer();

    if (m_parent == null) {
        return name.append("root");
    }

    // Only nodes below the first level carry their parent's name as a prefix.
    if (m_parent.getParent() == null) {
        name.append("(\"");
    } else {
        name.append(m_parent.getStringBufferName());
        name.append(" && (\"");
    }

    // The remainder of the condition is the same for every non-root node.
    name.append(m_dataDict.lookupObject(m_parent.getGainRatio().getMaxValuedIndex()).toString());
    name.append("\"");
    name.append(m_parent.getLeftChild() == this ? " <= " : " > ");
    name.append(m_parent.getGainRatio().getMaxValuedThreshold());
    return name.append(")");
}
/*
 * Writes a textual description of this node and its children to System.out.
 *
 * From the statements that are visible here:
 *  - this node is reported as "root:" followed by the best label of its base
 *    label distribution and "numMajorityLabel/size", where numMajorityLabel
 *    is the best label's distribution value times getSize(), truncated to int;
 *  - the split is reported as prefix + "\"featName\" <= threshold:";
 *  - the same "label count/size" summary is printed for the left child when
 *    it is a leaf, and for the right child.
 *
 * NOTE(review): this text looks truncated. There is no opening brace after
 * the signature, the if-block is never closed, and bestLabelIndex and
 * numMajorityLabel are declared three times, so some braces and branches
 * (presumably root/leaf/else cases and recursive calls) appear to be missing.
 * Confirm against the original source before relying on this method.
 */
public void print(String prefix) int bestLabelIndex = getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * getSize()); System.out.println("root:" + getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + getSize()); String featName = m_dataDict.lookupObject(getGainRatio().getMaxValuedIndex()).toString(); double threshold = getGainRatio().getMaxValuedThreshold(); System.out.print(prefix + "\"" + featName + "\" <= " + threshold + ":"); if (m_leftChild.isLeaf()) { int bestLabelIndex = m_leftChild.getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (m_leftChild.getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * m_leftChild.getSize()); System.out.println(m_leftChild.getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + m_leftChild.getSize()); int bestLabelIndex = m_rightChild.getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (m_rightChild.getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * m_rightChild.getSize()); System.out.println(m_rightChild.getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + m_rightChild.getSize());
/*
 * Writes a textual description of this node and its children to System.out.
 *
 * From the statements that are visible here:
 *  - this node is reported as "root:" followed by the best label of its base
 *    label distribution and "numMajorityLabel/size", where numMajorityLabel
 *    is the best label's distribution value times getSize(), truncated to int;
 *  - the split is reported as prefix + "\"featName\" <= threshold:";
 *  - the same "label count/size" summary is printed for the left child when
 *    it is a leaf, and for the right child.
 *
 * NOTE(review): this text looks truncated. There is no opening brace after
 * the signature, the if-block is never closed, and bestLabelIndex and
 * numMajorityLabel are declared three times, so some braces and branches
 * (presumably root/leaf/else cases and recursive calls) appear to be missing.
 * Confirm against the original source before relying on this method.
 */
public void print(String prefix) int bestLabelIndex = getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * getSize()); System.out.println("root:" + getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + getSize()); String featName = m_dataDict.lookupObject(getGainRatio().getMaxValuedIndex()).toString(); double threshold = getGainRatio().getMaxValuedThreshold(); System.out.print(prefix + "\"" + featName + "\" <= " + threshold + ":"); if (m_leftChild.isLeaf()) { int bestLabelIndex = m_leftChild.getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (m_leftChild.getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * m_leftChild.getSize()); System.out.println(m_leftChild.getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + m_leftChild.getSize()); int bestLabelIndex = m_rightChild.getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (m_rightChild.getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * m_rightChild.getSize()); System.out.println(m_rightChild.getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + m_rightChild.getSize());
/*
 * Writes a textual description of this node and its children to System.out.
 *
 * From the statements that are visible here:
 *  - this node is reported as "root:" followed by the best label of its base
 *    label distribution and "numMajorityLabel/size", where numMajorityLabel
 *    is the best label's distribution value times getSize(), truncated to int;
 *  - the split is reported as prefix + "\"featName\" <= threshold:";
 *  - the same "label count/size" summary is printed for the left child when
 *    it is a leaf, and for the right child.
 *
 * NOTE(review): this text looks truncated. There is no opening brace after
 * the signature, the if-block is never closed, and bestLabelIndex and
 * numMajorityLabel are declared three times, so some braces and branches
 * (presumably root/leaf/else cases and recursive calls) appear to be missing.
 * Confirm against the original source before relying on this method.
 */
public void print(String prefix) int bestLabelIndex = getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * getSize()); System.out.println("root:" + getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + getSize()); String featName = m_dataDict.lookupObject(getGainRatio().getMaxValuedIndex()).toString(); double threshold = getGainRatio().getMaxValuedThreshold(); System.out.print(prefix + "\"" + featName + "\" <= " + threshold + ":"); if (m_leftChild.isLeaf()) { int bestLabelIndex = m_leftChild.getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (m_leftChild.getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * m_leftChild.getSize()); System.out.println(m_leftChild.getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + m_leftChild.getSize()); int bestLabelIndex = m_rightChild.getGainRatio().getBaseLabelDistribution().getBestIndex(); int numMajorityLabel = (int) (m_rightChild.getGainRatio().getBaseLabelDistribution().value(bestLabelIndex) * m_rightChild.getSize()); System.out.println(m_rightChild.getGainRatio().getBaseLabelDistribution().getBestLabel() + " " + numMajorityLabel + "/" + m_rightChild.getSize());
protected void splitTree(C45.Node node, int depth) { // Stop growing the tree when any of the following is true: // 1. We care about tree depth and maximum depth is reached // 2. The entropy of the node is too small (i.e., all // instances belong to the same class) // 3. The gain ratio of the best split available is too small if (m_depthLimited && depth == m_maxDepth) { logger.info("Splitting stopped: maximum depth reached (" + m_maxDepth + ")"); return; } else if (Maths.almostEquals(node.getGainRatio().getBaseEntropy(), 0)) { logger.info("Splitting stopped: entropy of node too small (" + node.getGainRatio().getBaseEntropy() + ")"); return; } else if (Maths.almostEquals(node.getGainRatio().getMaxValue(), 0)) { logger.info("Splitting stopped: node has insignificant gain ratio (" + node.getGainRatio().getMaxValue() + ")"); return; } logger.info("Splitting feature \""+node.getSplitFeature() +"\" at threshold=" + node.getGainRatio().getMaxValuedThreshold() + " gain ratio="+node.getGainRatio().getMaxValue()); node.split(); splitTree(node.getLeftChild(), depth+1); splitTree(node.getRightChild(), depth+1); }
protected void splitTree(C45.Node node, int depth) { // Stop growing the tree when any of the following is true: // 1. We care about tree depth and maximum depth is reached // 2. The entropy of the node is too small (i.e., all // instances belong to the same class) // 3. The gain ratio of the best split available is too small if (m_depthLimited && depth == m_maxDepth) { logger.info("Splitting stopped: maximum depth reached (" + m_maxDepth + ")"); return; } else if (Maths.almostEquals(node.getGainRatio().getBaseEntropy(), 0)) { logger.info("Splitting stopped: entropy of node too small (" + node.getGainRatio().getBaseEntropy() + ")"); return; } else if (Maths.almostEquals(node.getGainRatio().getMaxValue(), 0)) { logger.info("Splitting stopped: node has insignificant gain ratio (" + node.getGainRatio().getMaxValue() + ")"); return; } logger.info("Splitting feature \""+node.getSplitFeature() +"\" at threshold=" + node.getGainRatio().getMaxValuedThreshold() + " gain ratio="+node.getGainRatio().getMaxValue()); node.split(); splitTree(node.getLeftChild(), depth+1); splitTree(node.getRightChild(), depth+1); }
protected void splitTree(C45.Node node, int depth) { // Stop growing the tree when any of the following is true: // 1. We care about tree depth and maximum depth is reached // 2. The entropy of the node is too small (i.e., all // instances belong to the same class) // 3. The gain ratio of the best split available is too small if (m_depthLimited && depth == m_maxDepth) { logger.info("Splitting stopped: maximum depth reached (" + m_maxDepth + ")"); return; } else if (Maths.almostEquals(node.getGainRatio().getBaseEntropy(), 0)) { logger.info("Splitting stopped: entropy of node too small (" + node.getGainRatio().getBaseEntropy() + ")"); return; } else if (Maths.almostEquals(node.getGainRatio().getMaxValue(), 0)) { logger.info("Splitting stopped: node has insignificant gain ratio (" + node.getGainRatio().getMaxValue() + ")"); return; } logger.info("Splitting feature \""+node.getSplitFeature() +"\" at threshold=" + node.getGainRatio().getMaxValuedThreshold() + " gain ratio="+node.getGainRatio().getMaxValue()); node.split(); splitTree(node.getLeftChild(), depth+1); splitTree(node.getRightChild(), depth+1); }
/**
 * Computes the minimum description length (MDL) of this node.
 *
 * With {@code k} the size of the target alphabet and {@code n} the value
 * of {@code getSize()}, the result is the sum of three terms:
 * <pre>
 *   n * baseEntropy
 *   + ((k - 1) * ln(n / 2)) / (2 * GainRatio.log2)
 *   + ln(pi^(k/2) / gamma(k/2)) / GainRatio.log2
 * </pre>
 * Dividing by {@code GainRatio.log2} presumably converts natural logarithms
 * to base 2 (bits) -- confirm against the definition of that constant.
 *
 * @return the description length of this node
 */
public double getMDL() {
    int numClasses = m_ilist.getTargetAlphabet().size();
    double halfClasses = numClasses / 2.0;

    // Term 1: node size weighted by the node's base entropy.
    double entropyTerm = getSize() * getGainRatio().getBaseEntropy();

    // Term 2: depends on the number of classes and the node size.
    double classTerm =
            ((numClasses - 1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);

    // Term 3: log of pi^(k/2) over gamma(k/2).
    double piPow = Math.pow(Math.PI, halfClasses);
    double gammaVal = Maths.gamma(halfClasses);
    double gammaTerm = Math.log(piPow / gammaVal) / GainRatio.log2;

    return entropyTerm + classTerm + gammaTerm;
}
/**
 * Computes the minimum description length (MDL) of this node.
 *
 * With {@code k} the size of the target alphabet and {@code n} the value
 * of {@code getSize()}, the result is the sum of three terms:
 * <pre>
 *   n * baseEntropy
 *   + ((k - 1) * ln(n / 2)) / (2 * GainRatio.log2)
 *   + ln(pi^(k/2) / gamma(k/2)) / GainRatio.log2
 * </pre>
 * Dividing by {@code GainRatio.log2} presumably converts natural logarithms
 * to base 2 (bits) -- confirm against the definition of that constant.
 *
 * @return the description length of this node
 */
public double getMDL() {
    int numClasses = m_ilist.getTargetAlphabet().size();
    double halfClasses = numClasses / 2.0;

    // Term 1: node size weighted by the node's base entropy.
    double entropyTerm = getSize() * getGainRatio().getBaseEntropy();

    // Term 2: depends on the number of classes and the node size.
    double classTerm =
            ((numClasses - 1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);

    // Term 3: log of pi^(k/2) over gamma(k/2).
    double piPow = Math.pow(Math.PI, halfClasses);
    double gammaVal = Maths.gamma(halfClasses);
    double gammaTerm = Math.log(piPow / gammaVal) / GainRatio.log2;

    return entropyTerm + classTerm + gammaTerm;
}
/**
 * Computes the minimum description length (MDL) of this node.
 *
 * With {@code k} the size of the target alphabet and {@code n} the value
 * of {@code getSize()}, the result is the sum of three terms:
 * <pre>
 *   n * baseEntropy
 *   + ((k - 1) * ln(n / 2)) / (2 * GainRatio.log2)
 *   + ln(pi^(k/2) / gamma(k/2)) / GainRatio.log2
 * </pre>
 * Dividing by {@code GainRatio.log2} presumably converts natural logarithms
 * to base 2 (bits) -- confirm against the definition of that constant.
 *
 * @return the description length of this node
 */
public double getMDL() {
    int numClasses = m_ilist.getTargetAlphabet().size();
    double halfClasses = numClasses / 2.0;

    // Term 1: node size weighted by the node's base entropy.
    double entropyTerm = getSize() * getGainRatio().getBaseEntropy();

    // Term 2: depends on the number of classes and the node size.
    double classTerm =
            ((numClasses - 1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);

    // Term 3: log of pi^(k/2) over gamma(k/2).
    double piPow = Math.pow(Math.PI, halfClasses);
    double gammaVal = Maths.gamma(halfClasses);
    double gammaTerm = Math.log(piPow / gammaVal) / GainRatio.log2;

    return entropyTerm + classTerm + gammaTerm;
}
/**
 * Walks down the tree from {@code node} until a node with neither a left
 * nor a right child is reached, and returns that node.
 *
 * At each inner node the feature vector's value at the node's split
 * feature index is compared with the node's split threshold: values less
 * than or equal to the threshold go left, all others go right.
 *
 * @param node the node to start the descent from
 * @param fv   the feature vector being routed through the tree
 * @return the childless node the feature vector ends up in
 */
private Node getLeaf (Node node, FeatureVector fv)
{
    Node current = node;
    // Keep descending while the current node still has at least one child.
    while (current.getLeftChild() != null || current.getRightChild() != null) {
        boolean goesLeft = fv.value(current.getGainRatio().getMaxValuedIndex())
                <= current.getGainRatio().getMaxValuedThreshold();
        current = goesLeft ? current.getLeftChild() : current.getRightChild();
    }
    return current;
}
/**
 * Classifies an instance by routing its feature vector from the root of
 * the tree to a leaf and using that leaf's base label distribution as
 * the labeling.
 *
 * The instance's data is cast to a {@code FeatureVector}. When assertions
 * are enabled and an instance pipe is set, the feature vector's alphabet
 * must be the same object as the pipe's data alphabet.
 *
 * @param instance the instance to classify
 * @return a classification of {@code instance} made by this classifier
 */
public Classification classify (Instance instance)
{
    FeatureVector features = (FeatureVector) instance.getData ();
    assert (instancePipe == null || features.getAlphabet () == this.instancePipe.getDataAlphabet ());

    Node reached = getLeaf(m_root, features);
    return new Classification (
            instance,
            this,
            reached.getGainRatio().getBaseLabelDistribution());
}
/**
 * Classifies an instance by routing its feature vector from the root of
 * the tree to a leaf and using that leaf's base label distribution as
 * the labeling.
 *
 * The instance's data is cast to a {@code FeatureVector}. When assertions
 * are enabled and an instance pipe is set, the feature vector's alphabet
 * must be the same object as the pipe's data alphabet.
 *
 * @param instance the instance to classify
 * @return a classification of {@code instance} made by this classifier
 */
public Classification classify (Instance instance)
{
    FeatureVector features = (FeatureVector) instance.getData ();
    assert (instancePipe == null || features.getAlphabet () == this.instancePipe.getDataAlphabet ());

    Node reached = getLeaf(m_root, features);
    return new Classification (
            instance,
            this,
            reached.getGainRatio().getBaseLabelDistribution());
}
/**
 * Classifies an instance by routing its feature vector from the root of
 * the tree to a leaf and using that leaf's base label distribution as
 * the labeling.
 *
 * The instance's data is cast to a {@code FeatureVector}. When assertions
 * are enabled and an instance pipe is set, the feature vector's alphabet
 * must be the same object as the pipe's data alphabet.
 *
 * @param instance the instance to classify
 * @return a classification of {@code instance} made by this classifier
 */
public Classification classify (Instance instance)
{
    FeatureVector features = (FeatureVector) instance.getData ();
    assert (instancePipe == null || features.getAlphabet () == this.instancePipe.getDataAlphabet ());

    Node reached = getLeaf(m_root, features);
    return new Classification (
            instance,
            this,
            reached.getGainRatio().getBaseLabelDistribution());
}
/**
 * Walks down the tree from {@code node} until a node with neither a left
 * nor a right child is reached, and returns that node.
 *
 * At each inner node the feature vector's value at the node's split
 * feature index is compared with the node's split threshold: values less
 * than or equal to the threshold go left, all others go right.
 *
 * @param node the node to start the descent from
 * @param fv   the feature vector being routed through the tree
 * @return the childless node the feature vector ends up in
 */
private Node getLeaf (Node node, FeatureVector fv)
{
    Node current = node;
    // Keep descending while the current node still has at least one child.
    while (current.getLeftChild() != null || current.getRightChild() != null) {
        boolean goesLeft = fv.value(current.getGainRatio().getMaxValuedIndex())
                <= current.getGainRatio().getMaxValuedThreshold();
        current = goesLeft ? current.getLeftChild() : current.getRightChild();
    }
    return current;
}
/**
 * Walks down the tree from {@code node} until a node with neither a left
 * nor a right child is reached, and returns that node.
 *
 * At each inner node the feature vector's value at the node's split
 * feature index is compared with the node's split threshold: values less
 * than or equal to the threshold go left, all others go right.
 *
 * @param node the node to start the descent from
 * @param fv   the feature vector being routed through the tree
 * @return the childless node the feature vector ends up in
 */
private Node getLeaf (Node node, FeatureVector fv)
{
    Node current = node;
    // Keep descending while the current node still has at least one child.
    while (current.getLeftChild() != null || current.getRightChild() != null) {
        boolean goesLeft = fv.value(current.getGainRatio().getMaxValuedIndex())
                <= current.getGainRatio().getMaxValuedThreshold();
        current = goesLeft ? current.getLeftChild() : current.getRightChild();
    }
    return current;
}