/** * Main method. * * @param args the options for the classifier */ public static void main(String[] args) { runClassifier(new Id3(), args); } }
/** * Returns a string describing the classifier. * @return a description suitable for the GUI. */ public String globalInfo() { return "Class for constructing an unpruned decision tree based on the ID3 " + "algorithm. Can only deal with nominal attributes. No missing values " + "allowed. Empty leaves may result in unclassified instances. For more " + "information see: \n\n" + getTechnicalInformation().toString(); }
while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]);
/** Creates a default Id3 */ public Classifier getClassifier() { return new Id3(); }
/** * Builds Id3 decision tree classifier. * * @param data the training data * @exception Exception if classifier can't be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); makeTree(data); }
/** * Computes information gain for an attribute. * * @param data the data for which info gain is to be computed * @param att the attribute * @return the information gain for the given attribute and data * @throws Exception if computation fails */ private double computeInfoGain(Instances data, Attribute att) throws Exception { double infoGain = computeEntropy(data); Instances[] splitData = splitData(data, att); for (int j = 0; j < att.numValues(); j++) { if (splitData[j].numInstances() > 0) { infoGain -= ((double) splitData[j].numInstances() / (double) data.numInstances()) * computeEntropy(splitData[j]); } } return infoGain; }
/** * Computes class distribution for instance using decision tree. * * @param instance the instance for which distribution is to be computed * @return the class distribution for the given instance * @throws NoSupportForMissingValuesException if instance has missing values */ public double[] distributionForInstance(Instance instance) throws NoSupportForMissingValuesException { if (instance.hasMissingValue()) { throw new NoSupportForMissingValuesException("Id3: no missing values, " + "please."); } if (m_Attribute == null) { return m_Distribution; } else { return m_Successors[(int) instance.value(m_Attribute)]. distributionForInstance(instance); } }
/** * Classifies a given test instance using the decision tree. * * @param instance the instance to be classified * @return the classification * @throws NoSupportForMissingValuesException if instance has missing values */ public double classifyInstance(Instance instance) throws NoSupportForMissingValuesException { if (instance.hasMissingValue()) { throw new NoSupportForMissingValuesException("Id3: no missing values, " + "please."); } if (m_Attribute == null) { return m_ClassValue; } else { return m_Successors[(int) instance.value(m_Attribute)]. classifyInstance(instance); } }