/**
 * Constructor for numeric attribute.
 *
 * @param data the data to work with
 * @param attribute the attribute to use
 * @param nBreaks the number of breakpoints
 * @throws Exception if something goes wrong
 */
public OneRRule(Instances data, Attribute attribute, int nBreaks) throws Exception {

  m_class = data.classAttribute();
  m_numInst = data.numInstances();
  m_attr = attribute;
  m_correct = 0;
  m_classifications = new int[nBreaks];
  m_breakpoints = new double[nBreaks - 1]; // last breakpoint is infinity
}
/**
 * Fixes nominal label indices. DL4J sorts the labels at training time; the
 * mapping from Weka labels to the resorted labels is stored in
 * {@code labelSortIndex}.
 *
 * @param j Original index
 * @param insts Test dataset
 * @return Remapped index if the test dataset has a nominal label, else {@code j}
 */
protected int fixLabelIndexIfNominal(int j, Instances insts) {
  if (insts.classAttribute().isNominal()) {
    return labelSortIndex[j];
  } else {
    return j;
  }
}
@Override
public List<String> getPredictionLabels() {
  if (m_modelHeader == null) {
    return null;
  }

  if (m_modelHeader.classAttribute().isNominal()) {
    if (m_predictionLabels == null) {
      m_predictionLabels = new ArrayList<String>();
      for (int i = 0; i < m_modelHeader.classAttribute().numValues(); i++) {
        m_predictionLabels.add(m_modelHeader.classAttribute().value(i));
      }
    }
  }

  return m_predictionLabels;
}
@Override
public void buildClassifier(Instances D) throws Exception {
  testCapabilities(D);

  int L = D.classIndex();

  if (getDebug()) System.out.print("Creating " + L + " models (" + m_Classifier.getClass().getName() + "): ");
  m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier, L);
  m_Templates = new Instances[L];

  for (int j = 0; j < L; j++) {

    // Select only class attribute 'j'
    m_Templates[j] = MLUtils.keepAttributesAt(new Instances(D), new int[]{j}, L);
    m_Templates[j].setClassIndex(0);

    // Build the classifier for that class
    m_MultiClassifiers[j].buildClassifier(m_Templates[j]);

    if (getDebug()) System.out.print(" " + (m_Templates[j].classAttribute().name()));
    m_Templates[j] = new Instances(m_Templates[j], 0);
  }
}
/**
 * Makes the format for the level-1 data.
 *
 * @param instances the level-0 format
 * @return the format for the meta data
 * @throws Exception if the format generation fails
 */
protected Instances metaFormat(Instances instances) throws Exception {

  ArrayList<Attribute> attributes = new ArrayList<Attribute>();
  Instances metaFormat;

  for (int k = 0; k < m_Classifiers.length; k++) {
    Classifier classifier = (Classifier) getClassifier(k);
    String name = classifier.getClass().getName() + "-" + (k + 1);
    if (m_BaseFormat.classAttribute().isNumeric()) {
      attributes.add(new Attribute(name));
    } else {
      for (int j = 0; j < m_BaseFormat.classAttribute().numValues(); j++) {
        attributes.add(new Attribute(name + ":" + m_BaseFormat.classAttribute().value(j)));
      }
    }
  }
  attributes.add((Attribute) m_BaseFormat.classAttribute().copy());
  metaFormat = new Instances("Meta format", attributes, 0);
  metaFormat.setClassIndex(metaFormat.numAttributes() - 1);
  return metaFormat;
}
instances = new Instances(instances);
instances.deleteWithMissingClass();

m_ClassType = instances.classAttribute().type();
m_Train = new Instances(instances, 0, instances.numInstances());

// Throw away initial instances until within the specified window size
if ((m_WindowSize > 0) && (instances.numInstances() > m_WindowSize)) {
  m_Train = new Instances(m_Train,
                          m_Train.numInstances() - m_WindowSize,
                          m_WindowSize);
}

// Count the attributes the classifier can actually use
m_NumAttributesUsed = 0.0;
for (int i = 0; i < m_Train.numAttributes(); i++) {
  if ((i != m_Train.classIndex())
      && (m_Train.attribute(i).isNominal() || m_Train.attribute(i).isNumeric())) {
    m_NumAttributesUsed += 1.0;
  }
}
/**
 * Filter the data according to the ruleset and compute the basic stats:
 * coverage/uncoverage, true/false positive/negatives of each rule
 */
public void countData() {
  if ((m_Filtered != null) || (m_Ruleset == null) || (m_Data == null)) {
    return;
  }

  int size = m_Ruleset.size();
  m_Filtered = new ArrayList<Instances[]>(size);
  m_SimpleStats = new ArrayList<double[]>(size);
  m_Distributions = new ArrayList<double[]>(size);
  Instances data = new Instances(m_Data);

  for (int i = 0; i < size; i++) {
    double[] stats = new double[6]; // 6 statistics parameters
    double[] classCounts = new double[m_Data.classAttribute().numValues()];
    Instances[] filtered = computeSimpleStats(i, data, stats, classCounts);
    m_Filtered.add(filtered);
    m_SimpleStats.add(stats);
    m_Distributions.add(classCounts);
    data = filtered[1]; // Data not covered
  }
}
/**
 * Generates a consequence of length 1 for a class association rule.
 *
 * @param instances the instances under consideration
 * @return an ArrayList with the consequences of length 1
 */
public static ArrayList<Object> singleConsequence(Instances instances) {

  ItemSet consequence;
  ArrayList<Object> consequences = new ArrayList<Object>();

  for (int j = 0; j < (instances.classAttribute()).numValues(); j++) {
    consequence = new ItemSet(instances.numInstances());
    int[] consequenceItems = new int[instances.numAttributes()];
    consequence.setItem(consequenceItems);
    for (int k = 0; k < instances.numAttributes(); k++) {
      consequence.setItemAt(-1, k);
    }
    consequence.setItemAt(j, instances.classIndex());
    consequences.add(consequence);
  }
  return consequences;
}
/**
 * mapBack: returns the original indices (encoded in the class attribute).
 */
private int[] mapBack(Instances template, int i) {
  try {
    return MLUtils.toIntArray(template.classAttribute().value(i));
  } catch (Exception e) {
    return new int[]{};
  }
}
@SuppressWarnings("unchecked")
public void setup(Instances inputFormat) throws Exception {
  m_inputContainsStringAttributes = inputFormat.checkForStringAttributes();
  m_inputFormat = inputFormat.stringFreeStructure();
  if (!m_inputContainsStringAttributes) {
    return;
  }

  m_numClasses = !m_doNotOperateOnPerClassBasis
    && m_inputFormat.classIndex() >= 0
    && m_inputFormat.classAttribute().isNominal() ? m_inputFormat.numClasses() : 1;

  m_dictsPerClass =
    m_sortDictionary ? new TreeMap[m_numClasses] : new LinkedHashMap[m_numClasses];
  m_classIndex = m_inputFormat.classIndex();

  for (int i = 0; i < m_numClasses; i++) {
    m_dictsPerClass[i] =
      m_sortDictionary ? new TreeMap<String, int[]>() : new LinkedHashMap<String, int[]>();
  }

  determineSelectedRange(inputFormat);
}
/**
 * Returns a string representation of the classifier.
 *
 * @return a string representation of the classifier
 */
public String toString() {
  StringBuffer result = new StringBuffer(
    "The class counts (including Laplace correction)\n"
    + "-----------------------------------------------\n");

  for (int c = 0; c < m_numClasses; c++)
    result.append(m_headerInfo.classAttribute().value(c)).append("\t")
      .append(Utils.doubleToString(m_probOfClass[c], getNumDecimalPlaces())).append("\n");

  result.append("\nThe probability of a word given the class\n"
    + "-----------------------------------------\n\t");

  for (int c = 0; c < m_numClasses; c++)
    result.append(m_headerInfo.classAttribute().value(c)).append("\t");

  result.append("\n");

  for (int w = 0; w < m_numAttributes; w++) {
    if (w != m_headerInfo.classIndex()) {
      result.append(m_headerInfo.attribute(w).name()).append("\t");
      for (int c = 0; c < m_numClasses; c++)
        result.append(Utils.doubleToString(
          m_probOfWordGivenClass[c][w] / m_wordsPerClass[c],
          getNumDecimalPlaces())).append("\t");
      result.append("\n");
    }
  }

  return result.toString();
}
/**
 * Generate a bunch of predictions ready for processing, by performing a
 * cross-validation on the supplied dataset.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation
 * @exception Exception if an error occurs
 */
public ArrayList<Prediction> getCVPredictions(Classifier classifier, Instances data, int numFolds)
  throws Exception {

  ArrayList<Prediction> predictions = new ArrayList<Prediction>();
  Instances runInstances = new Instances(data);
  Random random = new Random(m_Seed);
  runInstances.randomize(random);

  if (runInstances.classAttribute().isNominal() && (numFolds > 1)) {
    runInstances.stratify(numFolds);
  }

  for (int fold = 0; fold < numFolds; fold++) {
    Instances train = runInstances.trainCV(numFolds, fold, random);
    Instances test = runInstances.testCV(numFolds, fold);
    ArrayList<Prediction> foldPred = getTrainTestPredictions(classifier, train, test);
    predictions.addAll(foldPred);
  }
  return predictions;
}
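// Hedged usage sketch (not part of the original source): it assumes the method above
// belongs to an evaluation helper such as weka.classifiers.evaluation.EvaluationUtils,
// whose public API exposes this signature, and that "iris.arff" is available locally.
import java.util.ArrayList;

import weka.classifiers.evaluation.EvaluationUtils;
import weka.classifiers.evaluation.Prediction;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CVPredictionsDemo {
  public static void main(String[] args) throws Exception {
    Instances data = new DataSource("iris.arff").getDataSet();
    data.setClassIndex(data.numAttributes() - 1);  // last attribute is the class
    ArrayList<Prediction> preds =
      new EvaluationUtils().getCVPredictions(new J48(), data, 10);
    System.out.println("Collected " + preds.size() + " cross-validation predictions");
  }
}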
// Train a J48 decision tree on one ARFF file and label the instances of a second
// (readDataFile() is a helper that returns a BufferedReader over the training file).
BufferedReader datafile = readDataFile(TrainingFile);
Instances train = new Instances(datafile);
train.setClassIndex(train.numAttributes() - 1);

Classifier cls = new J48();
cls.buildClassifier(train);

DataSource testDataset = new DataSource(Test);
Instances test = testDataset.getDataSet();
test.setClassIndex(test.numAttributes() - 1);

// Mapping classValue()/classifyInstance() through classAttribute().value() only
// makes sense for a nominal class attribute.
for (int i = 0; i < test.numInstances(); i++) {
  Instance inst = test.instance(i);
  double actualClassValue = test.instance(i).classValue();
  String actual = test.classAttribute().value((int) actualClassValue); // actual class label
  double result = cls.classifyInstance(inst);
  String prediction = test.classAttribute().value((int) result);       // predicted class label
  System.out.println(actual + " -> " + prediction);
}
public static List<String> getClassLabels(Instances data, boolean isMultilabel) {
  List<String> classLabelList = new ArrayList<String>();
  if (!isMultilabel) {
    Enumeration<Object> classLabels = data.classAttribute().enumerateValues();
    while (classLabels.hasMoreElements()) {
      classLabelList.add((String) classLabels.nextElement());
    }
  } else {
    // Multi-label convention: the first classIndex() attributes are the label attributes
    int numLabels = data.classIndex();
    for (int i = 0; i < numLabels; i++) {
      classLabelList.add(data.attribute(i).name());
    }
  }
  return classLabelList;
}
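// Hedged usage sketch (not part of the original source): assumes it is called from
// within the same utility class that declares getClassLabels(), and that "iris.arff"
// is a local single-label dataset whose class attribute is the last one.
Instances data = new weka.core.converters.ConverterUtils.DataSource("iris.arff").getDataSet();
data.setClassIndex(data.numAttributes() - 1);       // single-label: class is the last attribute
List<String> labels = getClassLabels(data, false);  // isMultilabel = false
for (String label : labels) {
  System.out.println(label);                        // prints each nominal class value
}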
/**
 * compares the generated dataset with the original one
 */
protected void performTests(Instances result) {
  assertEquals(m_Instances.numAttributes(), result.numAttributes());
  assertEquals(m_Instances.numInstances(), result.numInstances());

  // None of the attributes should have changed
  for (int i = 0; i < result.numAttributes(); i++) {
    assertEquals(m_Instances.attribute(i).type(), result.attribute(i).type());
    assertEquals(m_Instances.attribute(i).name(), result.attribute(i).name());
  }

  // did the order change?
  boolean orderEqual = true;
  for (int i = 0; i < result.numClasses(); i++) {
    if (!m_Instances.classAttribute().value(i).equals(result.classAttribute().value(i))) {
      orderEqual = false;
      break;
    }
  }
  if (orderEqual)
    fail("Order wasn't changed!");
}
// Choose a scheme depending on the class type, then classify every instance.
// The numeric branch and the buildClassifier() call are assumptions added to make
// this fragment coherent; the original only showed the nominal case.
if (data.classAttribute().isNominal()) {
  classifier = new weka.classifiers.trees.J48();
} else {
  classifier = new weka.classifiers.trees.REPTree(); // regression-capable tree (assumption)
}
classifier.buildClassifier(data);

result = new double[data.numInstances()];
for (i = 0; i < result.length; i++) {
  result[i] = classifier.classifyInstance(data.instance(i));
}
@Override
public void buildClassifier(Instances D) throws Exception {
  testCapabilities(D);

  int L = D.classIndex();

  if (getDebug()) System.out.print("Creating " + L + " models (" + m_Classifier.getClass().getName() + "): ");
  m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier, L);
  m_InstancesTemplates = new Instances[L];

  for (int j = 0; j < L; j++) {

    // Select only class attribute 'j'
    Instances D_j = F.keepLabels(new Instances(D), L, new int[]{j});
    D_j.setClassIndex(0);

    // Build the classifier for that class
    m_MultiClassifiers[j].buildClassifier(D_j);

    if (getDebug()) System.out.print(" " + (D_j.classAttribute().name()));
    m_InstancesTemplates[j] = new Instances(D_j, 0);
  }
}
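// Hedged usage sketch (not part of the original source): assumes the method above is a
// MEKA binary-relevance style buildClassifier(), that meka.core.MLUtils and the BR
// problem-transformation class are on the classpath, and that "Music.arff" is a local
// MEKA dataset whose relation name carries the "-C <numLabels>" option.
Instances D = new Instances(new java.io.BufferedReader(new java.io.FileReader("Music.arff")));
MLUtils.prepareData(D);                              // sets the class index to the number of labels
BR br = new BR();                                    // meka.classifiers.multilabel.BR
br.setClassifier(new weka.classifiers.trees.J48());  // base learner for each label
br.buildClassifier(D);                               // builds one binary model per label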
/**
 * Determines the format of the level-1 data.
 *
 * @param instances the level-0 format
 * @return the format for the meta data
 * @throws Exception if the format generation fails
 */
protected Instances metaFormat(Instances instances) throws Exception {

  ArrayList<Attribute> attributes = new ArrayList<Attribute>();
  Instances metaFormat;

  for (int k = 0; k < m_Classifiers.length; k++) {
    Classifier classifier = getClassifier(k);
    String name = classifier.getClass().getName() + "-" + (k + 1);
    if (m_BaseFormat.classAttribute().isNumeric()) {
      attributes.add(new Attribute(name));
    } else {
      for (int j = 0; j < m_BaseFormat.classAttribute().numValues(); j++) {
        attributes.add(new Attribute(name + ":" + m_BaseFormat.classAttribute().value(j)));
      }
    }
  }
  attributes.add((Attribute) m_BaseFormat.classAttribute().copy());
  metaFormat = new Instances("Meta format", attributes, 0);
  metaFormat.setClassIndex(metaFormat.numAttributes() - 1);
  return metaFormat;
}