// Train the model
Classifier cModel = new NaiveBayes();
cModel.buildClassifier(isTrainingSet);

// Serialize the model to disk, then read it back
weka.core.SerializationHelper.write("/some/where/nBayes.model", cModel);
Classifier cls = (Classifier) weka.core.SerializationHelper.read("/some/where/nBayes.model");

// Test the model
Evaluation eTest = new Evaluation(isTrainingSet);
eTest.evaluateModel(cls, isTrainingSet);
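For context, a minimal end-to-end sketch of the same save/load round trip; the dataset path data/iris.arff and the model file name are placeholders:

import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.SerializationHelper;
import weka.core.converters.ConverterUtils.DataSource;

public class SaveLoadDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset and mark the last attribute as the class.
        Instances train = DataSource.read("data/iris.arff"); // hypothetical path
        train.setClassIndex(train.numAttributes() - 1);

        // Train, serialize, and deserialize the model.
        Classifier model = new NaiveBayes();
        model.buildClassifier(train);
        SerializationHelper.write("nBayes.model", model);
        Classifier restored = (Classifier) SerializationHelper.read("nBayes.model");

        // The restored model predicts exactly like the original.
        double label = restored.classifyInstance(train.instance(0));
        System.out.println(train.classAttribute().value((int) label));
    }
}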
@Override
public String classify(Instance instance) throws Exception {
    // Note: new weka.core.Instance(...) is the pre-3.7 API; newer Weka uses DenseInstance.
    weka.core.Instance wekaInstance = new weka.core.Instance(wekaTrainingData.numAttributes());
    wekaInstance.setDataset(wekaTrainingData);

    // Copy the histogram bins into the numeric attributes.
    double[] histogramPercent = instance.getHistogramPercent();
    for (int i = 0; i < histogramPercent.length; i++) {
        wekaInstance.setValue(i, histogramPercent[i]);
    }

    // The class is unknown at prediction time.
    wekaInstance.setMissing(wekaTrainingData.attribute("class"));

    double wekaClassification = classifier.classifyInstance(wekaInstance);
    return wekaTrainingData.attribute("class").value((int) wekaClassification);
}
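The method assumes wekaTrainingData holds one numeric attribute per histogram bin plus a nominal attribute named "class". A hedged sketch of building such a header with the newer (3.7+) Weka API; the bin count and label names are made-up placeholders:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;

public class HistogramHeader {
    // Hypothetical header builder; numBins and the label names are placeholders.
    public static Instances build(int numBins) {
        ArrayList<Attribute> attrs = new ArrayList<>();
        for (int i = 0; i < numBins; i++) {
            attrs.add(new Attribute("bin" + i)); // numeric histogram-bin attribute
        }
        ArrayList<String> labels = new ArrayList<>();
        labels.add("positive");
        labels.add("negative");
        attrs.add(new Attribute("class", labels)); // nominal class attribute

        Instances header = new Instances("histograms", attrs, 0);
        header.setClassIndex(header.numAttributes() - 1);
        return header;
    }
}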
public List<String> performPrediction(Classifier cl, Instances data) throws Exception {
    List<String> results = new ArrayList<>();
    for (int j = 0; j < data.size(); j++) {
        double[] vals;
        try {
            vals = cl.distributionForInstance(data.instance(j));
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
        // Collect every label whose confidence reaches the threshold
        // (assumes the leading attributes of the data are the label attributes).
        List<String> outcomes = new ArrayList<>();
        for (int i = 0; i < vals.length; i++) {
            if (vals[i] >= threshold) {
                outcomes.add(data.instance(j).attribute(i).name());
            }
        }
        results.add(StringUtils.join(outcomes, ","));
    }
    return results;
}
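A hedged usage sketch: model and test are hypothetical names, and test must share the header (including the leading label attributes) with the training data:

// Hypothetical caller: prints one comma-separated label set per test instance.
List<String> predictions = performPrediction(model, test);
for (int i = 0; i < predictions.size(); i++) {
    System.out.println("instance " + i + " -> " + predictions.get(i));
}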
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 * @throws Exception if instance could not be classified successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    // default model?
    if (m_NumIterationsPerformed == 0) {
        return m_ZeroR.distributionForInstance(instance);
    }
    if (m_NumIterationsPerformed == 1) {
        return m_Classifiers[0].distributionForInstance(instance);
    }
    // Weighted vote: each model adds its beta to the bin of its predicted class.
    double[] sums = new double[instance.numClasses()];
    for (int i = 0; i < m_NumIterationsPerformed; i++) {
        sums[(int) m_Classifiers[i].classifyInstance(instance)] += m_Betas[i];
    }
    return Utils.logs2probs(sums);
}
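Utils.logs2probs treats the accumulated beta sums as log-domain scores: it exponentiates them (after shifting by the maximum for numerical stability) and normalizes to a proper distribution. A tiny self-contained check:

import weka.core.Utils;

public class Logs2ProbsDemo {
    public static void main(String[] args) {
        // Class 0 accumulated more boosting weight than class 1.
        double[] sums = {2.0, 0.5};
        double[] probs = Utils.logs2probs(sums);
        // Prints a distribution summing to 1, skewed towards class 0.
        System.out.println(probs[0] + " " + probs[1]);
    }
}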
/**
 * Build the classifier on the filtered data.
 *
 * @param data the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    if (m_Classifier == null) {
        throw new Exception("No base classifier has been set!");
    }
    getCapabilities().testWithFail(data);

    Random r = (data.numInstances() > 0)
        ? data.getRandomNumberGenerator(getSeed())
        : new Random(getSeed());
    data = setUp(data, r);

    // The filter may have assigned weights the base classifier cannot handle.
    if (!data.allInstanceWeightsIdentical()
            && !(m_Classifier instanceof WeightedInstancesHandler)) {
        data = data.resampleWithWeights(r);
    }
    if (!data.allAttributeWeightsIdentical()
            && !(m_Classifier instanceof WeightedAttributesHandler)) {
        data = resampleAttributes(data, false, r);
    }

    // can classifier handle the data?
    getClassifier().getCapabilities().testWithFail(data);

    if (m_Classifier instanceof Randomizable) {
        ((Randomizable) m_Classifier).setSeed(r.nextInt());
    }
    m_Classifier.buildClassifier(data);
}
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    int L = D.classIndex();
    h = new Classifier[L];
    u = new Random(m_S);
    D_templates = new Instances[L];

    // Build L probabilistic models, each to predict Y_j | X, Y_{-j}; save the templates.
    for (int j = 0; j < L; j++) {
        // X = [Y[0],...,Y[j-1],Y[j+1],...,Y[L],X]
        D_templates[j] = new Instances(D);
        D_templates[j].setClassIndex(j);
        // train h[j] : X -> Y_j
        h[j] = AbstractClassifier.forName(getClassifier().getClass().getName(),
                ((AbstractClassifier) getClassifier()).getOptions());
        h[j].buildClassifier(D_templates[j]);
    }
}
/**
 * Train classifier <code>h</code> on dataset <code>D</code>, under super-class
 * partition <code>partition</code>.
 */
public void trainClassifier(Classifier h, Instances D, int partition[][]) throws Exception {
    f = new SuperNodeFilter();
    f.setIndices(partition);
    f.setP(m_P >= 0 ? m_P : rand.nextInt(Math.abs(m_P)));
    f.setN(m_N >= 0 ? m_N : rand.nextInt(Math.abs(m_N)));
    Instances D_ = f.process(D);
    //int K[] = MLUtils.getK(D_); <-- if some K[j] < 2, this is a problem!
    if (getDebug()) {
        int N = D.numInstances();
        int U = MLUtils.numberOfUniqueCombinations(D);
        System.out.println("PS(" + f.getP() + "," + f.getN() + ") reduced: "
                + N + " -> " + D_.numInstances() + " / "
                + U + " -> " + MLUtils.numberOfUniqueCombinations(D_));
    }
    m_InstancesTemplate = D_;
    m_Classifier.buildClassifier(D_); // build on the processed batch
}
getCapabilities().testWithFail(instances);

Instances trainData = new Instances(instances);
trainData.deleteWithMissingClass();
Instances trainDataCopy = new Instances(trainData); // Just in case base classifier is sensitive to order of data.

m_BestPerformance = -99;
m_NumAttributes = trainData.numAttributes();
Random random = new Random(m_Seed);
trainData.randomize(random);
m_TrainFoldSize = trainData.trainCV(m_NumFolds, 0).numInstances();

// No parameters to optimise: build on all the data and return.
if (m_CVParams.size() == 0) {
    m_Classifier.buildClassifier(trainDataCopy);
    m_BestClassifierOptions = m_InitOptions;
    return;
}

if (trainData.classAttribute().isNominal()) {
    trainData.stratify(m_NumFolds);
}

// Remove the parameters being optimised from the base classifier's options.
for (int i = 0; i < m_CVParams.size(); i++) {
    Utils.getOption(((CVParameter) m_CVParams.elementAt(i)).m_ParamChar, m_ClassifierOptions);
}
findParamsByCrossValidation(0, trainData, random);

// Rebuild the classifier with the best options found.
((OptionHandler) m_Classifier).setOptions((String[]) m_BestClassifierOptions.clone());
m_Classifier.buildClassifier(trainDataCopy);
/**
 * Builds the classifier.
 *
 * @param data the training data to be used for generating the classifier
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // Resample if the base classifier cannot make use of instance weights.
    boolean resample = getForceResampleWithWeights()
        || (!(m_Classifier instanceof WeightedInstancesHandler)
            && ResampleUtils.hasInstanceWeights(data));
    if (resample) {
        if (getDebug())
            System.err.println(getClass().getName() + ": resampling training data");
        data = data.resampleWithWeights(new Random(m_Seed));
    }

    m_Classifier.buildClassifier(data);
}
/**
 * Initialize the classifier.
 *
 * @param data the training data to be used for generating the boosted classifier
 * @throws Exception if the classifier could not be built successfully
 */
public void initializeClassifier(Instances data) throws Exception {
    super.buildClassifier(data);

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    m_ZeroR = new weka.classifiers.rules.ZeroR();
    m_ZeroR.buildClassifier(data);

    m_NumClasses = data.numClasses();
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    m_TrainingData = new Instances(data);
    m_RandomInstance = new Random(m_Seed);

    if (m_UseResampling || !(m_Classifier instanceof WeightedInstancesHandler)) {
        // Normalize weights so that they sum to one and can be used as sampling probabilities.
        double sumProbs = m_TrainingData.sumOfWeights();
        for (int i = 0; i < m_TrainingData.numInstances(); i++) {
            m_TrainingData.instance(i).setWeight(m_TrainingData.instance(i).weight() / sumProbs);
        }
    }
}
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D); int L = D.classIndex(); if(getDebug()) System.out.print("Creating "+L+" models ("+m_Classifier.getClass().getName()+"): "); m_MultiClassifiers = AbstractClassifier.makeCopies(m_Classifier,L); m_Templates = new Instances[L]; for(int j = 0; j < L; j++) { //Select only class attribute 'j' m_Templates[j] = MLUtils.keepAttributesAt(new Instances(D),new int[]{j},L); m_Templates[j].setClassIndex(0); //Build the classifier for that class m_MultiClassifiers[j].buildClassifier(m_Templates[j]); if(getDebug()) System.out.print(" " + (m_Templates[j].classAttribute().name())); m_Templates[j] = new Instances(m_Templates[j], 0); } }
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    int L = D.classIndex();

    // Transform Instances
    if (getDebug()) System.out.print("Transforming Instances ...");
    Instances D_ = PSUtils.LCTransformation(D, L);
    m_InstancesTemplate = new Instances(D_, 0);

    // Set Info; Build Classifier
    info = "K = " + m_InstancesTemplate.attribute(0).numValues() + ", N = " + D_.numInstances();
    if (getDebug()) System.out.print("Building Classifier (" + info + ") ...");
    m_Classifier.buildClassifier(D_);
    if (getDebug()) System.out.println("Done");
}
double[] confidences = new double[numLabels];
double[] conf_corrected = new double[numLabels];
Instances knn = new Instances(lnn.kNearestNeighbours(instance, numOfNeighbors));

// Estimate a confidence per label from the neighbours' label values.
for (int i = 0; i < numLabels; i++) {
    double count_for_label_i = 0;
    for (int k = 0; k < numOfNeighbors; k++) {
        double value = Double.parseDouble(train.attribute(labelIndices[i]).value(
                (int) knn.instance(k).value(labelIndices[i])));
        if (Utils.eq(value, 1.0)) {
            count_for_label_i++;
        }
    }
    confidences[i] = count_for_label_i / numOfNeighbors;
}

// Build a corrected instance per label: the original features, the kNN confidences,
// and that label's own value; then query the label's stacked classifier.
for (int j = 0; j < numLabels; j++) {
    double[] attvalue = new double[instance.numAttributes() + 1];
    for (int m = 0; m < featureIndices.length; m++) {
        attvalue[m] = instance.value(featureIndices[m]);
    }
    System.arraycopy(confidences, 0, attvalue, train.numAttributes() - numLabels, confidences.length);
    attvalue[attvalue.length - 1] = instance.value(train.numAttributes() - numLabels + j);
    Instance newInst = DataUtils.createInstance(instance, 1, attvalue);
    conf_corrected[j] = classifier[j].distributionForInstance(newInst)[1];
}
public double[] distributionForInstance(Instance instance) throws Exception {
    double[] sums = new double[instance.numClasses()], newProbs;

    // Accumulate each ensemble member's prediction.
    for (int i = 0; i < m_NumIterations; i++) {
        if (instance.classAttribute().isNumeric()) {
            sums[0] += m_Classifiers[i].classifyInstance(instance);
        } else {
            newProbs = m_Classifiers[i].distributionForInstance(instance);
            for (int j = 0; j < newProbs.length; j++)
                sums[j] += newProbs[j];
        }
    }

    // Average for regression, normalize for classification.
    if (instance.classAttribute().isNumeric()) {
        sums[0] /= (double) m_NumIterations;
        return sums;
    } else if (Utils.eq(Utils.sum(sums), 0)) {
        return sums;
    } else {
        Utils.normalize(sums);
        return sums;
    }
}
/**
 * Replace the class values of the instances from the current iteration
 * with residuals after predicting with the supplied classifier.
 *
 * @param data the instances to predict
 * @param c the classifier to use
 * @return a new set of instances with class values replaced by residuals
 * @throws Exception if something goes wrong
 */
private Instances residualReplace(Instances data, Classifier c) throws Exception {
    Instances newInst = new Instances(data);
    for (int i = 0; i < newInst.numInstances(); i++) {
        double pred = c.classifyInstance(newInst.instance(i));
        if (Utils.isMissingValue(pred)) {
            throw new UnassignedClassException("AdditiveRegression: base learner predicted missing value.");
        }
        newInst.instance(i).setClassValue(newInst.instance(i).classValue() - (pred * getShrinkage()));
    }
    return newInst;
}
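residualReplace is the core of an additive-regression loop: each stage fits what the previous stages left unexplained. A hedged sketch of such a driver; numStages, baseLearner, and buildStages are illustrative names, not the library's API:

// Hypothetical boosting driver built around residualReplace.
private Classifier[] buildStages(Instances trainData, Classifier baseLearner, int numStages) throws Exception {
    Classifier[] stages = new Classifier[numStages];
    Instances residuals = new Instances(trainData);
    for (int s = 0; s < numStages; s++) {
        stages[s] = AbstractClassifier.makeCopy(baseLearner);
        stages[s].buildClassifier(residuals);              // fit the current residuals
        residuals = residualReplace(residuals, stages[s]); // subtract the shrunken predictions
    }
    // A final prediction would sum the shrinkage-scaled stage outputs.
    return stages;
}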
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception {
    double[] confidences = new double[numLabels];
    boolean[] bipartition = new boolean[numLabels];
    Instance newInstance = pt6Trans.transformInstance(instance);

    // Calculate confidences: query the base classifier once per label,
    // setting the label-name attribute before each call.
    for (int i = 0; i < numLabels; i++) {
        newInstance.setDataset(transformed);
        newInstance.setValue(newInstance.numAttributes() - 2,
                instance.dataset().attribute(labelIndices[i]).name());
        double[] temp = baseClassifier.distributionForInstance(newInstance);
        confidences[i] = temp[transformed.classAttribute().indexOfValue("1")];
        bipartition[i] = temp[transformed.classAttribute().indexOfValue("1")]
                >= temp[transformed.classAttribute().indexOfValue("0")];
    }

    return new MultiLabelOutput(bipartition, confidences);
}
double[] rt = new double[m_NumClasses];

Instances insts = new Instances(exmp.dataset(), 0);
insts.add(exmp);
insts.deleteAttributeAt(0); // remove the bagIndex attribute

double n = insts.numInstances();

// Each single instance in the bag casts a weighted vote per boosting iteration.
for (int y = 0; y < n; y++) {
    Instance ins = insts.instance(y);
    for (int x = 0; x < m_NumIterations; x++) {
        rt[(int) m_Models[x].classifyInstance(ins)] += m_Beta[x] / n;
    }
}

Utils.normalize(rt);
return rt;
@Override
public double[] distributionForInstance(Instance x) throws Exception {
    int L = x.classIndex();

    //if there is only one class (as for e.g. in some hier. mtds) predict it
    //if(L == 1) return new double[]{1.0};

    Instance x_sl = convertInstance(x, L);   // the SL instance
    x_sl.setDataset(m_InstancesTemplate);    // where y in {comb_1,comb_2,...,comb_k}

    double w[] = m_Classifier.distributionForInstance(x_sl); // w[j] = p(y_j) for each j = 1,...,L
    int max_j = Utils.maxIndex(w);           // j of max w[j]
    //int max_j = (int)m_Classifier.classifyInstance(x_sl);  // where comb_i is selected

    String y_max = m_InstancesTemplate.classAttribute().value(max_j); // comb_i, e.g. "0+3+0+0+1+2+0+0"
    // "0+3+0+0+1+2+0+0" -> [0.0,3.0,0.0,...,0.0]
    double y[] = Arrays.copyOf(MLUtils.toDoubleArray(MLUtils.decodeValue(y_max)), L * 2);

    // Tally the mass each labelset assigns to each per-label value.
    HashMap<Double,Double> votes[] = new HashMap[L];
    for (int j = 0; j < L; j++) {
        votes[j] = new HashMap<Double,Double>();
    }
    for (int i = 0; i < w.length; i++) {
        double y_i[] = MLUtils.toDoubleArray(MLUtils.decodeValue(m_InstancesTemplate.classAttribute().value(i)));
        for (int j = 0; j < y_i.length; j++) {
            votes[j].put(y_i[j], votes[j].containsKey(y_i[j]) ? votes[j].get(y_i[j]) + w[i] : w[i]);
        }
    }

    // some confidence information
    for (int j = 0; j < L; j++) {
        y[j + L] = votes[j].size() > 0 ? Collections.max(votes[j].values()) : 0.0;
    }
    return y;
}
/**
 * Makes a level-1 instance from the given instance.
 *
 * @param instance the instance to be transformed
 * @return the level-1 instance
 * @throws Exception if the instance generation fails
 */
protected Instance metaInstance(Instance instance) throws Exception {
    double[] values = new double[m_MetaFormat.numAttributes()];
    Instance metaInstance;
    int i = 0;
    for (int k = 0; k < m_Classifiers.length; k++) {
        Classifier classifier = getClassifier(k);
        if (m_BaseFormat.classAttribute().isNumeric()) {
            // One prediction per base classifier for a numeric class.
            values[i++] = classifier.classifyInstance(instance);
        } else {
            // One probability per class value per base classifier.
            double[] dist = classifier.distributionForInstance(instance);
            for (int j = 0; j < dist.length; j++) {
                values[i++] = dist[j];
            }
        }
    }
    values[i] = instance.classValue();
    metaInstance = new DenseInstance(1, values);
    metaInstance.setDataset(m_MetaFormat);
    return metaInstance;
}
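A short sketch of how such a level-1 instance is typically consumed, assuming a meta classifier field like Weka Stacking's m_MetaClassifier; the method name is illustrative:

// Hypothetical: classify by transforming into the level-1 space first.
public double classifyViaMetaLevel(Instance instance) throws Exception {
    Instance meta = metaInstance(instance); // base predictions become features
    return m_MetaClassifier.classifyInstance(meta);
}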
@Override
public void buildClassifier(Instances D) throws Exception {
    testCapabilities(D);
    // Record every label combination seen in the training data.
    for (int i = 0; i < D.numInstances(); i++) {
        m_Count.put(MLUtils.toBitString(D.instance(i), D.classIndex()), 0);
    }
    m_Classifier.buildClassifier(D);
}
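As written, the map only records which labelsets occur (every value stays 0). If actual frequencies are wanted, a hedged counting variant of the loop, assuming m_Count is a HashMap<String,Integer>:

// Hypothetical counting variant: increment once per observed labelset.
for (int i = 0; i < D.numInstances(); i++) {
    String key = MLUtils.toBitString(D.instance(i), D.classIndex());
    m_Count.merge(key, 1, Integer::sum);
}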