@Override public Capabilities getCapabilities() { Capabilities result = new Capabilities(this); result.disableAll(); // attributes result.enableAllAttributes(); result.enable(Capability.MISSING_VALUES); // class result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); result.setMinimumNumberInstances(0); return result; }
/** * TestCapabilities. * Make sure the training data is suitable. * @param D the data */ public void testCapabilities(Instances D) throws Exception { // get the classifier's capabilities, enable all class attributes and do the usual test Capabilities cap = getCapabilities(); cap.enableAllClasses(); // get the capabilities again, test class attributes individually int L = D.classIndex(); for(int j = 0; j < L; j++) { Attribute c = D.attribute(j); cap.testWithFail(c,true); } }
/**
 * disables all attribute and class types (including dependencies)
 */
public void disableAll() {
  // types (attribute and class sides)
  disableAllAttributes();
  disableAllClasses();
  // dependencies (attribute and class sides)
  disableAllAttributeDependencies();
  disableAllClassDependencies();
  // the remaining individual capabilities
  disable(Capability.MISSING_VALUES);
  disable(Capability.MISSING_CLASS_VALUES);
  disable(Capability.NO_CLASS);
}
/**
 * enables all attribute and class types (including dependencies)
 */
public void enableAll() {
  // types (attribute and class sides)
  enableAllAttributes();
  enableAllClasses();
  // dependencies (attribute and class sides)
  enableAllAttributeDependencies();
  enableAllClassDependencies();
  // missing-value support on both sides
  enable(Capability.MISSING_VALUES);
  enable(Capability.MISSING_CLASS_VALUES);
  // NOTE(review): unlike disableAll(), NO_CLASS is left untouched here —
  // confirm this asymmetry is intentional.
}
/** * Build the classifier on the filtered data. * * @param data the training data * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if (m_Classifier == null) { throw new Exception("No base classifier has been set!"); } getCapabilities().testWithFail(data); Random r = (data.numInstances() > 0) ? data.getRandomNumberGenerator(getSeed()) : new Random(getSeed()); data = setUp(data, r); if (!data.allInstanceWeightsIdentical() && !(m_Classifier instanceof WeightedInstancesHandler)) { data = data.resampleWithWeights(r); // The filter may have assigned weights. } if (!data.allAttributeWeightsIdentical() && !(m_Classifier instanceof WeightedAttributesHandler)) { data = resampleAttributes(data, false, r); } // can classifier handle the data? getClassifier().getCapabilities().testWithFail(data); if (m_Classifier instanceof Randomizable) { ((Randomizable)m_Classifier).setSeed(r.nextInt()); } m_Classifier.buildClassifier(data); }
// NOTE(review): truncated excerpt — several blocks below are missing their
// closing braces; code is reproduced verbatim with commentary only.

// verify the scheme can handle the data, then work on a copy
getCapabilities().testWithFail(data);
Instances newData = new Instances(data);
// degenerate case: no training instances
if (newData.numInstances() == 0) { m_Classifier.buildClassifier(newData); return;
// class-value count for a nominal class, 1 for anything else
if (newData.classAttribute().isNominal()) { m_numClasses = newData.classAttribute().numValues(); } else { m_numClasses = 1;
// detect whether any instance weight deviates from the first one
double weight = newData.instance(0).weight();
boolean ok = false;
for (int i = 1; i < newData.numInstances(); i++) { if (newData.instance(i).weight() != weight) { ok = true; break;
// if either scheme cannot use instance weights, resample with weights;
// the generator is advanced 10 draws first — presumably to decorrelate
// from other seed-1 generators, TODO confirm
if (!(m_Evaluator instanceof WeightedInstancesHandler) || !(m_Classifier instanceof WeightedInstancesHandler)) { Random r = new Random(1); for (int i = 0; i < 10; i++) { r.nextDouble(); resampledData = newData.resampleWithWeights(r);
// NOTE(review): truncated excerpt — closing braces are missing below;
// code kept verbatim, comments only.

getCapabilities().testWithFail(data);
m_theInstances = new Instances(data);
m_theInstances.deleteWithMissingClass();
m_rr = new Random(1);
if (m_theInstances.classAttribute().isNominal()) {// Set up class priors
// Laplace-style initialization: each class value starts with count 1.0
m_classPriorCounts = new double[data.classAttribute().numValues()];
Arrays.fill(m_classPriorCounts, 1.0);
// accumulate instance weights per class value
for (int i = 0; i < data.numInstances(); i++) { Instance curr = data.instance(i); m_classPriorCounts[(int) curr.classValue()] += curr.weight();
// NOTE(review): normalizes m_classPriors while the counts above went into
// m_classPriorCounts — the copy step is presumably in the elided code; verify.
Utils.normalize(m_classPriors);
// numeric class: set up a discretization filter, class treated as non-nominal
if (m_theInstances.classAttribute().isNumeric()) { m_disTransform = new weka.filters.unsupervised.attribute.Discretize(); m_classIsNominal = false;
// NOTE(review): truncated excerpt (cross-validated parameter selection) —
// braces and the surrounding loop are incomplete; code kept verbatim.

getCapabilities().testWithFail(instances);
Instances trainData = new Instances(instances);
trainData.deleteWithMissingClass();
Instances trainDataCopy = new Instances(trainData); // Just in case base classifier is sensitive to order of data.
m_BestPerformance = -99;
m_NumAttributes = trainData.numAttributes();
// shuffle before fold construction
Random random = new Random(m_Seed);
trainData.randomize(random);
m_TrainFoldSize = trainData.trainCV(m_NumFolds, 0).numInstances();
// early-exit path: build on the order-preserving copy and keep the
// initial options — presumably when there are no parameters to tune,
// TODO confirm against the elided condition
m_Classifier.buildClassifier(trainDataCopy);
m_BestClassifierOptions = m_InitOptions;
return;
// stratify nominal-class data so folds keep the class distribution
if (trainData.classAttribute().isNominal()) { trainData.stratify(m_NumFolds);
Utils.getOption(((CVParameter)m_CVParams.elementAt(i)).m_ParamChar, m_ClassifierOptions);
m_Classifier.buildClassifier(trainDataCopy);
/** * Initialize the classifier. * * @param data the training data to be used for generating the boosted * classifier. * @throws Exception if the classifier could not be built successfully */ public void initializeClassifier(Instances data) throws Exception { super.buildClassifier(data); // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); m_NumClasses = data.numClasses(); m_Betas = new double[m_Classifiers.length]; m_NumIterationsPerformed = 0; m_TrainingData = new Instances(data); m_RandomInstance = new Random(m_Seed); if ((m_UseResampling) || (!(m_Classifier instanceof WeightedInstancesHandler))) { // Normalize weights so that they sum to one and can be used as sampling probabilities double sumProbs = m_TrainingData.sumOfWeights(); for (int i = 0; i < m_TrainingData.numInstances(); i++) { m_TrainingData.instance(i).setWeight(m_TrainingData.instance(i).weight() / sumProbs); } } }
// NOTE(review): truncated excerpt — the final if-block is never closed;
// code kept verbatim, comments only.

getCapabilities().testWithFail(data);
Instances newData = new Instances(data);
// keep an empty header copy of the original format
m_BaseFormat = new Instances(data, 0);
newData.deleteWithMissingClass();
// shuffle before fold construction
Random random = new Random(m_Seed);
newData.randomize(random);
// stratify nominal-class data so folds preserve the class distribution
if (newData.classAttribute().isNominal()) { newData.stratify(m_NumFolds);
/** * Builds the clusterer. * * @param data the training instances. * @throws Exception if something goes wrong. */ @Override public void buildClusterer(Instances data) throws Exception { m_numberOfClusters = -1; m_cobwebTree = null; m_numberSplits = 0; m_numberMerges = 0; // can clusterer handle the data? getCapabilities().testWithFail(data); // randomize the instances data = new Instances(data); if (getSeed() >= 0) { data.randomize(new Random(getSeed())); } for (int i = 0; i < data.numInstances(); i++) { updateClusterer(data.instance(i)); } updateFinished(); }
/** * builds the classifier. * * @param data the training data to be used for generating the * classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); boolean resample = getForceResampleWithWeights() || (!(m_Classifier instanceof WeightedInstancesHandler) && ResampleUtils.hasInstanceWeights(data)); if (resample) { if (getDebug()) System.err.println(getClass().getName() + ": resampling training data"); data = data.resampleWithWeights(new Random(m_Seed)); } m_Classifier.buildClassifier(data); }
// NOTE(review): heavily truncated excerpt — a string-concatenation fragment
// ("using ZeroR model instead!") has lost its enclosing statement, and loops
// are unclosed; code kept verbatim, comments only.

m_RandomInstance = new Random(m_Seed);
int classIndex = data.classIndex();
getCapabilities().testWithFail(data);
m_data = new Instances(data);
m_data.deleteWithMissingClass();
// orphaned tail of a (presumably) warning message — see note above
+ "using ZeroR model instead!");
// fall back to ZeroR
m_ZeroR = new ZeroR();
m_ZeroR.buildClassifier(m_data);
return;
m_ZeroR.buildClassifier(m_data);
// seed per-instance probabilities from the ZeroR distribution
for (int i = 0; i < numInstances; i++) { m_probs[i] = m_ZeroR.distributionForInstance(m_data.instance(i));
// offset-smoothed 0/1 targets per class value
for (int i = 0, k = 0; i < numInstances; i++, k++) { m_trainYs[i][j] = (m_data.instance(k).classValue() == j) ? 1.0 - m_Offset : 0.0 + (m_Offset / (double) m_NumClasses);
// replace the class attribute by a numeric 'pseudo class' at the same index
m_data.setClassIndex(-1);
m_data.deleteAttributeAt(classIndex);
m_data.insertAttributeAt(new Attribute("'pseudo class'"), classIndex);
m_data.setClassIndex(classIndex);
// empty header in the pseudo-class format
m_NumericClassData = new Instances(m_data, 0);
// NOTE(review): truncated method interior (no signature, results of the last
// two expressions are discarded here — presumably assigned in the elided
// code); kept verbatim, comments only.

getCapabilities().testWithFail(data);
m_trainingInstances = new Instances(data);
m_classIndex = m_trainingInstances.classIndex();
m_numAttribs = m_trainingInstances.numAttributes();
// shuffle before the percentage split
m_trainingInstances.randomize(new Random(m_seed));
// size of the training portion of the split
int trainSize = Math.round(m_trainingInstances.numInstances() * splitPercentage / 100);
// index of the class value of interest (e.g. for IR statistics)
m_trainingInstances.classAttribute().indexOfValue(m_IRClassValS);
// NOTE(review): truncated excerpt — the method body below is missing most of
// its closing braces; code kept verbatim, comments only.

public void buildClassifier(Instances instances) throws Exception { getCapabilities().testWithFail(instances);
// work on a copy with unlabeled instances removed
Instances data = new Instances(instances);
data.deleteWithMissingClass();
// reduced-error pruning needs at least m_Folds instances
if (data.numInstances() < m_Folds) { throw new Exception("Not enough data for REP.");
m_ClassAttribute = data.classAttribute();
// class-value count for nominal, 1 otherwise
if (m_ClassAttribute.isNominal()) { m_NumClasses = m_ClassAttribute.numValues(); } else { m_NumClasses = 1;
m_Cnsqt = new double[m_NumClasses];
m_Targets = new ArrayList<double[][]>();
m_Random = new Random(m_Seed);
// nominal class: normalize the consequent distribution, and the default
// distribution when it has positive mass
if (m_ClassAttribute.isNominal()) { Utils.normalize(m_Cnsqt); if (Utils.gr(Utils.sum(m_DefDstr), 0)) { Utils.normalize(m_DefDstr);
// NOTE(review): truncated excerpt (cross-validated scheme selection) — the
// fold/candidate loops and their braces are elided; code kept verbatim.

getCapabilities().testWithFail(data);
Instances newData = new Instances(data);
newData.deleteWithMissingClass();
// shuffle, then stratify when cross-validating a nominal class
Random random = new Random(m_Seed);
newData.randomize(random);
if (newData.classAttribute().isNominal() && (m_NumXValFolds > 1)) { newData.stratify(m_NumXValFolds);
// per-fold train/test split and evaluation of the current candidate
train = newData.trainCV(m_NumXValFolds, j, new Random (1));
test = newData.testCV(m_NumXValFolds, j);
currentClassifier.buildClassifier(train);
evaluation.setPriors(train);
evaluation.evaluateModel(currentClassifier, test);
// non-CV path: single train/evaluate on the training split
currentClassifier.buildClassifier(train);
evaluation = new Evaluation(train);
evaluation.evaluateModel(currentClassifier, test);
System.err.println("Error rate: " + Utils.doubleToString(error, 6, 4) + " for classifier " + currentClassifier.getClass().getName());
// rebuild the winner on the full (cleaned) data
bestClassifier.buildClassifier(newData);
// NOTE(review): truncated excerpt — loop and if bodies are unclosed;
// code kept verbatim, comments only.

// scan for any non-unit instance weight
for (int i = 0; i < data.numInstances(); i++) { if (data.instance(i).weight() != 1.0) { resample = true; break;
if (getDebug()) System.err.println(getClass().getName() + ": resampling training data");
// fold the weights in by resampling
data = data.resampleWithWeights(new Random(m_Seed));
getCapabilities().testWithFail(data);
// NOTE(review): truncated excerpt — several blocks below never close;
// code kept verbatim, comments only.

getCapabilities().testWithFail(data);
data = new Instances(data);
data.deleteWithMissingClass();
// optionally set up missing-value replacement on non-empty data
if (data.numInstances() > 0 && !m_dontReplaceMissing) { m_replaceMissing = new ReplaceMissingValues(); m_replaceMissing.setInputFormat(data);
// check whether every non-class attribute is numeric
for (int i = 0; i < data.numAttributes(); i++) { if (i != data.classIndex()) { if (!data.attribute(i).isNumeric()) { onlyNumeric = false; break;
if (data.numInstances() > 0) { data.randomize(new Random(getSeed())); // randomize the data
train(data);
// NOTE(review): truncated excerpt — both RNG assignments and the while-loop
// below are missing their surrounding branches/braces; code kept verbatim.

getCapabilities().testWithFail(data);
m_data = new Instances( data );
super.buildClassifier(m_data);
// non-empty data: derive the RNG from the data itself
if( m_data.numInstances() > 0 ) { m_random = m_data.getRandomNumberGenerator(m_Seed);
m_random = new Random(m_Seed);
// numeric class: a single bucket holds all the data
if( m_data.classAttribute().isNumeric() ) { m_instancesOfClasses = new Instances[numClasses]; m_instancesOfClasses[0] = m_data;
// partition instances by class value; missing-class instances go into
// the extra trailing bucket at index numClasses
while( enu.hasMoreElements() ) { Instance instance = (Instance)enu.nextElement(); if( instance.classIsMissing() ) { m_instancesOfClasses[numClasses].add( instance );
int c = (int)instance.classValue();
m_instancesOfClasses[c].add( instance );
// NOTE(review): truncated excerpt — the loop and if bodies below never
// close; code kept verbatim, comments only.

getCapabilities().testWithFail(insts);
insts = new Instances(insts);
insts.deleteWithMissingClass();
m_Train = new Instances(insts);
// set up missing-value replacement on the training copy
m_ReplaceMissingValues = new ReplaceMissingValues();
m_ReplaceMissingValues.setInputFormat(m_Train);
m_Train.randomize(new Random(m_Seed));
// credit m_Weights[m_K] for every correctly predicted labeled instance
for (int i = 0; i < m_Train.numInstances(); i++) { Instance inst = m_Train.instance(i); if (!inst.classIsMissing()) { int prediction = makePrediction(m_K, inst); int classValue = (int) inst.classValue(); if (prediction == classValue) { m_Weights[m_K]++;