/**
 * Trains the underlying Weka clusterer on the given data set.
 *
 * @param dataSet the training data
 * @throws Exception if the Weka clusterer fails to build
 */
public void train(ListDataSet dataSet) throws Exception {
    // Convert to Weka's Instances representation before clustering.
    // NOTE(review): the meaning of the final 'false' flag is not visible
    // here — confirm against DataSetToInstancesWrapper.
    DataSetToInstancesWrapper wrapped = new DataSetToInstancesWrapper(dataSet, discrete, false);
    instances = wrapped;
    wekaClusterer.buildClusterer(instances);
}
/**
 * Predicts the multi-label output for an instance by first locating its
 * cluster, then delegating to the per-cluster learner.
 *
 * @param instance the (labeled) instance to predict for
 * @return the prediction of the learner trained for the instance's cluster
 * @throws Exception if clustering or prediction fails
 * @throws InvalidDataException if the instance is invalid
 */
@Override
protected MultiLabelOutput makePredictionInternal(Instance instance) throws Exception, InvalidDataException {
    // Strip the label attributes before clustering — presumably the
    // clusterer was trained on label-free data; confirm in train().
    Instance unlabeled = RemoveAllLabels.transformInstance(instance, labelIndices);
    int assignedCluster = clusterer.clusterInstance(unlabeled);
    // The ORIGINAL (labeled) instance is passed to the per-cluster learner.
    return multi[assignedCluster].makePrediction(instance);
}
/**
 * Returns the wrapped model's cluster membership distribution for the
 * given instance.
 *
 * @param inst the instance to compute the distribution for
 * @return per-cluster membership values from the wrapped model
 * @throws Exception if the wrapped model fails
 */
@Override
public double[] distributionForInstance(Instance inst) throws Exception {
    // Pure delegation to the wrapped model.
    return m_model.distributionForInstance(inst);
}
// Fragment (mid-method): builds the clusterer, then routes each training
// instance into the data set of its assigned cluster.
// NOTE(review): 'removedInstances' presumably has the labels removed (name
// suggests so) and the loop variable 'i' is declared outside this view — confirm.
clusterer.buildClusterer(removedInstances);
numClusters = clusterer.numberOfClusters();
int clusterOfInstance = clusterer.clusterInstance(removedInstances.instance(i));
// The original (full) training instance is added, not the reduced copy.
subsetMultiLabelInstances[clusterOfInstance].getDataSet().add(trainInstances.instance(i));
/**
 * Clusters the given data set with the wrapped Weka clusterer.
 *
 * Weka cannot handle class values during clustering, so the class values
 * are saved, removed, and restored after the instances have been assigned
 * to clusters.
 *
 * @param data the data set to cluster; its class values are temporarily
 *             cleared and restored before this method returns
 * @return one data set per cluster produced by the Weka clusterer
 * @throws WekaException wrapping any failure of the underlying clusterer
 */
public Dataset[] cluster(Dataset data) {
    try {
        // Save and remove the class values first; they are restored below.
        HashMap<Integer, Object> classValueMapping = new HashMap<>();
        for (Instance i : data) {
            classValueMapping.put(i.getID(), i.classValue());
            i.setClassValue(null);
        }
        data.classes().clear();

        /* Convert to Weka and train clustering */
        Instances insts = new ToWekaUtils(data).getDataset();
        // ArrayList instead of the legacy synchronized Vector; fully
        // qualified because no import block is visible in this view.
        java.util.List<Dataset> output = new java.util.ArrayList<>();
        wekaCluster.buildClusterer(insts);

        /* Apply clustering to the data set and restore class values */
        for (int i = 0; i < insts.numInstances(); i++) {
            int clusterIndex = wekaCluster.clusterInstance(insts.instance(i));
            // Grow the output lazily so any cluster index is valid.
            while (output.size() <= clusterIndex) {
                output.add(new DefaultDataset());
            }
            data.instance(i).setClassValue(classValueMapping.get(data.instance(i).getID()));
            output.get(clusterIndex).add(data.instance(i));
        }
        // Preferred toArray idiom: pass a zero-length array.
        return output.toArray(new Dataset[0]);
    } catch (Exception e) {
        // Preserve the original cause when rethrowing.
        throw new WekaException(e);
    }
}
// Fragment (mid-method): per-instance evaluation of a built clusterer.
int cnum;                                 // cluster index for the current instance
double loglk = 0.0;                       // presumably a log-likelihood accumulator — confirm
int cc = clusterer.numberOfClusters();
double[] instanceStats = new double[cc];  // presumably per-cluster counts — confirm
int unclusteredInstances = 0;             // instances the clusterer could not assign
// NOTE(review): the matching 'if' branch and the enclosing loop are outside this view.
} else {
    try {
        cnum = clusterer.clusterInstance(inst);
// Fragment (mid-loop): writes the predicted cluster id into the last
// attribute of instance i, then one membership value per cluster.
double predCluster = clusterer.clusterInstance(testSet.instance(i));
newInstances.instance(i).setValue(newInstances.numAttributes() - 1, predCluster);
// NOTE(review): the return value is discarded here; 'probs' used below is
// presumably assigned from this call on a line outside this view — confirm.
clusterer.distributionForInstance(testSet.instance(i));
for (int j = 0; j < clusterer.numberOfClusters(); j++) {
    newInstances.instance(i).setValue(testSet.numAttributes() + j, probs[j]);
/**
 * Returns the number of clusters.
 *
 * @return the number of clusters generated for a training dataset
 * @throws Exception if the number of clusters could not be returned successfully
 */
@Override
public int numberOfClusters() throws Exception {
    // Straight delegation to the wrapped clusterer.
    return m_wrappedClusterer.numberOfClusters();
}
// Fragment (mid-method): after building the clusterer, creates one nominal
// value per cluster ("cluster1", "cluster2", ...).
m_ActualClusterer.buildClusterer(toFilterIgnoringAttributes);
// NOTE(review): dangling ')' — the start of this statement is outside this view.
m_ActualClusterer.numberOfClusters());
for (int i = 0; i < m_ActualClusterer.numberOfClusters(); i++) {
    nominal_values.add("cluster" + (i + 1));
/** * Build the clusterer on the filtered data. * * @param data the training data * @throws Exception if the clusterer could not be built successfully */ @Override public void buildClusterer(Instances data) throws Exception { if (m_Clusterer == null) { throw new Exception("No base clusterer has been set!"); } // remove instances with missing class if (data.classIndex() > -1) { data = new Instances(data); data.deleteWithMissingClass(); } m_Filter.setInputFormat(data); // filter capabilities are checked here data = Filter.useFilter(data, m_Filter); // can clusterer handle the data? getClusterer().getCapabilities().testWithFail(data); m_FilteredInstances = data.stringFreeStructure(); m_Clusterer.buildClusterer(data); }
/**
 * Returns the Capabilities of this filter.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
    // Start from the wrapped clusterer's capabilities, then relax them so
    // this filter accepts every class type and even an empty data set.
    Capabilities caps = m_Clusterer.getCapabilities();
    caps.enableAllClasses();
    caps.setMinimumNumberInstances(0);
    return caps;
}
// Fragment (heavily truncated, mid-method): trains the wrapped clusterer and
// then fits per-(cluster, attribute) estimators from the cluster assignments.
// NOTE(review): several loop bodies and closing braces are missing from this
// view — the comments below are hedged accordingly.
throw new Exception("No clusterer has been set");
m_wrappedClusterer.buildClusterer(data);
// One discrete estimator per (cluster, attribute).
m_model = new DiscreteEstimator[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
// Presumably [cluster][attribute][mean, stddev] — confirm (index 0 is divided
// by the weight below, index 1 gets a sqrt).
m_modelNormal = new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()][2];
double[][] weights = new double[m_wrappedClusterer.numberOfClusters()][data.numAttributes()];
m_priors = new double[m_wrappedClusterer.numberOfClusters()];
for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
    m_priors[i] = 1.0; // laplace correction
    for (int j = 0; j < data.numAttributes(); j++) {
// Accumulate instance weights into the priors of their assigned clusters.
for (int i = 0; i < data.numInstances(); i++) {
    inst = data.instance(i);
    int cluster = m_wrappedClusterer.clusterInstance(inst);
    m_priors[cluster] += inst.weight();
// Normalize the accumulated statistics by the per-cell weights.
for (int j = 0; j < data.numAttributes(); j++) {
    for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
        if (weights[i][j] > 0) {
            m_modelNormal[i][j][0] /= weights[i][j];
for (int i = 0; i < m_wrappedClusterer.numberOfClusters(); i++) {
    if (weights[i][j] > 0) {
        m_modelNormal[i][j][1] = Math.sqrt(m_modelNormal[i][j][1]
// Fragment (mid-method): builds the clusterer, then appends "index: cluster"
// lines (1-based instance index) to the result string.
// NOTE(review): the loop over 'i' and the matching catch block are outside
// this view.
clusterer.buildClusterer(data);
result += "\n";
try {
    cluster = clusterer.clusterInstance(data.instance(i));
    result += "" + (i+1) + ": " + cluster;
// Fragment (mid-method): per-instance evaluation of a built clusterer.
int cnum;                                 // cluster index for the current instance
double loglk = 0.0;                       // presumably a log-likelihood accumulator — confirm
int cc = clusterer.numberOfClusters();
double[] instanceStats = new double[cc];  // presumably per-cluster counts — confirm
int unclusteredInstances = 0;             // instances the clusterer could not assign
// NOTE(review): the matching 'if' branch and the enclosing loop are outside this view.
} else {
    try {
        cnum = clusterer.clusterInstance(inst);
// Fragment (mid-loop): writes the predicted cluster id into the last
// attribute of instance i, then one membership value per cluster.
double predCluster = clusterer.clusterInstance(testSet.instance(i));
newInstances.instance(i).setValue(newInstances.numAttributes() - 1, predCluster);
// NOTE(review): the return value is discarded here; 'probs' used below is
// presumably assigned from this call on a line outside this view — confirm.
clusterer.distributionForInstance(testSet.instance(i));
for (int j = 0; j < clusterer.numberOfClusters(); j++) {
    newInstances.instance(i).setValue(testSet.numAttributes() + j, probs[j]);
/**
 * Returns the number of clusters.
 *
 * @return the number of clusters generated for a training dataset
 * @throws Exception if the number of clusters could not be returned successfully
 */
@Override
public int numberOfClusters() throws Exception {
    // Delegate to the base clusterer.
    return m_Clusterer.numberOfClusters();
}
} // end of enclosing class (header is outside this view)
// Fragment (mid-method): after building the clusterer, creates one nominal
// value per cluster ("cluster1", "cluster2", ...).
m_ActualClusterer.buildClusterer(toFilterIgnoringAttributes);
// NOTE(review): dangling ')' — the start of this statement is outside this view.
m_ActualClusterer.numberOfClusters());
for (int i = 0; i < m_ActualClusterer.numberOfClusters(); i++) {
    nominal_values.add("cluster" + (i + 1));
/** * Build the clusterer on the filtered data. * * @param data the training data * @throws Exception if the clusterer could not be built successfully */ @Override public void buildClusterer(Instances data) throws Exception { if (m_Clusterer == null) { throw new Exception("No base clusterer has been set!"); } // remove instances with missing class if (data.classIndex() > -1) { data = new Instances(data); data.deleteWithMissingClass(); } m_Filter.setInputFormat(data); // filter capabilities are checked here data = Filter.useFilter(data, m_Filter); // can clusterer handle the data? getClusterer().getCapabilities().testWithFail(data); m_FilteredInstances = data.stringFreeStructure(); m_Clusterer.buildClusterer(data); }
/**
 * Returns the Capabilities of this filter.
 *
 * @return the capabilities of this object
 * @see Capabilities
 */
@Override
public Capabilities getCapabilities() {
    Capabilities result = m_Clusterer.getCapabilities();
    // Relax the base clusterer's constraints: allow every class type and
    // an empty training set.
    result.enableAllClasses();
    result.setMinimumNumberInstances(0);
    return result;
}
/** * Builds the clusters */ private void buildClusterer() throws Exception { if (m_trainingSet.classIndex() < 0) { m_Clusterer.buildClusterer(m_trainingSet); } else { // class based evaluation if class attribute is set Remove removeClass = new Remove(); removeClass.setAttributeIndices("" + (m_trainingSet.classIndex() + 1)); removeClass.setInvertSelection(false); removeClass.setInputFormat(m_trainingSet); Instances clusterTrain = Filter.useFilter(m_trainingSet, removeClass); m_Clusterer.buildClusterer(clusterTrain); } }