/**
 * Stack two Instances together row-wise.
 *
 * @param D1 the dataset whose header and rows come first
 * @param D2 the dataset whose rows are appended after D1's
 * @return a new dataset containing D1's rows followed by D2's rows
 */
public static final Instances combineInstances(Instances D1, Instances D2) {
  // Start from a full copy of D1, then append every row of D2 in order.
  Instances combined = new Instances(D1);
  int extra = D2.numInstances();
  for (int row = 0; row < extra; row++) {
    combined.add(D2.instance(row));
  }
  return combined;
}
/**
 * Add the supplied instances to the training header
 *
 * @param toAdd the instances to add
 */
public void addToTrainingHeader(Instances toAdd) {
  // Append each incoming row to the stored training header dataset.
  int count = toAdd.numInstances();
  for (int row = 0; row < count; row++) {
    m_trainingHeader.add(toAdd.instance(row));
  }
}
/**
 * LabelCardinality - return the label cardinality of dataset D of L labels.
 *
 * @param D the dataset (labels occupy attribute indices 0..L-1)
 * @param L the number of labels
 * @return the average number of relevant labels per instance
 */
public static final double labelCardinality(Instances D, int L) {
  double total = 0.0;
  double N = (double) D.numInstances();
  for (int i = 0; i < D.numInstances(); i++) {
    Instance x = D.instance(i);
    for (int j = 0; j < L; j++) {
      // Missing label entries contribute nothing to the sum.
      if (!x.isMissing(j)) {
        total += x.value(j);
      }
    }
  }
  return total / N;
}
/**
 * Tests the floor() function of the expression filter: the appended
 * attribute must equal Math.floor(a6 + a3/5) for every instance.
 */
public void testFloor() {
  m_Filter = getFilter("floor(a6+a3/5)");
  Instances result = useFilter();
  for (int row = 0; row < result.numInstances(); row++) {
    Instance inst = result.instance(row);
    // The filter appends its result as the last attribute.
    int lastAtt = inst.numAttributes() - 1;
    double expected = Math.floor(inst.value(5) + inst.value(2) / 5);
    assertEquals("Instance " + (row + 1), expected, inst.value(lastAtt), EXPR_DELTA);
  }
}
/**
 * Initializes the per-attribute minimum/maximum arrays from the given data.
 *
 * @param data the instances used to seed the min/max ranges
 */
protected void initMinMax(Instances data) {
  int numAtts = data.numAttributes();
  m_Min = new double[numAtts];
  m_Max = new double[numAtts];
  // NaN marks "no value observed yet" until updateMinMax fills in bounds.
  for (int att = 0; att < numAtts; att++) {
    m_Min[att] = Double.NaN;
    m_Max[att] = Double.NaN;
  }
  for (int row = 0; row < data.numInstances(); row++) {
    updateMinMax(data.instance(row));
  }
}
/**
 * Transforms a single instance for prediction: keeps its feature values and
 * prepends the compressed pseudo-label attributes, all set to missing.
 *
 * @param x the instance to transform
 * @return the merged instance (pseudo labels first, then features)
 * @throws Exception if feature extraction or merging fails
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
  // Wrap x in a one-row copy of its dataset so extractPart can process it.
  Instances tmpInst = new Instances(x.dataset());
  tmpInst.delete();
  tmpInst.add(x);
  // false => extract the feature (non-label) part of the instance.
  Instances features = this.extractPart(tmpInst, false);
  // Build a one-row dataset from the compressed pseudo-label template.
  Instances pseudoLabels = new Instances(this.compressedTemplateInst);
  Instance tmpin = pseudoLabels.instance(0);
  pseudoLabels.delete();
  pseudoLabels.add(tmpin);
  // All pseudo-label values are unknown at prediction time -> mark missing.
  for (int i = 0; i < pseudoLabels.classIndex(); i++) {
    pseudoLabels.instance(0).setMissing(i);
  }
  // Column-wise merge: pseudo-label attributes followed by feature attributes.
  Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
  // NOTE(review): class index is set to the pseudo-label attribute count —
  // presumably the MEKA convention where classIndex == number of labels; verify.
  newDataSet.setClassIndex(pseudoLabels.numAttributes());
  return newDataSet.instance(0);
}
/**
 * Transforms a single instance for prediction: keeps its feature values and
 * prepends the compressed pseudo-label attributes, all set to missing.
 *
 * @param x the instance to transform
 * @return the merged instance (pseudo labels first, then features)
 * @throws Exception if feature extraction or merging fails
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
  // Wrap x in a one-row copy of its dataset so extractPart can process it.
  Instances tmpInst = new Instances(x.dataset());
  tmpInst.delete();
  tmpInst.add(x);
  // false => extract the feature (non-label) part of the instance.
  Instances features = this.extractPart(tmpInst, false);
  // Build a one-row dataset from the compressed-matrix template.
  Instances pseudoLabels = new Instances(this.compressedMatrix);
  Instance tmpin = pseudoLabels.instance(0);
  pseudoLabels.delete();
  pseudoLabels.add(tmpin);
  // All pseudo-label values are unknown at prediction time -> mark missing.
  for (int i = 0; i < pseudoLabels.classIndex(); i++) {
    pseudoLabels.instance(0).setMissing(i);
  }
  // Column-wise merge: pseudo-label attributes followed by feature attributes.
  Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
  // NOTE(review): class index is set from this.size — presumably the number
  // of compressed labels; confirm it matches pseudoLabels' attribute count.
  newDataSet.setClassIndex(this.size);
  return newDataSet.instance(0);
}
public void testPruneMinFreq() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(1); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // min freq of 1 should keep all terms assertEquals(15, consolidated.size()); }
/**
 * Calculates the distribution, in the dataset, of the indexed nominal
 * attribute values. It also counts the actual number of training instances
 * that contributed (those with non-missing values) to calculate the
 * distribution.
 */
private void generateAttrDistribution() {
  // One counter per nominal value of the attribute under consideration.
  m_Distribution = new int[m_TrainSet.attribute(m_AttrIndex).numValues()];
  for (int idx = 0; idx < m_NumInstances; idx++) {
    Instance current = m_TrainSet.instance(idx);
    if (current.isMissing(m_AttrIndex)) {
      continue; // missing values contribute nothing
    }
    m_TotalCount++;
    m_Distribution[(int) current.value(m_AttrIndex)]++;
  }
}
/**
 * LabelCardinality - return the label cardinality of dataset D of L labels.
 *
 * @param D the dataset (labels occupy attribute indices 0..L-1)
 * @param L the number of labels
 * @return the average number of relevant labels per instance
 */
public static final double labelCardinality(Instances D, int L) {
  double total = 0.0;
  double N = (double) D.numInstances();
  for (int i = 0; i < D.numInstances(); i++) {
    Instance x = D.instance(i);
    for (int j = 0; j < L; j++) {
      // Missing label entries contribute nothing to the sum.
      if (!x.isMissing(j)) {
        total += x.value(j);
      }
    }
  }
  return total / N;
}
/**
 * Stack two Instances together row-wise.
 *
 * @param D1 the dataset whose header and rows come first
 * @param D2 the dataset whose rows are appended after D1's
 * @return a new dataset containing D1's rows followed by D2's rows
 */
public static final Instances combineInstances(Instances D1, Instances D2) {
  // Start from a full copy of D1, then append every row of D2 in order.
  Instances combined = new Instances(D1);
  int extra = D2.numInstances();
  for (int row = 0; row < extra; row++) {
    combined.add(D2.instance(row));
  }
  return combined;
}
/**
 * Tests the exp() function of the expression filter: the appended
 * attribute must equal Math.exp(a6 - a3) for every instance.
 */
public void testExp() {
  m_Filter = getFilter("exp(a6-a3)");
  Instances result = useFilter();
  for (int row = 0; row < result.numInstances(); row++) {
    Instance inst = result.instance(row);
    // The filter appends its result as the last attribute.
    int lastAtt = inst.numAttributes() - 1;
    double expected = Math.exp(inst.value(5) - inst.value(2));
    assertEquals("Instance " + (row + 1), expected, inst.value(lastAtt), EXPR_DELTA);
  }
}
/**
 * Initializes the per-attribute minimum/maximum arrays from the given data.
 *
 * @param data the instances used to seed the min/max ranges
 */
protected void initMinMax(Instances data) {
  int numAtts = data.numAttributes();
  m_Min = new double[numAtts];
  m_Max = new double[numAtts];
  // NaN marks "no value observed yet" until updateMinMax fills in bounds.
  for (int att = 0; att < numAtts; att++) {
    m_Min[att] = Double.NaN;
    m_Max[att] = Double.NaN;
  }
  for (int row = 0; row < data.numInstances(); row++) {
    updateMinMax(data.instance(row));
  }
}
/**
 * Transforms a single instance for prediction: keeps its feature values and
 * prepends the compressed pseudo-label attributes, all set to missing.
 *
 * @param x the instance to transform
 * @return the merged instance (pseudo labels first, then features)
 * @throws Exception if feature extraction or merging fails
 */
@Override
public Instance transformInstance(Instance x) throws Exception {
  // Wrap x in a one-row copy of its dataset so extractPart can process it.
  Instances tmpInst = new Instances(x.dataset());
  tmpInst.delete();
  tmpInst.add(x);
  // false => extract the feature (non-label) part of the instance.
  Instances features = this.extractPart(tmpInst, false);
  // Build a one-row dataset from the compressed pseudo-label template.
  Instances pseudoLabels = new Instances(this.compressedTemplateInst);
  Instance tmpin = pseudoLabels.instance(0);
  pseudoLabels.delete();
  pseudoLabels.add(tmpin);
  // All pseudo-label values are unknown at prediction time -> mark missing.
  for (int i = 0; i < pseudoLabels.classIndex(); i++) {
    pseudoLabels.instance(0).setMissing(i);
  }
  // Column-wise merge: pseudo-label attributes followed by feature attributes.
  Instances newDataSet = Instances.mergeInstances(pseudoLabels, features);
  // NOTE(review): class index is set to the pseudo-label attribute count —
  // presumably the MEKA convention where classIndex == number of labels; verify.
  newDataSet.setClassIndex(pseudoLabels.numAttributes());
  return newDataSet.instance(0);
}
public void testFinalizeDictionaryNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // all but "the" and "over" should have been pruned from the dictionary assertEquals(2, consolidated.size()); }
/**
 * Calculates the distribution, in the dataset, of the indexed nominal
 * attribute values. It also counts the actual number of training instances
 * that contributed (those with non-missing values) to calculate the
 * distribution.
 */
private void generateAttrDistribution() {
  // One counter per nominal value of the attribute under consideration.
  m_Distribution = new int[m_TrainSet.attribute(m_AttrIndex).numValues()];
  for (int idx = 0; idx < m_NumInstances; idx++) {
    Instance current = m_TrainSet.instance(idx);
    if (current.isMissing(m_AttrIndex)) {
      continue; // missing values contribute nothing
    }
    m_TotalCount++;
    m_Distribution[(int) current.value(m_AttrIndex)]++;
  }
}
/**
 * Adds the supplied node as a child of this node. All of the child's
 * instances are added to this nodes instances
 *
 * @param child the child to add
 */
protected void addChildNode(CNode child) {
  // Absorb every instance of the child into this node and update stats.
  int total = child.m_clusterInstances.numInstances();
  for (int idx = 0; idx < total; idx++) {
    Instance transferred = child.m_clusterInstances.instance(idx);
    m_clusterInstances.add(transferred);
    updateStats(transferred, false);
  }
  // Lazily create the children list on first use.
  if (m_children == null) {
    m_children = new ArrayList<CNode>();
  }
  m_children.add(child);
}
/**
 * jPMF - Joint PMF.
 *
 * @param D the dataset
 * @param j index of the first label attribute
 * @param k index of the second label attribute
 * @return the joint PMF of the j-th and k-th labels in D.
 */
public static double[][] jPMF(Instances D, int j, int k) {
  int N = D.numInstances();
  double JOINT[][] = new double[D.attribute(j).numValues()][D.attribute(k).numValues()];
  for (int i = 0; i < N; i++) {
    Instance x = D.instance(i);
    // Round the (numeric) label values to their nominal value indices.
    int v_j = (int) Math.round(x.value(j));
    int v_k = (int) Math.round(x.value(k));
    // Each instance contributes 1/N of probability mass to its cell.
    JOINT[v_j][v_k] += (1.0 / (double) N);
  }
  return JOINT;
}
/**
 * Stacks two datasets together row-wise.
 *
 * @param D1 the dataset whose header and rows come first
 * @param D2 the dataset whose rows are appended after D1's
 * @return a new dataset containing D1's rows followed by D2's rows
 */
public static final Instances combineInstances(Instances D1, Instances D2) {
  // Start from a full copy of D1, then append every row of D2 in order.
  Instances combined = new Instances(D1);
  int extra = D2.numInstances();
  for (int row = 0; row < extra; row++) {
    combined.add(D2.instance(row));
  }
  return combined;
}
/**
 * Tests the ceil() function of the expression filter: the appended
 * attribute must equal Math.ceil(a6 * a3 / 5) for every instance.
 */
public void testCeil() {
  m_Filter = getFilter("ceil(a6*a3/5)");
  Instances result = useFilter();
  for (int row = 0; row < result.numInstances(); row++) {
    Instance inst = result.instance(row);
    // The filter appends its result as the last attribute.
    int lastAtt = inst.numAttributes() - 1;
    double expected = Math.ceil(inst.value(5) * inst.value(2) / 5);
    assertEquals("Instance " + (row + 1), expected, inst.value(lastAtt), EXPR_DELTA);
  }
}