public void testFinalizeDictionaryNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // all but "the" and "over" should have been pruned from the dictionary assertEquals(2, consolidated.size()); }
public void testFinalizeDictionaryNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // all but "the" and "over" should have been pruned from the dictionary assertEquals(2, consolidated.size()); }
public void testTypicalNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
/**
 * Builds and finalizes a dictionary from the {@code getData3()} fixture with a
 * minimum term frequency of 2, then checks that
 * {@code getVectorizedFormat()} returns a non-null format with 4 attributes.
 *
 * @throws Exception if any step of building the dictionary or format throws
 */
public void testGetVectorizedStructureNoClassExtraAtts() throws Exception {
  Instances data = getData3();
  Instances header = new Instances(data, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  // feed every row of the fixture to the builder
  int total = data.numInstances();
  for (int idx = 0; idx < total; idx++) {
    dictBuilder.processInstance(data.instance(idx));
  }

  // the dictionary is finalized before the vectorized format is requested
  dictBuilder.finalizeDictionary();

  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(4, vectorized.numAttributes());
}
/**
 * Builds and finalizes a dictionary from the {@code getData3()} fixture with a
 * minimum term frequency of 2, then checks that
 * {@code getVectorizedFormat()} returns a non-null format with 4 attributes.
 *
 * @throws Exception if any step of building the dictionary or format throws
 */
public void testGetVectorizedStructureNoClassExtraAtts() throws Exception {
  Instances data = getData3();
  Instances header = new Instances(data, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  // feed every row of the fixture to the builder
  int total = data.numInstances();
  for (int idx = 0; idx < total; idx++) {
    dictBuilder.processInstance(data.instance(idx));
  }

  // the dictionary is finalized before the vectorized format is requested
  dictBuilder.finalizeDictionary();

  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(4, vectorized.numAttributes());
}