public void testTypicalNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testSaveLoadDictionaryPlainTextNormalize() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setNormalize(true); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); StringWriter sw = new StringWriter(); builder.saveDictionary(sw); String dictText = sw.toString(); assertTrue(dictText.startsWith("@@@3.39036")); StringReader sr = new StringReader(dictText); DictionaryBuilder builder2 = new DictionaryBuilder(); builder2.setup(structure); builder2.loadDictionary(sr); // just returns the loaded dictionary Map<String, int[]> consolidated = builder2.finalizeDictionary(); assertEquals(2, consolidated.size()); }
public void testTypicalNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testSaveLoadDictionaryPlainTextNormalize() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setNormalize(true); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } builder.finalizeDictionary(); StringWriter sw = new StringWriter(); builder.saveDictionary(sw); String dictText = sw.toString(); assertTrue(dictText.startsWith("@@@3.39036")); StringReader sr = new StringReader(dictText); DictionaryBuilder builder2 = new DictionaryBuilder(); builder2.setup(structure); builder2.loadDictionary(sr); // just returns the loaded dictionary Map<String, int[]> consolidated = builder2.finalizeDictionary(); assertEquals(2, consolidated.size()); }
public void testTypicalClassAttPresent() throws Exception { Instances data2 = getData2(); Instances structure = new Instances(data2, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data2.numInstances(); i++) { builder.processInstance(data2.instance(i)); } // should be two dictionaries (one for each class) assertEquals(2, builder.getDictionaries(false).length); assertEquals(8, builder.getDictionaries(false)[0].size()); assertEquals(9, builder.getDictionaries(false)[1].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // first dictionary: word count (index 0) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[0]); // first dictionary: doc count (index 1) should be 1 assertEquals(1, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalClassAttPresent() throws Exception { Instances data2 = getData2(); Instances structure = new Instances(data2, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data2.numInstances(); i++) { builder.processInstance(data2.instance(i)); } // should be two dictionaries (one for each class) assertEquals(2, builder.getDictionaries(false).length); assertEquals(8, builder.getDictionaries(false)[0].size()); assertEquals(9, builder.getDictionaries(false)[1].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // first dictionary: word count (index 0) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[0]); // first dictionary: doc count (index 1) should be 1 assertEquals(1, builder.getDictionaries(false)[0].get("the")[1]); }
/**
 * Builds and finalizes a dictionary from {@code getData1()} with a minimum
 * term frequency of 2, then checks that the vectorized format is available
 * and has exactly 2 attributes.
 *
 * @throws Exception if building the dictionary or obtaining the format fails
 */
public void testGetVectorizedStructureNoClass() throws Exception {
  Instances corpus = getData1();
  Instances header = new Instances(corpus, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  int row = 0;
  while (row < corpus.numInstances()) {
    dictBuilder.processInstance(corpus.instance(row));
    row++;
  }
  dictBuilder.finalizeDictionary();

  // the output format is requested only after the dictionary is finalized
  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(2, vectorized.numAttributes());
}
/**
 * Builds and finalizes a dictionary from {@code getData3()} with a minimum
 * term frequency of 2, then checks that the vectorized format is available
 * and has exactly 4 attributes.
 *
 * @throws Exception if building the dictionary or obtaining the format fails
 */
public void testGetVectorizedStructureNoClassExtraAtts() throws Exception {
  Instances corpus = getData3();
  Instances header = new Instances(corpus, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  int row = 0;
  while (row < corpus.numInstances()) {
    dictBuilder.processInstance(corpus.instance(row));
    row++;
  }
  dictBuilder.finalizeDictionary();

  // the output format is requested only after the dictionary is finalized
  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(4, vectorized.numAttributes());
}
/**
 * Builds and finalizes a dictionary from {@code getData1()} with a minimum
 * term frequency of 2, then checks that the vectorized format is available
 * and has exactly 2 attributes.
 *
 * @throws Exception if building the dictionary or obtaining the format fails
 */
public void testGetVectorizedStructureNoClass() throws Exception {
  Instances corpus = getData1();
  Instances header = new Instances(corpus, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  int row = 0;
  while (row < corpus.numInstances()) {
    dictBuilder.processInstance(corpus.instance(row));
    row++;
  }
  dictBuilder.finalizeDictionary();

  // the output format is requested only after the dictionary is finalized
  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(2, vectorized.numAttributes());
}
/**
 * Builds and finalizes a dictionary from {@code getData3()} with a minimum
 * term frequency of 2, then checks that the vectorized format is available
 * and has exactly 4 attributes.
 *
 * @throws Exception if building the dictionary or obtaining the format fails
 */
public void testGetVectorizedStructureNoClassExtraAtts() throws Exception {
  Instances corpus = getData3();
  Instances header = new Instances(corpus, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  int row = 0;
  while (row < corpus.numInstances()) {
    dictBuilder.processInstance(corpus.instance(row));
    row++;
  }
  dictBuilder.finalizeDictionary();

  // the output format is requested only after the dictionary is finalized
  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(4, vectorized.numAttributes());
}