public void testTypicalNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalClassAttPresent() throws Exception { Instances data2 = getData2(); Instances structure = new Instances(data2, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data2.numInstances(); i++) { builder.processInstance(data2.instance(i)); } // should be two dictionaries (one for each class) assertEquals(2, builder.getDictionaries(false).length); assertEquals(8, builder.getDictionaries(false)[0].size()); assertEquals(9, builder.getDictionaries(false)[1].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // first dictionary: word count (index 0) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[0]); // first dictionary: doc count (index 1) should be 1 assertEquals(1, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testInit() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); // should be just one dictionary (i.e. no class attribute, so no per-class // dictionaries) assertEquals(1, builder.getDictionaries(false).length); }
/**
 * Fills one builder from the {@code getData1()} instances and a second one
 * from the {@code getData4()} instances (both with a minimum term frequency
 * of 1), aggregates the second into the first, and expects the finalized
 * dictionary to contain 17 entries.
 *
 * @throws Exception if building, aggregating or finalizing fails
 */
public void testAggregateDictionaries() throws Exception {
  Instances firstData = getData1();
  Instances secondData = getData4();

  // first builder, fed with the getData1() instances
  Instances firstHeader = new Instances(firstData, 0);
  DictionaryBuilder firstBuilder = new DictionaryBuilder();
  firstBuilder.setMinTermFreq(1);
  firstBuilder.setup(firstHeader);
  int row = 0;
  while (row < firstData.numInstances()) {
    firstBuilder.processInstance(firstData.instance(row));
    row++;
  }

  // second builder, fed with the getData4() instances
  Instances secondHeader = new Instances(secondData, 0);
  DictionaryBuilder secondBuilder = new DictionaryBuilder();
  secondBuilder.setMinTermFreq(1);
  secondBuilder.setup(secondHeader);
  int otherRow = 0;
  while (otherRow < secondData.numInstances()) {
    secondBuilder.processInstance(secondData.instance(otherRow));
    otherRow++;
  }

  // combine both builders and finish the aggregation before finalizing
  DictionaryBuilder merged = firstBuilder.aggregate(secondBuilder);
  merged.finalizeAggregation();

  Map<String, int[]> finalDictionary = merged.finalizeDictionary();
  assertEquals(17, finalDictionary.size());
}
/**
 * Runs {@code CheckOptionHandler.checkCanonicalUserOptions()} against a fresh
 * {@code DictionaryBuilder} and fails the test if the check reports false.
 */
public void testCanonicalUserOptions() {
  CheckOptionHandler checker = new CheckOptionHandler();
  DictionaryBuilder target = new DictionaryBuilder();
  checker.setOptionHandler(target);

  boolean passed = checker.checkCanonicalUserOptions();
  if (passed) {
    return;
  }
  fail("setOptions method failed");
}
public void testFinalizeDictionaryNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // all but "the" and "over" should have been pruned from the dictionary assertEquals(2, consolidated.size()); }
/**
 * Command-line entry point: passes the result of {@code suite()} to
 * {@code junit.textui.TestRunner.run}.
 *
 * @param args command-line arguments (not read by this method)
 */
public static void main(String[] args) { junit.textui.TestRunner.run( suite() ); } }
public void testInit() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); // should be just one dictionary (i.e. no class attribute, so no per-class // dictionaries) assertEquals(1, builder.getDictionaries(false).length); }
/**
 * Fills one builder from the {@code getData1()} instances and a second one
 * from the {@code getData4()} instances (both with a minimum term frequency
 * of 1), aggregates the second into the first, and expects the finalized
 * dictionary to contain 17 entries.
 *
 * @throws Exception if building, aggregating or finalizing fails
 */
public void testAggregateDictionaries() throws Exception {
  Instances firstData = getData1();
  Instances secondData = getData4();

  // first builder, fed with the getData1() instances
  Instances firstHeader = new Instances(firstData, 0);
  DictionaryBuilder firstBuilder = new DictionaryBuilder();
  firstBuilder.setMinTermFreq(1);
  firstBuilder.setup(firstHeader);
  int row = 0;
  while (row < firstData.numInstances()) {
    firstBuilder.processInstance(firstData.instance(row));
    row++;
  }

  // second builder, fed with the getData4() instances
  Instances secondHeader = new Instances(secondData, 0);
  DictionaryBuilder secondBuilder = new DictionaryBuilder();
  secondBuilder.setMinTermFreq(1);
  secondBuilder.setup(secondHeader);
  int otherRow = 0;
  while (otherRow < secondData.numInstances()) {
    secondBuilder.processInstance(secondData.instance(otherRow));
    otherRow++;
  }

  // combine both builders and finish the aggregation before finalizing
  DictionaryBuilder merged = firstBuilder.aggregate(secondBuilder);
  merged.finalizeAggregation();

  Map<String, int[]> finalDictionary = merged.finalizeDictionary();
  assertEquals(17, finalDictionary.size());
}
/**
 * Runs {@code CheckOptionHandler.checkResettingOptions()} against a fresh
 * {@code DictionaryBuilder} and fails the test if the check reports false.
 */
public void testResettingOptions() {
  CheckOptionHandler checker = new CheckOptionHandler();
  DictionaryBuilder target = new DictionaryBuilder();
  checker.setOptionHandler(target);

  boolean passed = checker.checkResettingOptions();
  if (passed) {
    return;
  }
  fail("Resetting of options failed");
}
public void testFinalizeDictionaryNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // all but "the" and "over" should have been pruned from the dictionary assertEquals(2, consolidated.size()); }
/**
 * Command-line entry point: passes the result of {@code suite()} to
 * {@code junit.textui.TestRunner.run}.
 *
 * @param args command-line arguments (not read by this method)
 */
public static void main(String[] args) { junit.textui.TestRunner.run( suite() ); } }
public void testTypicalNoClass() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testPruneMinFreq() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(1); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // min freq of 1 should keep all terms assertEquals(15, consolidated.size()); }
public void testTypicalNoClassExtraAtts() throws Exception { Instances data1 = getData3(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // word count (index 0) should be 4 assertEquals(4, builder.getDictionaries(false)[0].get("the")[0]); // doc count (index 1) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[1]); }
public void testTypicalClassAttPresent() throws Exception { Instances data2 = getData2(); Instances structure = new Instances(data2, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(2); builder.setup(structure); for (int i = 0; i < data2.numInstances(); i++) { builder.processInstance(data2.instance(i)); } // should be two dictionaries (one for each class) assertEquals(2, builder.getDictionaries(false).length); assertEquals(8, builder.getDictionaries(false)[0].size()); assertEquals(9, builder.getDictionaries(false)[1].size()); // check a couple of words assertTrue(builder.getDictionaries(false)[0].get("the") != null); // first dictionary: word count (index 0) should be 2 assertEquals(2, builder.getDictionaries(false)[0].get("the")[0]); // first dictionary: doc count (index 1) should be 1 assertEquals(1, builder.getDictionaries(false)[0].get("the")[1]); }
/**
 * Runs {@code CheckOptionHandler.checkListOptions()} against a fresh
 * {@code DictionaryBuilder} and fails the test if the check reports false.
 */
public void testListOptions() {
  CheckOptionHandler checker = new CheckOptionHandler();
  DictionaryBuilder target = new DictionaryBuilder();
  checker.setOptionHandler(target);

  boolean passed = checker.checkListOptions();
  if (passed) {
    return;
  }
  fail("Options cannot be listed via listOptions");
}
/**
 * Builds and finalizes a dictionary from the {@code getData1()} instances
 * with a minimum term frequency of 2, then checks that
 * {@code getVectorizedFormat()} returns a non-null format with exactly two
 * attributes.
 *
 * @throws Exception if building the dictionary or the format fails
 */
public void testGetVectorizedStructureNoClass() throws Exception {
  Instances docs = getData1();
  Instances header = new Instances(docs, 0);

  DictionaryBuilder dictBuilder = new DictionaryBuilder();
  dictBuilder.setMinTermFreq(2);
  dictBuilder.setup(header);

  int row = 0;
  while (row < docs.numInstances()) {
    dictBuilder.processInstance(docs.instance(row));
    row++;
  }

  // the dictionary is finalized before the vectorized format is requested
  dictBuilder.finalizeDictionary();

  Instances vectorized = dictBuilder.getVectorizedFormat();
  assertTrue(vectorized != null);
  assertEquals(2, vectorized.numAttributes());
}
public void testPruneMinFreq() throws Exception { Instances data1 = getData1(); Instances structure = new Instances(data1, 0); DictionaryBuilder builder = new DictionaryBuilder(); builder.setMinTermFreq(1); builder.setup(structure); for (int i = 0; i < data1.numInstances(); i++) { builder.processInstance(data1.instance(i)); } assertEquals(15, builder.getDictionaries(false)[0].size()); Map<String, int[]> consolidated = builder.finalizeDictionary(); // min freq of 1 should keep all terms assertEquals(15, consolidated.size()); }