/**
 * Produces the dictionary by delegating to the wrapped {@code builder}.
 *
 * @return whatever {@code builder.build()} returns
 * @throws IOException declared by this override's signature
 */
@Override
public Dictionary<String> build() throws IOException {
    final Dictionary<String> dictionary = builder.build();
    return dictionary;
}
/**
 * Builds a forest dictionary from an empty value list with base id 0 and
 * checks it through {@code assertSameBehaviorAsTrie}.
 */
@Test
public void testEmptyDict() {
    final int baseId = 0;
    ArrayList<String> noValues = new ArrayList<String>();

    TrieDictionaryForest<String> forest = newDictBuilder(noValues, baseId).build();

    assertSameBehaviorAsTrie(forest, noValues, baseId);
}
/**
 * Builds a forest dictionary from six sorted words sharing common prefixes and
 * verifies that ids are handed out consecutively, starting at the base id, in
 * sorted order. Finishes with the {@code assertSameBehaviorAsTrie} check.
 */
@Test
public void testBasicFound() {
    final int baseId = 0;
    ArrayList<String> words = new ArrayList<String>();
    Collections.addAll(words, "part", "par", "partition", "party", "parties", "paint");
    Collections.sort(words);

    TrieDictionaryForest<String> forest = newDictBuilder(words, baseId).build();
    forest.dump(System.out);

    // The i-th word in sorted order is expected to carry id baseId + i.
    for (int i = 0; i < words.size(); i++) {
        assertEquals(baseId + i, forest.getIdFromValue(words.get(i)));
    }
    assertSameBehaviorAsTrie(forest, words, baseId);
}
Collections.sort(testData); TrieDictionaryForestBuilder<String> b = newDictBuilder(testData, 0); TrieDictionaryForest<String> dict = b.build(); System.out.println("tree size:" + dict.getTrees().size()); BytesConverter<String> converter = new StringBytesConverter();
@Test public void dictionaryContainTest() { ArrayList<String> str = new ArrayList<String>(); str.add("part"); str.add("part"); // meant to be dup str.add("par"); str.add("partition"); str.add("party"); str.add("parties"); str.add("paint"); Collections.sort(str, new ByteComparator<String>(new StringBytesConverter())); int baseId = new Random().nextInt(100); TrieDictionaryForestBuilder<String> b = newDictBuilder(str, baseId); TrieDictionaryForest<String> dict = b.build(); str.add("py"); Collections.sort(str, new ByteComparator<String>(new StringBytesConverter())); b = newDictBuilder(str, baseId); baseId = new Random().nextInt(100); TrieDictionaryForest<String> dict2 = b.build(); assertEquals(true, dict2.contains(dict)); assertEquals(false, dict.contains(dict2)); }
int maxSize = 100 * 1024 * 1024; TrieDictionaryForestBuilder<String> b = newDictBuilder(str, baseId, maxSize); TrieDictionaryForest<String> dict = b.build(); TreeSet<String> set = new TreeSet<String>(); for (String s : str) {
/**
 * Builds a forest dictionary whose only value is the empty string (base id 0,
 * max tree size 10) and expects a dictionary of size 1 that maps "" to id 0.
 */
@Test
public void testAllNullValue() {
    final int maxTreeSize = 10;
    ArrayList<String> values = new ArrayList<String>();
    values.add("");

    TrieDictionaryForest<String> forest = newDictBuilder(values, 0, maxTreeSize).build();

    assertEquals(1, forest.getSize());
    assertEquals(0, forest.getIdFromValue(""));
}
for (String str : testData) newTrieBuilder.addValue(str); TrieDictionaryForest<String> newDict = newTrieBuilder.build(); keep |= newDict.getIdFromValue(testData.get(0)); newDictTotalBuildTime += (System.currentTimeMillis() - startTime);
@Test //one string one tree public void testMultiTree() { ArrayList<String> strs = new ArrayList<String>(); strs.add("part"); strs.add("par"); strs.add("partition"); strs.add("party"); strs.add("parties"); strs.add("paint"); strs.add("一二三"); //Chinese test strs.add("四五六"); strs.add(""); Collections.sort(strs, new ByteComparator<String>(new StringBytesConverter())); int baseId = 5; int maxTreeSize = 0; TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, baseId, maxTreeSize); TrieDictionaryForest<String> dict = builder.build(); dict.dump(System.out); assertEquals(strs.size(), dict.getTrees().size()); int expectId = baseId; for (String s : strs) { assertEquals(expectId, dict.getIdFromValue(s)); expectId++; } assertSameBehaviorAsTrie(dict, strs, baseId); }
TrieDictionaryForest<String> dict = builder.build(); System.out.println("tree num:" + dict.getTrees().size());
builder.addValue(str); TrieDictionaryForest<String> dict = builder.build(); assertEquals(1, dict.getTrees().size()); for (String str : strs) builder.addValue(str); dict = builder.build(); fail("Input data no sorted and builder have multi trees. Should throw IllegalStateException"); } catch (IllegalStateException e) {
@Test public void testNullValue() { //encounter null value when building dictionary ArrayList<String> strs = new ArrayList<String>(); strs.add(null); strs.add("abc"); System.out.println(strs); int maxTreeSize = 0; TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, 0, maxTreeSize); TrieDictionaryForest<String> dict = builder.build(); dict.dump(System.out); //null value query int id = dict.getIdFromValue(null, 0); assertEquals(255, id); id = dict.getIdFromValue(null, 1); assertEquals(255, id); id = dict.getIdFromValue(null, -1); assertEquals(255, id); assertSameBehaviorAsTrie(dict, strs, 0); }
/**
 * Test fixture: generates the raw string data set, feeds every value to both a
 * plain trie builder and a forest builder, stores the two resulting
 * dictionaries in {@code oldDict} / {@code newDict}, and prints how many trees
 * the forest dictionary ended up with.
 */
@Before
public void before() {
    final int dataSize = 100 * 10000;
    TrieDictionaryBuilder<String> trieBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, 5);

    this.rawData = genStringDataSet(dataSize);
    for (String value : this.rawData) {
        trieBuilder.addValue(value);
        forestBuilder.addValue(value);
    }

    this.oldDict = trieBuilder.build(0);
    this.newDict = forestBuilder.build();

    TrieDictionaryForest<String> forest = (TrieDictionaryForest<String>) newDict;
    int treeCount = forest.getTrees().size();
    System.out.println("new dict split tree size : " + treeCount);
}
while (it.hasNext()) builder.addValue(it.next()); TrieDictionaryForest<String> dict = builder.build(); System.out.println(dict.getTrees().size());
private static void testStringDictionary(ArrayList<String> str, ArrayList<String> notFound) { int baseId = new Random().nextInt(100); TrieDictionaryForestBuilder<String> b = newDictBuilder(str, baseId, 2); TrieDictionaryForest<String> dict = b.build(); TreeSet<String> set = new TreeSet<String>(); for (String s : str) {
/**
 * Passes a freshly built forest dictionary (base id 10, max tree size 0)
 * through {@code testSerialize} and verifies on the returned dictionary that
 * every test value maps to an id which maps back to the same value.
 */
@Test
public void serializeTest() {
    ArrayList<String> testData = getTestData(10);
    TrieDictionaryForest<String> built = newDictBuilder(testData, 10, 0).build();

    TrieDictionaryForest<String> restored = testSerialize(built);
    restored.dump(System.out);

    for (String value : testData) {
        int id = restored.getIdFromValue(value);
        assertEquals(value, restored.getValueFromId(id));
    }
}
@Test public void emptyDictTest() throws Exception { TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter()); TrieDictionaryForest<String> dict = b.build(); try { int id = dict.getIdFromValue("123", 0); fail("id should not exist"); } catch (IllegalArgumentException e) { //right } try { String value = dict.getValueFromIdImpl(123); fail("value should not exist"); } catch (IllegalArgumentException e) { //right } }
TrieDictionaryForest<String> dict = builder.build();
int baseId = 10; TrieDictionaryForestBuilder<String> b = TrieDictionaryForestTest.newDictBuilder(testData, baseId, 0); TrieDictionaryForest<String> dict = b.build();
/**
 * Produces the dictionary by delegating to the wrapped {@code builder}.
 *
 * @return whatever {@code builder.build()} returns
 * @throws IOException declared by this override's signature
 */
@Override
public Dictionary<String> build() throws IOException {
    final Dictionary<String> dictionary = builder.build();
    return dictionary;
}