public TrieDictionaryForest<T> build() { if (trieBuilder.isHasValue()) { //last tree TrieDictionary<T> tree = trieBuilder.build(0); addTree(tree); reset(); } TrieDictionaryForest<T> forest = new TrieDictionaryForest<T>(this.trees, this.valueDivide, this.accuOffset, this.bytesConverter, baseId); // if input values are not in ascending order and tree num>1,TrieDictionaryForest can not work correctly. if (forest.getTrees().size() > 1 && !isOrdered) { throw new IllegalStateException("Invalid input data. Unordered data can not be split into multi trees"); } return forest; }
TrieDictionaryForestBuilder<String> b = newDictBuilder(testData, 0); TrieDictionaryForest<String> dict = b.build(); System.out.println("tree size:" + dict.getTrees().size()); BytesConverter<String> converter = new StringBytesConverter(); TreeSet<String> set = new TreeSet<String>();
@Test //one string one tree public void testMultiTree() { ArrayList<String> strs = new ArrayList<String>(); strs.add("part"); strs.add("par"); strs.add("partition"); strs.add("party"); strs.add("parties"); strs.add("paint"); strs.add("一二三"); //Chinese test strs.add("四五六"); strs.add(""); Collections.sort(strs, new ByteComparator<String>(new StringBytesConverter())); int baseId = 5; int maxTreeSize = 0; TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, baseId, maxTreeSize); TrieDictionaryForest<String> dict = builder.build(); dict.dump(System.out); assertEquals(strs.size(), dict.getTrees().size()); int expectId = baseId; for (String s : strs) { assertEquals(expectId, dict.getIdFromValue(s)); expectId++; } assertSameBehaviorAsTrie(dict, strs, baseId); }
System.out.println("tree num:" + dict.getTrees().size());
assertEquals(1, dict.getTrees().size());
builder.addValue(it.next()); TrieDictionaryForest<String> dict = builder.build(); System.out.println(dict.getTrees().size());
/**
 * Test fixture: generates a string data set and feeds every value to both a
 * plain trie dictionary builder and a forest builder, then stores the two
 * resulting dictionaries and prints how many trees the forest was split into.
 */
@Before
public void before() {
    int dataSize = 100 * 10000;
    this.rawData = genStringDataSet(dataSize);

    TrieDictionaryBuilder<String> singleTrieBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    // NOTE(review): the trailing arguments (0, 5) are presumably the base id and
    // the per-tree size limit — confirm against the builder's constructor.
    TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, 5);

    // Both builders receive exactly the same values in the same order.
    for (String value : this.rawData) {
        singleTrieBuilder.addValue(value);
        forestBuilder.addValue(value);
    }

    this.oldDict = singleTrieBuilder.build(0);

    TrieDictionaryForest<String> forest = forestBuilder.build();
    this.newDict = forest;
    System.out.println("new dict split tree size : " + forest.getTrees().size());
}
System.out.println("tree size:" + dict.getTrees().size()); System.out.println("--------------dict-----------------"); dict.dump(System.out);
public TrieDictionaryForest<T> build() { if (trieBuilder.isHasValue()) { //last tree TrieDictionary<T> tree = trieBuilder.build(0); addTree(tree); reset(); } TrieDictionaryForest<T> forest = new TrieDictionaryForest<T>(this.trees, this.valueDivide, this.accuOffset, this.bytesConverter, baseId); // if input values are not in ascending order and tree num>1,TrieDictionaryForest can not work correctly. if (forest.getTrees().size() > 1 && !isOrdered) { throw new IllegalStateException("Invalid input data. Unordered data can not be split into multi trees"); } return forest; }