private static TrieDictionaryForest<String> testSerialize(TrieDictionaryForest<String> dict) { try { ByteArrayOutputStream bout = new ByteArrayOutputStream(); DataOutputStream dataout = new DataOutputStream(bout); dict.write(dataout); dataout.close(); ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray()); DataInputStream datain = new DataInputStream(bin); TrieDictionaryForest<String> r = new TrieDictionaryForest<>(); //r.dump(System.out); r.readFields(datain); //r.dump(System.out); datain.close(); return r; } catch (IOException e) { throw new RuntimeException(e); } }
assertEquals(id, dict.getIdFromValue(value)); assertEquals(value, dict.getValueFromId(id)); assertArrayEquals(Bytes.toBytes(value), dict.getValueByteFromId(id)); for (String s : notFound) { try { int nullId = dict.getIdFromValue(s); System.out.println("null value id:" + nullId); fail("For not found value '" + s + "', IllegalArgumentException is expected"); int maxId = dict.getMaxId(); int[] notExistIds = { -10, -20, -Integer.MIN_VALUE, -Integer.MAX_VALUE, maxId + 1, maxId + 2 }; for (Integer i : notExistIds) { try { dict.getValueFromId(i); fail("For not found id '" + i + "', IllegalArgumentException is expected"); } catch (IllegalArgumentException e) { int nullId = dict.getIdFromValue(null); assertNull(dict.getValueFromId(nullId)); assertNull(dict.getValueByteFromId(nullId));
} else { ByteArray search = new ByteArray(value, offset, len); index = findIndexByValue(search); if (index < 0) { if (roundingFlag > 0) { return getMinId(); //searching value smaller than the smallest value in dict } else { throw new IllegalArgumentException("Value '" + Bytes.toString(value, offset, len) + "' (" + Bytes.toStringBinary(value, offset, len) + ") not exists!");
public TrieDictionaryForest<T> build() { if (trieBuilder.isHasValue()) { //last tree TrieDictionary<T> tree = trieBuilder.build(0); addTree(tree); reset(); } TrieDictionaryForest<T> forest = new TrieDictionaryForest<T>(this.trees, this.valueDivide, this.accuOffset, this.bytesConverter, baseId); // if input values are not in ascending order and tree num>1,TrieDictionaryForest can not work correctly. if (forest.getTrees().size() > 1 && !isOrdered) { throw new IllegalStateException("Invalid input data. Unordered data can not be split into multi trees"); } return forest; }
/**
 * Tells whether every value of {@code other} is also present in this dictionary.
 *
 * <p>A dictionary reporting a larger size than this one is rejected immediately.
 * Otherwise each id from {@code other.getMinId()} to {@code other.getMaxId()}
 * (inclusive) is resolved to its value and looked up with {@code containsValue}.
 *
 * @param other the dictionary whose values are checked against this one
 * @return {@code true} if no value of {@code other} is missing here, {@code false} otherwise
 */
@Override
public boolean contains(Dictionary other) {
    if (other.getSize() > this.getSize()) {
        return false;
    }

    int id = other.getMinId();
    while (id <= other.getMaxId()) {
        T candidate = (T) other.getValueFromId(id);
        if (!this.containsValue(candidate)) {
            return false;
        }
        ++id;
    }
    return true;
}
/**
 * Checks that a forest reports the same id range and sizes as a single
 * {@code TrieDictionary} built from the same values.
 *
 * @param dict   the forest under test
 * @param strs   the values the forest was built from; {@code null} entries are skipped
 *               when building the reference trie
 * @param baseId the base id passed to the reference trie's {@code build}
 */
private void assertSameBehaviorAsTrie(TrieDictionaryForest<String> dict, ArrayList<String> strs, int baseId) {
    TrieDictionaryBuilder<String> referenceBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    for (String value : strs) {
        if (value == null) {
            continue;
        }
        referenceBuilder.addValue(value);
    }
    TrieDictionary<String> reference = referenceBuilder.build(baseId);

    // Id range first, then the three size figures.
    assertEquals(reference.getMaxId(), dict.getMaxId());
    assertEquals(reference.getMinId(), dict.getMinId());
    assertEquals(reference.getSize(), dict.getSize());
    assertEquals(reference.getSizeOfId(), dict.getSizeOfId());
    assertEquals(reference.getSizeOfValue(), dict.getSizeOfValue());
}
@Test public void testMerge() { // mimic the logic as in MergeCuboidMapper NumberDictionaryForestBuilder b1 = new NumberDictionaryForestBuilder(); b1.addValue("0"); b1.addValue("3"); b1.addValue("23"); TrieDictionaryForest<String> dict1 = b1.build(); NumberDictionaryForestBuilder b2 = new NumberDictionaryForestBuilder(); b2.addValue("0"); b2.addValue("2"); b2.addValue("3"); b2.addValue("15"); b2.addValue("23"); TrieDictionaryForest<String> dict2 = b2.build(); assertTrue(dict1.getSizeOfId() == dict2.getSizeOfId()); assertTrue(dict1.getSizeOfValue() == dict2.getSizeOfValue()); { int newId = dict2.getIdFromValue(dict1.getValueFromId(0)); assertTrue(newId == 0); } { int newId = dict2.getIdFromValue(dict1.getValueFromId(1)); assertTrue(newId == 2); } { int newId = dict2.getIdFromValue(dict1.getValueFromId(2)); assertTrue(newId == 4); } }
System.out.println("tree size:" + dict.getTrees().size()); System.out.println("--------------dict-----------------"); dict.dump(System.out); System.out.println("--------------set-------------------"); System.out.println(set); int id1 = dict.getIdFromValue(query1, 1); String actualValue = dict.getValueFromId(id1); continue; try { int id = dict.getIdFromValue(query, 1); assertEquals(set.ceiling(query), dict.getValueFromId(id)); } catch (IllegalArgumentException e) { assertNull(set.ceiling(query)); continue; try { int id = dict.getIdFromValue(query, -1); assertEquals(set.floor(query), dict.getValueFromId(id)); } catch (IllegalArgumentException e) { assertNull(set.floor(query));
System.out.println("tree num:" + dict.getTrees().size()); assertEquals(0, dict.getIdFromValue(entry.getKey()) - entry.getValue()); assertEquals(entry.getKey(), dict.getValueFromId(entry.getValue()));
@Test public void serializeTest() { List<String> testData = new ArrayList<>(); testData.add("1"); testData.add("2"); testData.add("100"); //TrieDictionaryForestBuilder.MaxTrieTreeSize = 0; NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(); for (String str : testData) b.addValue(str); TrieDictionaryForest<String> dict = b.build(); dict = testSerialize(dict); //dict.dump(System.out); for (String str : testData) { assertEquals(str, dict.getValueFromId(dict.getIdFromValue(str))); } }
/**
 * Builds a forest from generated test data, passes it through
 * {@code testSerialize}, dumps the result to standard output, and checks
 * that every input value still maps to an id that maps back to the same value.
 */
@Test
public void serializeTest() {
    ArrayList<String> testData = getTestData(10);
    TrieDictionaryForestBuilder<String> builder = newDictBuilder(testData, 10, 0);

    TrieDictionaryForest<String> restored = testSerialize(builder.build());
    restored.dump(System.out);

    for (String expected : testData) {
        int id = restored.getIdFromValue(expected);
        assertEquals(expected, restored.getValueFromId(id));
    }
}
@Test //one string one tree public void testMultiTree() { ArrayList<String> strs = new ArrayList<String>(); strs.add("part"); strs.add("par"); strs.add("partition"); strs.add("party"); strs.add("parties"); strs.add("paint"); strs.add("一二三"); //Chinese test strs.add("四五六"); strs.add(""); Collections.sort(strs, new ByteComparator<String>(new StringBytesConverter())); int baseId = 5; int maxTreeSize = 0; TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, baseId, maxTreeSize); TrieDictionaryForest<String> dict = builder.build(); dict.dump(System.out); assertEquals(strs.size(), dict.getTrees().size()); int expectId = baseId; for (String s : strs) { assertEquals(expectId, dict.getIdFromValue(s)); expectId++; } assertSameBehaviorAsTrie(dict, strs, baseId); }
/**
 * Builds a forest whose only value is the empty string and checks that the
 * dictionary has size 1 and assigns id 0 to that value.
 */
@Test
public void testAllNullValue() {
    final int maxTreeSize = 10;

    ArrayList<String> strs = new ArrayList<String>();
    strs.add("");

    TrieDictionaryForest<String> dict = newDictBuilder(strs, 0, maxTreeSize).build();

    assertEquals(1, dict.getSize());
    assertEquals(0, dict.getIdFromValue(""));
}
int times = Math.max(10 * 1000 * 1000 / n, 1); // run 10 million lookups int keep = 0; // make sure JIT don't OPT OUT function calls under test byte[] valueBytes = new byte[dict.getSizeOfValue()]; long start; for (int j = 0; j < n; j++) { keep |= dict.getIdFromValueBytesWithoutCache(array[j], 0, array[j].length, 0); for (int i = 0; i < times; i++) { for (int j = 0; j < n; j++) { keep |= dict.getValueBytesFromIdWithoutCache(j).length;
@Test public void testNullValue() { //encounter null value when building dictionary ArrayList<String> strs = new ArrayList<String>(); strs.add(null); strs.add("abc"); System.out.println(strs); int maxTreeSize = 0; TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, 0, maxTreeSize); TrieDictionaryForest<String> dict = builder.build(); dict.dump(System.out); //null value query int id = dict.getIdFromValue(null, 0); assertEquals(255, id); id = dict.getIdFromValue(null, 1); assertEquals(255, id); id = dict.getIdFromValue(null, -1); assertEquals(255, id); assertSameBehaviorAsTrie(dict, strs, 0); }
@Test public void emptyDictTest() throws Exception { TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter()); TrieDictionaryForest<String> dict = b.build(); try { int id = dict.getIdFromValue("123", 0); fail("id should not exist"); } catch (IllegalArgumentException e) { //right } try { String value = dict.getValueFromIdImpl(123); fail("value should not exist"); } catch (IllegalArgumentException e) { //right } }
TrieDictionaryForestBuilder<String> b = newDictBuilder(testData, 0); TrieDictionaryForest<String> dict = b.build(); System.out.println("tree size:" + dict.getTrees().size()); BytesConverter<String> converter = new StringBytesConverter(); TreeSet<String> set = new TreeSet<String>();
int id; try { id = dict.getIdFromValue(smallerStr, 0); fail("should throw IllegalArgumentException,but id is:" + id); } catch (IllegalArgumentException e) { id = dict.getIdFromValue(smallerStr, -1); fail("should throw IllegalArgumentException,but id is:" + id); } catch (IllegalArgumentException e) { id = dict.getIdFromValue(smallerStr, 1); assertEquals(baseId, id); id = dict.getIdFromValue(middleStr, 0); fail("should throw IllegalArgumentException,but id is:" + id); } catch (IllegalArgumentException e) { id = dict.getIdFromValue(middleStr, -1); assertEquals(baseId, id); id = dict.getIdFromValue(middleStr, 1); assertEquals(baseId + 1, id); id = dict.getIdFromValue(rightStr, 0); fail("should throw IllegalArgumentException,but id is:" + id); } catch (IllegalArgumentException e) { id = dict.getIdFromValue(rightStr, -1); assertEquals(baseId + 2, id); try {
builder.addValue(it.next()); TrieDictionaryForest<String> dict = builder.build(); System.out.println(dict.getTrees().size()); String query = rand.nextInt(2 * num) + ""; try { int id = dict.getIdFromValue(query, 1); assertEquals(set.ceiling(query), dict.getValueFromId(id)); } catch (IllegalArgumentException e) { assertNull(set.ceiling(query)); String query = rand.nextInt(2 * num) + ""; try { int id = dict.getIdFromValue(query, -1); assertEquals(set.floor(query), dict.getValueFromId(id)); } catch (IllegalArgumentException e) { assertNull(set.floor(query));
int actualId = dict.getIdFromValue(value); assertTrue(actualId >= id); assertTrue(actualId > previousId); previousId = actualId; assertEquals(value, dict.getValueFromId(actualId));