/**
 * Copies the descriptive attributes of a dictionary onto its info record.
 *
 * @param newDict     dictionary whose size and concrete class are recorded
 * @param newDictInfo info record that receives the cardinality, the dictionary
 *                    object itself and the dictionary's class name
 */
private void initDictInfo(Dictionary<String> newDict, DictionaryInfo newDictInfo) {
    newDictInfo.setCardinality(newDict.getSize());
    newDictInfo.setDictionaryObject(newDict);

    // Store the runtime class name of the dictionary implementation.
    final Class<?> dictClass = newDict.getClass();
    newDictInfo.setDictionaryClass(dictClass.getName());
}
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException { if (dictInfo == null) return; // work on copy instead of cached objects CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid()); Dictionary<?> dict = dictInfo.getDictionaryObject(); segCopy.putDictResPath(col, dictInfo.getResourcePath()); segCopy.getRowkeyStats().add(new Object[] { col.getIdentity(), dict.getSize(), dict.getSizeOfId() }); CubeUpdate update = new CubeUpdate(cubeCopy); update.setToUpdateSegs(segCopy); updateCube(update); }
/**
 * Lists the values of this dictionary in ascending id order.
 *
 * @return a new list holding the value of every id from {@code getMinId()}
 *         to {@code getMaxId()}, both inclusive
 */
public List<T> enumeratorValues() {
    final List<T> values = Lists.newArrayListWithExpectedSize(getSize());
    int id = getMinId();
    while (id <= getMaxId()) {
        values.add(getValueFromId(id));
        id++;
    }
    return values;
}
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException { if (dictInfo == null) return; // work on copy instead of cached objects CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid()); Dictionary<?> dict = dictInfo.getDictionaryObject(); segCopy.putDictResPath(col, dictInfo.getResourcePath()); segCopy.getRowkeyStats().add(new Object[] { col.getIdentity(), dict.getSize(), dict.getSizeOfId() }); CubeUpdate update = new CubeUpdate(cubeCopy); update.setToUpdateSegs(segCopy); updateCube(update); }
// Debug diagnostics for the dictionary: its size, then the concrete classes
// of the builder and of the dictionary itself.
logger.debug("Dictionary cardinality: " + dict.getSize());
logger.debug("Dictionary builder class: " + builder.getClass().getName());
logger.debug("Dictionary class: " + dict.getClass().getName());
/**
 * Tells whether every value of {@code other} is also present in this dictionary.
 *
 * @param other dictionary whose values are looked up in this one
 * @return {@code false} if {@code other} reports a larger size or holds a
 *         value this dictionary does not contain; {@code true} otherwise
 */
@Override
public boolean contains(Dictionary other) {
    // A dictionary reporting a larger size is rejected without comparing values.
    final boolean sizeFits = other.getSize() <= this.getSize();
    if (!sizeFits) {
        return false;
    }

    int id = other.getMinId();
    while (id <= other.getMaxId()) {
        T value = (T) other.getValueFromId(id);
        if (!this.containsValue(value)) {
            return false;
        }
        ++id;
    }
    return true;
}
/**
 * Checks that this dictionary holds all values of another dictionary.
 *
 * @param other dictionary to compare against, walked from its minimum id to
 *              its maximum id (inclusive)
 * @return {@code true} when no value of {@code other} is missing here;
 *         {@code false} when {@code other} reports a larger size or a value
 *         is missing
 */
@Override
public boolean contains(Dictionary other) {
    if (this.getSize() < other.getSize()) {
        return false;
    }
    for (int otherId = other.getMinId(); otherId <= other.getMaxId(); otherId++) {
        T candidate = (T) other.getValueFromId(otherId);
        if (this.containsValue(candidate)) {
            continue;
        }
        // First missing value settles the answer.
        return false;
    }
    return true;
}
@Ignore @SuppressWarnings("unchecked") @Test public void testEmptyInput() throws IOException { String[] ints = new String[] { "", "0", "5", "100", "13" }; // check "" is treated as NULL, not a code of dictionary Dictionary<?> dict = DictionaryGenerator.buildDictionary(DataType.getType("integer"), new IterableDictionaryValueEnumerator(ints)); assertEquals(4, dict.getSize()); final int id = ((NumberDictionary<String>) dict).getIdFromValue(""); assertEquals(id, dict.nullId()); }
// The dictionary object held by info1 is expected to report a size of 3.
assertEquals(3, info1.getDictionaryObject().getSize());
/**
 * Materializes all dictionary values into a list.
 *
 * @return a freshly allocated list with one entry per id, starting at
 *         {@code getMinId()} and ending at {@code getMaxId()} inclusive
 */
public List<T> enumeratorValues() {
    // Pre-size the list with the dictionary's reported size.
    final List<T> collected = Lists.newArrayListWithExpectedSize(getSize());
    int currentId = getMinId();
    while (currentId <= getMaxId()) {
        T value = getValueFromId(currentId);
        collected.add(value);
        currentId += 1;
    }
    return collected;
}
/**
 * Fills a dictionary info record from the dictionary it describes.
 *
 * @param newDict     source dictionary
 * @param newDictInfo record updated with the dictionary's size (as cardinality),
 *                    the dictionary instance and its class name
 */
private void initDictInfo(Dictionary<String> newDict, DictionaryInfo newDictInfo) {
    // Cardinality is taken directly from the dictionary's reported size.
    newDictInfo.setCardinality(newDict.getSize());
    newDictInfo.setDictionaryObject(newDict);

    final String implementationName = newDict.getClass().getName();
    newDictInfo.setDictionaryClass(implementationName);
}
public static org.apache.kylin.common.util.Dictionary<?> buildDictionaryFromValueEnumerator(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator) throws IOException{ org.apache.kylin.common.util.Dictionary dict = null; int baseId = 0; // always 0 for now final int nSamples = 5; ArrayList samples = Lists.newArrayListWithCapacity(nSamples); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(valueEnumerator, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(valueEnumerator, baseId, nSamples, samples); else dict = buildStringDict(valueEnumerator, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality: " + dict.getSize()); if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + dict.getSize()); return dict; }
// Debug output, in order: the dictionary's size, the runtime class of the
// builder, and the runtime class of the dictionary.
logger.debug("Dictionary cardinality: " + dict.getSize());
logger.debug("Dictionary builder class: " + builder.getClass().getName());
logger.debug("Dictionary class: " + dict.getClass().getName());
/**
 * Saves a new dictionary unless {@code checkDupByContent} reports an existing
 * one with identical content, in which case the existing dictionary info is
 * returned instead.
 *
 * @param newDict     the dictionary to save
 * @param newDictInfo info record for {@code newDict}; populated, saved and
 *                    put into the cache when no duplicate exists
 * @return {@code newDictInfo} after saving, or the info looked up for the
 *         duplicate's path
 * @throws IOException declared for the duplicate check, lookup and save
 */
public DictionaryInfo trySaveNewDict(Dictionary<?> newDict, DictionaryInfo newDictInfo) throws IOException {
    final String existingPath = checkDupByContent(newDictInfo, newDict);

    if (existingPath == null) {
        // No duplicate: fill in the info record, save it and cache it by resource path.
        newDictInfo.setCardinality(newDict.getSize());
        newDictInfo.setDictionaryObject(newDict);
        newDictInfo.setDictionaryClass(newDict.getClass().getName());

        save(newDictInfo);
        dictCache.put(newDictInfo.getResourcePath(), newDictInfo);
        return newDictInfo;
    }

    logger.info("Identical dictionary content " + newDict + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
/**
 * Determines whether this dictionary covers all values of {@code other}.
 *
 * @param other dictionary whose ids, from minimum to maximum inclusive, are
 *              resolved to values and checked against this dictionary
 * @return {@code true} if every such value is contained here; {@code false}
 *         if {@code other} reports a larger size or any value is absent
 */
@Override
public boolean contains(Dictionary other) {
    // Size check first: a larger dictionary is rejected before any lookup.
    if (other.getSize() > this.getSize()) {
        return false;
    }

    int cursor = other.getMinId();
    while (cursor <= other.getMaxId()) {
        T otherValue = (T) other.getValueFromId(cursor);
        boolean present = this.containsValue(otherValue);
        if (!present) {
            return false;
        }
        cursor++;
    }
    return true;
}
/**
 * Returns whether all values held by {@code other} can be found in this dictionary.
 *
 * @param other the dictionary to test for containment
 * @return {@code false} as soon as {@code other} reports a larger size than
 *         this dictionary or one of its values is not contained here;
 *         {@code true} if the whole id range of {@code other} passes
 */
@Override
public boolean contains(Dictionary other) {
    final boolean otherIsLarger = other.getSize() > this.getSize();
    if (otherIsLarger) {
        return false;
    }
    for (int idx = other.getMinId(); idx <= other.getMaxId(); idx += 1) {
        // Resolve the id in the other dictionary, then look the value up here.
        T resolved = (T) other.getValueFromId(idx);
        if (this.containsValue(resolved)) {
            continue;
        }
        return false;
    }
    return true;
}