/**
 * Writes a dictionary to the resource store, using the dictionary's own
 * resource path as the storage key.
 *
 * @param dict the dictionary to persist
 * @throws IOException declared by this method; presumably raised by the store access or write
 */
void save(DictionaryInfo dict) throws IOException {
    final ResourceStore resourceStore = getStore();
    final String resourcePath = dict.getResourcePath();

    logger.info("Saving dictionary at " + resourcePath);

    // The current wall-clock time is handed to putBigResource as its third argument.
    resourceStore.putBigResource(
            resourcePath,
            dict,
            System.currentTimeMillis(),
            DictionaryInfoSerializer.FULL_SERIALIZER);
}
/**
 * Persists a dictionary with {@code save(DictionaryInfo)} and then registers it
 * in {@code dictCache} under its resource path.
 *
 * @param newDictInfo the dictionary to persist and cache
 * @return the same {@code newDictInfo} instance that was passed in
 * @throws IOException if {@code save} fails; in that case the cache is not touched
 */
private DictionaryInfo saveNewDict(DictionaryInfo newDictInfo) throws IOException {
    // Persist first so that a failed save never leaves an entry in the cache.
    save(newDictInfo);

    final String cacheKey = newDictInfo.getResourcePath();
    dictCache.put(cacheKey, newDictInfo);
    return newDictInfo;
}
/**
 * Merges the given dictionaries through the dictionary manager and, when a
 * merged dictionary is produced, records its resource path on the segment
 * for the given column.
 *
 * @param dictMgr manager that performs the merge
 * @param cubeSeg segment that receives the merged dictionary's resource path
 * @param dicts   dictionaries to merge
 * @param col     column the dictionaries belong to
 * @return the merged dictionary, or {@code null} when the manager returned {@code null}
 * @throws IOException declared by this method; presumably raised by the merge
 */
private DictionaryInfo mergeDictionaries(DictionaryManager dictMgr, CubeSegment cubeSeg, List<DictionaryInfo> dicts, TblColRef col) throws IOException {
    final DictionaryInfo merged = dictMgr.mergeDictionary(dicts);
    if (merged == null) {
        // Nothing was produced, so the segment is left unchanged.
        return null;
    }

    final String mergedPath = merged.getResourcePath();
    cubeSeg.putDictResPath(col, mergedPath);
    return merged;
}
/**
 * Saves every dictionary in {@code dictionaryMap} through the
 * {@code DictionaryManager}, records the resulting resource path on the
 * segment, and returns the dictionary objects that were actually stored or reused.
 *
 * @param cubeSegment   segment whose dictionary paths are updated
 * @param dictionaryMap column-to-dictionary mapping to save
 * @param startOffset   start offset; used in the signature path and size
 * @param endOffset     end offset; used in the signature path and size
 * @return a new map from each column to the dictionary object returned by the manager
 * @throws RuntimeException wrapping the {@code IOException} if saving a dictionary fails
 */
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment, Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    final Map<TblColRef, Dictionary<String>> savedDicts = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> mapping : dictionaryMap.entrySet()) {
        final TblColRef column = mapping.getKey();
        final Dictionary<String> builtDict = mapping.getValue();

        // Describe the input of this dictionary by the offset range it was built from.
        final IReadableTable.TableSignature inputSignature = new IReadableTable.TableSignature();
        inputSignature.setLastModifiedTime(System.currentTimeMillis());
        inputSignature.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        inputSignature.setSize(endOffset - startOffset);

        final DictionaryInfo candidate = new DictionaryInfo(column.getColumnDesc(), column.getDatatype(), inputSignature);
        logger.info("writing dictionary for TblColRef:" + column.toString());

        final DictionaryManager manager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());
        try {
            // The manager may hand back a different DictionaryInfo than the candidate,
            // so the returned one is what gets recorded.
            final DictionaryInfo stored = manager.trySaveNewDict(builtDict, candidate);
            cubeSegment.putDictResPath(column, stored.getResourcePath());
            savedDicts.put(column, (Dictionary<String>) stored.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + column, e);
        }
    }

    return savedDicts;
}
DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo); if (largestDictInfo != null) { largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); Dictionary<String> largestDictObject = largestDictInfo.getDictionaryObject(); if (largestDictObject.contains(newDict)) { logger.info("dictionary content " + newDict + ", is contained by dictionary at " + largestDictInfo.getResourcePath()); return largestDictInfo; } else if (newDict.contains(largestDictObject)) {
if (!firstDictInfo.isDictOnSameColumn(info)) { logger.warn("Merging dictionaries are not structurally equal : " + firstDictInfo.getResourcePath() + " and " + info.getResourcePath());
String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();
/**
 * Looks for an already stored dictionary whose input signature equals the
 * input signature of {@code dictInfo}.
 *
 * @param dictInfo dictionary whose resource directory is scanned and whose input is compared
 * @return the resource path of the first stored dictionary with an equal input,
 *         or {@code null} when none matches
 * @throws IOException declared by this method; presumably raised while reading the store
 */
private String checkDupByInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore resourceStore = getStore();
    final List<DictionaryInfo> storedInfos = resourceStore.getAllResources(dictInfo.getResourceDir(), DictionaryInfoSerializer.INFO_SERIALIZER);
    final TableSignature wantedInput = dictInfo.getInput();

    String duplicatePath = null;
    for (DictionaryInfo stored : storedInfos) {
        if (wantedInput.equals(stored.getInput())) {
            // First match wins; stop scanning.
            duplicatePath = stored.getResourcePath();
            break;
        }
    }
    return duplicatePath;
}
String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();
newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException { if (dictInfo == null) return; // work on copy instead of cached objects CubeInstance cubeCopy = cubeSeg.getCubeInstance().latestCopyForWrite(); // get a latest copy CubeSegment segCopy = cubeCopy.getSegmentById(cubeSeg.getUuid()); Dictionary<?> dict = dictInfo.getDictionaryObject(); segCopy.putDictResPath(col, dictInfo.getResourcePath()); segCopy.getRowkeyStats().add(new Object[] { col.getIdentity(), dict.getSize(), dict.getSizeOfId() }); CubeUpdate update = new CubeUpdate(cubeCopy); update.setToUpdateSegs(segCopy); updateCube(update); }
for (Map.Entry<String, String> entry : segment.getDictionaries().entrySet()) { if (entry.getValue().equalsIgnoreCase(item)) { entry.setValue(dictSaved.getResourcePath()); logger.info("Item " + item + " is dup, instead " + dictSaved.getResourcePath() + " is reused");
for (Map.Entry<String, String> entry : segment.getDictionaries().entrySet()) { if (entry.getValue().equalsIgnoreCase(item)) { entry.setValue(dictSaved.getResourcePath()); logger.info("Item " + item + " is dup, instead " + dictSaved.getResourcePath() + " is reused");
/**
 * Merges the given dictionaries through the dictionary manager and records
 * the merged dictionary's resource path on the segment for the given column.
 *
 * <p>When the manager returns {@code null}, the segment is left untouched
 * and {@code null} is returned instead of failing with a
 * {@code NullPointerException}.
 *
 * @param dictMgr manager that performs the merge
 * @param cubeSeg segment that receives the merged dictionary's resource path
 * @param dicts   dictionaries to merge
 * @param col     column the dictionaries belong to
 * @return the merged dictionary, or {@code null} when the manager produced none
 * @throws IOException declared by this method; presumably raised by the merge
 */
private DictionaryInfo mergeDictionaries(DictionaryManager dictMgr, CubeSegment cubeSeg, List<DictionaryInfo> dicts, TblColRef col) throws IOException {
    DictionaryInfo dictInfo = dictMgr.mergeDictionary(dicts);
    // Guard the dereference: a null merge result must not be recorded on the segment.
    if (dictInfo != null) {
        cubeSeg.putDictResPath(col, dictInfo.getResourcePath());
    }
    return dictInfo;
}
/**
 * Saves a new dictionary unless {@code checkDupByContent} reports an
 * existing one to reuse.
 *
 * @param newDict     dictionary content to store
 * @param newDictInfo metadata that receives the dictionary object and class name when saved
 * @return the existing dictionary info when a duplicate path was reported,
 *         otherwise {@code newDictInfo} after it has been saved and cached
 * @throws IOException declared by this method; presumably raised by the duplicate check, load, or save
 */
public DictionaryInfo trySaveNewDict(Dictionary<?> newDict, DictionaryInfo newDictInfo) throws IOException {
    final String existingPath = checkDupByContent(newDictInfo, newDict);

    if (existingPath == null) {
        // No duplicate reported: attach the content, persist it, then cache it.
        newDictInfo.setDictionaryObject(newDict);
        newDictInfo.setDictionaryClass(newDict.getClass().getName());
        save(newDictInfo);
        dictCache.put(newDictInfo.getResourcePath(), newDictInfo);
        return newDictInfo;
    }

    logger.info("Identical dictionary content " + newDict + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
/**
 * Writes a dictionary to the resource store obtained from the
 * {@code MetadataManager} for this instance's {@code config}, keyed by the
 * dictionary's resource path.
 *
 * @param dict the dictionary to persist
 * @throws IOException declared by this method; presumably raised by the store write
 */
void save(DictionaryInfo dict) throws IOException {
    final ResourceStore resourceStore = MetadataManager.getInstance(config).getStore();
    final String resourcePath = dict.getResourcePath();

    logger.info("Saving dictionary at " + resourcePath);

    resourceStore.putResource(resourcePath, dict, DictionaryInfoSerializer.FULL_SERIALIZER);
}
/**
 * Builds a dictionary for every non-metrics column of the inverted-index
 * descriptor, records each dictionary's resource path on the segment, and
 * finally saves the owning II instance.
 *
 * @param iiSeg           segment that receives the dictionary resource paths
 * @param factColumnsPath passed through unchanged to {@code buildDictionary}
 * @throws IOException declared by this method; presumably raised while building or saving
 */
public void buildInvertedIndexDictionary(IISegment iiSeg, String factColumnsPath) throws IOException {
    logger.info("Start building ii dictionary");

    final DictionaryManager manager = getDictionaryManager();
    final IIDesc descriptor = iiSeg.getIIInstance().getDescriptor();

    for (TblColRef column : descriptor.listAllColumns()) {
        logger.info("Dealing with column {}", column);

        // Metrics columns get no dictionary.
        if (!descriptor.isMetricsCol(column)) {
            final DictionaryInfo built = manager.buildDictionary(descriptor.getModel(), "true", column, factColumnsPath);
            iiSeg.putDictResPath(column, built.getResourcePath());
        }
    }

    saveResource(iiSeg.getIIInstance());
}
/**
 * Builds the dictionary for one column of a cube segment, records its
 * resource path on the segment, and saves the owning cube instance.
 *
 * @param cubeSeg         segment the dictionary is built for
 * @param col             column to build the dictionary on
 * @param factColumnsPath passed through unchanged to the dictionary manager
 * @return the built dictionary, or {@code null} when the cube's rowkey reports
 *         that {@code col} does not use a dictionary
 * @throws IOException declared by this method; presumably raised while building or saving
 */
public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, String factColumnsPath) throws IOException {
    final CubeDesc desc = cubeSeg.getCubeDesc();

    final boolean usesDictionary = desc.getRowkey().isUseDictionary(col);
    if (!usesDictionary) {
        return null;
    }

    final DictionaryManager manager = getDictionaryManager();
    final DictionaryInfo built = manager.buildDictionary(
            desc.getModel(),
            desc.getRowkey().getDictionary(col),
            col,
            factColumnsPath);

    cubeSeg.putDictResPath(col, built.getResourcePath());
    saveResource(cubeSeg.getCubeInstance());
    return built;
}
/**
 * Builds the dictionary for the same column twice and checks that the second
 * build yields the same dictionary as the first: equal uuid, and the same
 * {@code DictionaryInfo} and dictionary object instances as those returned
 * by {@code dictMgr.getDictionaryInfo}.
 *
 * <p>Currently ignored with the reason "hive not ready".
 */
@Test
@Ignore("hive not ready")
public void basic() throws Exception {
    CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc");
    TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");

    DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null);
    System.out.println(JsonUtil.writeValueAsIndentString(info1));

    DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null);
    System.out.println(JsonUtil.writeValueAsIndentString(info2));

    // Compare the uuids by value; `==` would only test reference identity.
    assertTrue(info1.getUuid().equals(info2.getUuid()));

    // Identity is intended below: each info must be the very instance the manager returns.
    assertTrue(info1 == dictMgr.getDictionaryInfo(info1.getResourcePath()));
    assertTrue(info2 == dictMgr.getDictionaryInfo(info2.getResourcePath()));

    assertTrue(info1.getDictionaryObject() == info2.getDictionaryObject());

    touchDictValues(info1);
}
((TrieDictionary) dict).dump(System.out); segment.putDictResPath(lfn, newDictInfo.getResourcePath()); segment.putDictResPath(lsi, sharedDict.getResourcePath()); segment.putDictResPath(ssc, sharedDict.getResourcePath());