/**
 * Static factory: builds a new {@code DictionaryDesc} and copies the three
 * arguments into its same-named fields. No validation is performed here.
 *
 * @param column       value assigned to the {@code column} field
 * @param reuseColumn  value assigned to the {@code reuseColumn} field
 * @param builderClass value assigned to the {@code builderClass} field
 * @return the freshly created descriptor
 */
public static DictionaryDesc create(String column, String reuseColumn, String builderClass) {
    DictionaryDesc created = new DictionaryDesc();
    created.builderClass = builderClass;
    created.reuseColumn = reuseColumn;
    created.column = column;
    return created;
}
} // closes the enclosing type declaration (its header lies outside this excerpt)
TblColRef dictCol = dictDesc.getColumnRef(); TblColRef reuseCol = dictDesc.getResuseColumnRef(); String builderClass = dictDesc.getBuilderClass(); if (!baseCols.contains(dictDesc.getResuseColumnRef())) { context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE + dictDesc.getColumnRef()); return;
/**
 * Initializes each entry of {@code dictionaries} against this object and adds
 * the entry's column (and its reuse column, when present) to {@code allColumns}.
 * Does nothing when {@code dictionaries} is {@code null}.
 */
private void initDictionaryDesc() {
    if (dictionaries == null) {
        return;
    }
    for (DictionaryDesc entry : dictionaries) {
        entry.init(this);
        allColumns.add(entry.getColumnRef());

        TblColRef reused = entry.getResuseColumnRef();
        if (reused != null) {
            allColumns.add(reused);
        }
    }
}
/**
 * Resolves the column whose dictionary {@code col} actually uses.
 * A column may be declared to reuse another column's dictionary; in that case
 * the reused column is returned, otherwise {@code col} itself is returned.
 *
 * @param col the column to resolve
 * @return the reuse column of the first matching dictionary entry that has one,
 *         or {@code col} when there is no such entry (or no dictionaries at all)
 */
public TblColRef getDictionaryReuseColumn(TblColRef col) {
    if (dictionaries != null) {
        for (DictionaryDesc entry : dictionaries) {
            if (!entry.getColumnRef().equals(col)) {
                continue;
            }
            TblColRef reused = entry.getResuseColumnRef();
            if (reused != null) {
                return reused;
            }
        }
    }
    return col;
}
public String getDictionaryBuilderClass(TblColRef col) { if (dictionaries == null) return null; for (DictionaryDesc desc : dictionaries) { if (desc.getBuilderClass() != null) { // column that reuses other's dict need not be built, thus should not reach here if (col.equals(desc.getColumnRef())) { return desc.getBuilderClass(); } } } return null; }
/** Get a column which can be used to cluster the source table. * To reduce memory footprint in base cuboid for global dict */ // TODO handle more than one ultra high cardinality columns use global dict in one cube TblColRef getClusteredByColumn() { if (getDistributedByColumn() != null) { return null; } if (dictionaries == null) { return null; } String clusterByColumn = config.getFlatHiveTableClusterByDictColumn(); for (DictionaryDesc dictDesc : dictionaries) { if (dictDesc.getColumnRef().getName().equalsIgnoreCase(clusterByColumn)) { return dictDesc.getColumnRef(); } } return null; }
/** * Get columns that need dictionary built on it. Note a column could reuse dictionary of another column. */ public Set<TblColRef> getAllColumnsNeedDictionaryBuilt() { Set<TblColRef> result = getAllColumnsHaveDictionary(); // remove columns that reuse other's dictionary if (dictionaries != null) { for (DictionaryDesc dictDesc : dictionaries) { if (dictDesc.getResuseColumnRef() != null) { result.remove(dictDesc.getColumnRef()); result.add(dictDesc.getResuseColumnRef()); } } } return result; }
/**
 * Lists the columns of all dictionary entries whose builder class is either
 * {@code GlobalDictionaryBuilder} or {@code SegmentAppendTrieDictBuilder}.
 *
 * @return a new list of matching columns, in dictionary order; empty when
 *         {@link #getDictionaries()} returns {@code null} or nothing matches
 */
public List<TblColRef> getAllGlobalDictColumns() {
    List<TblColRef> matches = new ArrayList<TblColRef>();
    List<DictionaryDesc> entries = getDictionaries();
    if (entries != null) {
        for (DictionaryDesc entry : entries) {
            String builder = entry.getBuilderClass();
            boolean usesGlobalBuilder = GlobalDictionaryBuilder.class.getName().equals(builder)
                    || SegmentAppendTrieDictBuilder.class.getName().equals(builder);
            if (usesGlobalBuilder) {
                matches.add(entry.getColumnRef());
            }
        }
    }
    return matches;
}
/**
 * Runs {@code testDictionaryDesc} with {@code ERROR_DUPLICATE_DICTIONARY_COLUMN}
 * for two descriptors on column "ORDER_ID": one with a fake builder class name
 * and one with the {@code GlobalDictionaryBuilder} class name.
 */
@Test
public void testBadDesc() throws IOException {
    DictionaryDesc withFakeBuilder = DictionaryDesc.create("ORDER_ID", null, "FakeBuilderClass");
    testDictionaryDesc(ERROR_DUPLICATE_DICTIONARY_COLUMN, withFakeBuilder);

    DictionaryDesc withGlobalBuilder = DictionaryDesc.create("ORDER_ID", null,
            GlobalDictionaryBuilder.class.getName());
    testDictionaryDesc(ERROR_DUPLICATE_DICTIONARY_COLUMN, withGlobalBuilder);
}
/** * Get columns that have dictionary */ public Set<TblColRef> getAllColumnsHaveDictionary() { Set<TblColRef> result = Sets.newLinkedHashSet(); // dictionaries in dimensions result.addAll(getAllDimsHaveDictionary()); // dictionaries in measures for (MeasureDesc measure : measures) { MeasureType<?> aggrType = measure.getFunction().getMeasureType(); result.addAll(aggrType.getColumnsNeedDictionary(measure.getFunction())); } // any additional dictionaries if (dictionaries != null) { for (DictionaryDesc dictDesc : dictionaries) { TblColRef col = dictDesc.getColumnRef(); result.add(col); } } return result; }
TblColRef c = dictDesc.getColumnRef(); initAddColumn(c); if (dictDesc.getResuseColumnRef() != null) { c = dictDesc.getResuseColumnRef(); initAddColumn(c);
/**
 * Initializes each entry of {@code dictionaries} against this object and adds
 * the entry's column (and its reuse column, when present) to {@code allColumns}.
 * Does nothing when {@code dictionaries} is {@code null}.
 */
private void initDictionaryDesc() {
    if (dictionaries == null) {
        return;
    }
    for (DictionaryDesc entry : dictionaries) {
        entry.init(this);
        allColumns.add(entry.getColumnRef());

        TblColRef reused = entry.getResuseColumnRef();
        if (reused != null) {
            allColumns.add(reused);
        }
    }
}
TblColRef dictCol = dictDesc.getColumnRef(); TblColRef reuseCol = dictDesc.getResuseColumnRef(); String builderClass = dictDesc.getBuilderClass(); if (!baseCols.contains(dictDesc.getResuseColumnRef())) { context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE + dictDesc.getColumnRef()); return;
public String getDictionaryBuilderClass(TblColRef col) { if (dictionaries == null) return null; for (DictionaryDesc desc : dictionaries) { if (desc.getBuilderClass() != null) { // column that reuses other's dict need not be built, thus should not reach here if (col.equals(desc.getColumnRef())) { return desc.getBuilderClass(); } } } return null; }
/**
 * Runs {@code testDictionaryDesc} with {@code ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE}
 * for a descriptor on column "CATEG_LVL2_NAME" that uses the
 * {@code GlobalDictionaryBuilder} class name and no reuse column.
 */
@Test
public void testBadDesc5() throws IOException {
    DictionaryDesc globalDictDesc = DictionaryDesc.create("CATEG_LVL2_NAME", null,
            GlobalDictionaryBuilder.class.getName());
    testDictionaryDesc(ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE, globalDictDesc);
}
/** Get a column which can be used to cluster the source table. * To reduce memory footprint in base cuboid for global dict */ // TODO handle more than one ultra high cardinality columns use global dict in one cube TblColRef getClusteredByColumn() { if (getDistributedByColumn() != null) { return null; } if (dictionaries == null) { return null; } String clusterByColumn = config.getFlatHiveTableClusterByDictColumn(); for (DictionaryDesc dictDesc : dictionaries) { if (dictDesc.getColumnRef().getName().equalsIgnoreCase(clusterByColumn)) { return dictDesc.getColumnRef(); } } return null; }
/**
 * Static factory: builds a new {@code DictionaryDesc} and copies the three
 * arguments into its same-named fields. No validation is performed here.
 *
 * @param column       value assigned to the {@code column} field
 * @param reuseColumn  value assigned to the {@code reuseColumn} field
 * @param builderClass value assigned to the {@code builderClass} field
 * @return the freshly created descriptor
 */
public static DictionaryDesc create(String column, String reuseColumn, String builderClass) {
    DictionaryDesc created = new DictionaryDesc();
    created.builderClass = builderClass;
    created.reuseColumn = reuseColumn;
    created.column = column;
    return created;
}
} // closes the enclosing type declaration (its header lies outside this excerpt)
/**
 * Resolves the column whose dictionary {@code col} actually uses.
 * A column may be declared to reuse another column's dictionary; in that case
 * the reused column is returned, otherwise {@code col} itself is returned.
 *
 * @param col the column to resolve
 * @return the reuse column of the first matching dictionary entry that has one,
 *         or {@code col} when there is no such entry (or no dictionaries at all)
 */
public TblColRef getDictionaryReuseColumn(TblColRef col) {
    if (dictionaries != null) {
        for (DictionaryDesc entry : dictionaries) {
            if (!entry.getColumnRef().equals(col)) {
                continue;
            }
            TblColRef reused = entry.getResuseColumnRef();
            if (reused != null) {
                return reused;
            }
        }
    }
    return col;
}
/**
 * Lists the columns of all dictionary entries whose builder class is either
 * {@code GlobalDictionaryBuilder} or {@code SegmentAppendTrieDictBuilder}.
 *
 * @return a new list of matching columns, in dictionary order; empty when
 *         {@link #getDictionaries()} returns {@code null} or nothing matches
 */
public List<TblColRef> getAllGlobalDictColumns() {
    List<TblColRef> matches = new ArrayList<TblColRef>();
    List<DictionaryDesc> entries = getDictionaries();
    if (entries != null) {
        for (DictionaryDesc entry : entries) {
            String builder = entry.getBuilderClass();
            boolean usesGlobalBuilder = GlobalDictionaryBuilder.class.getName().equals(builder)
                    || SegmentAppendTrieDictBuilder.class.getName().equals(builder);
            if (usesGlobalBuilder) {
                matches.add(entry.getColumnRef());
            }
        }
    }
    return matches;
}