/**
 * Creates a group scan over a JSON table.
 *
 * <p>The storage plugin, format plugin, projected columns and user name are handed to the
 * superclass constructor. The new instance starts with a fresh {@code MapRDBStatistics} holder and
 * an empty forced-row-count map, and then runs {@code init()}.
 *
 * @param userName      user on whose behalf the scan runs
 * @param storagePlugin storage plugin passed to the superclass
 * @param formatPlugin  format plugin passed to the superclass
 * @param scanSpec      scan specification kept by this group scan
 * @param columns       projected columns passed to the superclass
 */
public JsonTableGroupScan(String userName,
                          AbstractStoragePlugin storagePlugin,
                          MapRDBFormatPlugin formatPlugin,
                          JsonScanSpec scanSpec,
                          List<SchemaPath> columns) {
  super(storagePlugin, formatPlugin, columns, userName);
  this.forcedRowCountMap = new HashMap<>();
  this.stats = new MapRDBStatistics();
  this.scanSpec = scanSpec;
  // All fields above must be assigned before init() runs.
  init();
}
populateStatsForNoFilter(jTabGrpScan, indexes, scanRel, context); statsAvailable = true; return; addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload); if (ftsPayload == null || ftsPayload.getRowCount() == 0) { return; addToCache(idx, idxRowSizePayload, ftsPayload); IndexCollection distFKeyIndexes = distinctFKeyIndexes(indexes, scanRel); IndexConditionInfo.Builder infoBuilder = IndexConditionInfo.newBuilder(condition, distFKeyIndexes, builder, scanRel); RexNode preProcIdxCondition = convertToStatsCondition(idxCondition, idx, context, scanRel, Arrays.asList(SqlKind.CAST, SqlKind.LIKE)); RelDataType newRowType; convertToLogicalExpression(preProcIdxCondition, newRowType, settings, builder)); double avgRowSize = Math.min(idxPayload.getAvgRowSize(), ftsPayload.getAvgRowSize()); StatisticsPayload payload = new MapRDBStatisticsPayload(rowCount, leadingRowCount, avgRowSize); addToCache(idxCondition, idx, context, payload, jTabGrpScan, scanRel, newRowType); addBaseConditions(idxCondition, payload, false, baseConditionMap, scanRel.getRowType()); rowCount = ftsPayload.getRowCount() * computeSelectivity(idxLeadColCondition, idx, ftsPayload.getRowCount(), scanRel, baseConditionMap).left; leadingRowCount = rowCount; avgRowSize = fIStatsCache.get(buildUniqueIndexIdentifier(idx)).getAvgRowSize();
private RexNode convertToStatsCondition(RexNode condition, IndexDescriptor index, IndexCallContext context, RelNode scanRel, List<SqlKind>typesToProcess) { RexBuilder builder = scanRel.getCluster().getRexBuilder(); if (condition.getKind() == SqlKind.AND) { final List<RexNode> conditions = Lists.newArrayList(); for(RexNode pred : RelOptUtil.conjunctions(condition)) { conditions.add(convertToStatsCondition(pred, index, context, scanRel, typesToProcess)); } return RexUtil.composeConjunction(builder, conditions, false); } else if (condition.getKind() == SqlKind.OR) { final List<RexNode> conditions = Lists.newArrayList(); for(RexNode pred : RelOptUtil.disjunctions(condition)) { conditions.add(convertToStatsCondition(pred, index, context, scanRel, typesToProcess)); } return RexUtil.composeDisjunction(builder, conditions, false); } else if (condition instanceof RexCall) { // LIKE operator - convert to a RANGE predicate, if possible if (typesToProcess.contains(SqlKind.LIKE) && ((RexCall) condition).getOperator().getKind() == SqlKind.LIKE) { return convertLikeToRange((RexCall)condition, builder); } else if (typesToProcess.contains(SqlKind.CAST) && hasCastExpression(condition)) { return convertCastForFIdx(((RexCall) condition), index, context, scanRel); } else { return condition; } } return condition; }
/** * Get the row count after applying the {@link RexNode} condition * @param condition, filter to apply * @return row count post filtering */ @Override @JsonIgnore public double getRowCount(RexNode condition, RelNode scanRel) { // Do not use statistics if row count is forced. Forced rowcounts take precedence over stats double rowcount; if (forcedRowCountMap.get(condition) != null) { return forcedRowCountMap.get(condition); } if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowcount = stats.getRowCount(condition, idxIdentifier, scanRel); } else { rowcount = stats.getRowCount(condition, null, scanRel); } // Stats might NOT have the full rows (e.g. table is newly populated and DB stats APIs return it after // 15 mins). Use the table rows as populated using the (expensive but accurate) Hbase API if needed. if (condition == null && (rowcount == 0 || rowcount == ROWCOUNT_UNKNOWN)) { rowcount = fullTableRowCount; logger.debug("getRowCount: Stats not available yet! Use Admin APIs full table rowcount {}", fullTableRowCount); } return rowcount; }
private boolean addBaseConditions(RexNode condition, StatisticsPayload payload, boolean redundant, Map<String, Double> baseConditionMap, RelDataType rowType) { boolean res = redundant; if (condition.getKind() == SqlKind.AND) { for(RexNode pred : RelOptUtil.conjunctions(condition)) { res = addBaseConditions(pred, payload, res, baseConditionMap, rowType); } } else if (condition.getKind() == SqlKind.OR) { for(RexNode pred : RelOptUtil.disjunctions(condition)) { res = addBaseConditions(pred, payload, res, baseConditionMap, rowType); } } else { // base condition String conditionAsStr = convertRexToString(condition, rowType); if (!redundant) { baseConditionMap.put(conditionAsStr, payload.getRowCount()); return true; } else { baseConditionMap.put(conditionAsStr, -1.0); return false; } } return res; } /*
for(RexNode pred : RelOptUtil.conjunctions(condition)) { if (first) { sb.append(convertRexToString(pred, rowType)); first = false; } else { sb.append(" " + SqlKind.AND.toString() + " "); sb.append(convertRexToString(pred, rowType)); for(RexNode pred : RelOptUtil.disjunctions(condition)) { if (first) { sb.append(convertRexToString(pred, rowType)); first = false; } else { sb.append(" " + SqlKind.OR.toString() + " "); sb.append(convertRexToString(pred, rowType)); getInputRefMapping(condition, rowType, inputRefMapping); if (inputRefMapping.keySet().size() > 0) {
String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); double rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); double leadingRowCount = stats.getLeadingRowCount(scanSpec.getCondition(), idxIdentifier); double avgRowSize = stats.getAvgRowSize(idxIdentifier, false);
RexBuilder builder = scanRel.getCluster().getRexBuilder(); PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster()); String conditionAsStr = convertRexToString(condition, scanRel.getRowType()); if (statsCache.get(conditionAsStr) == null && payload.getRowCount() != Statistics.ROWCOUNT_UNKNOWN) { Map<String, StatisticsPayload> payloadMap = new HashMap<>(); payloadMap.put(buildUniqueIndexIdentifier(idx), payload); statsCache.put(conditionAsStr, payloadMap); logger.debug("Statistics: StatsCache:<{}, {}>",conditionAsStr, payload); RexNode preProcIdxCondition = convertToStatsCondition(condition, idx, context, scanRel, Arrays.asList(SqlKind.CAST)); QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(preProcIdxCondition, rowType, settings, builder)); if (queryCondition != null) { Map<String, StatisticsPayload> payloadMap = statsCache.get(conditionAsStr); if (payloadMap != null) { if (payloadMap.get(buildUniqueIndexIdentifier(idx)) == null) { payloadMap.put(buildUniqueIndexIdentifier(idx), payload);
@Override public boolean isDistributed() { // getMaxParallelizationWidth gets information about all regions to scan and is expensive. // This option is meant to be used only for unit tests. boolean useNumRegions = storagePlugin.getContext().getConfig().getBoolean(PluginConstants.JSON_TABLE_USE_NUM_REGIONS_FOR_DISTRIBUTION_PLANNING); double fullTableSize; if (useNumRegions) { return getMaxParallelizationWidth() > 1 ? true: false; } // This function gets called multiple times during planning. To avoid performance // bottleneck, estimate degree of parallelization using stats instead of actually getting information // about all regions. double rowCount, rowSize; double scanRangeSize = storagePlugin.getContext().getConfig().getInt(PluginConstants.JSON_TABLE_SCAN_SIZE_MB) * 1024 * 1024; if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); rowSize = stats.getAvgRowSize(idxIdentifier, false); } else { rowCount = stats.getRowCount(scanSpec.getCondition(), null); rowSize = stats.getAvgRowSize(null, false); } if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0 || rowSize == AVG_ROWSIZE_UNKNOWN || rowSize == 0) { fullTableSize = (scanSpec.getSerializedFilter() != null ? .5 : 1) * this.fullTableEstimatedSize; } else { fullTableSize = rowCount * rowSize; } return (long) fullTableSize / scanRangeSize > 1 ? true : false; }
final TreeMap<TabletFragmentInfo, String> regionsToScan = new TreeMap<>(); if (isIndexScan()) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); if (stats.isStatsAvailable()) { estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); if (stats.isStatsAvailable()) { estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), null);
/**
 * Populates the statistics caches for {@code condition} if they do not hold it yet.
 *
 * <p>The row-key join-back cost factor is always refreshed from the planner settings. Statistics
 * are populated only when the scan's group scan is a {@code DbGroupScan} and {@code statsCache}
 * has no entry for the condition's string form.
 *
 * @param condition filter condition whose statistics are needed
 * @param scanRel   scan rel supplying the group scan, planner and row type
 * @param context   index call context forwarded to {@code populateStats}
 * @return {@code true} if {@code populateStats} was invoked by this call, {@code false} otherwise
 */
public boolean initialize(RexNode condition, DrillScanRelBase scanRel, IndexCallContext context) {
  final GroupScan groupScan = IndexPlanUtils.getGroupScan(scanRel);
  final PlannerSettings plannerSettings =
      PrelUtil.getPlannerSettings(scanRel.getCluster().getPlanner());
  rowKeyJoinBackIOFactor = plannerSettings.getIndexRowKeyJoinCostFactor();

  if (!(groupScan instanceof DbGroupScan)) {
    return false;
  }

  final String cacheKey = convertRexToString(condition, scanRel.getRowType());
  if (statsCache.get(cacheKey) != null) {
    // Already cached for this condition; nothing to do.
    return false;
  }

  final IndexCollection secondaryIndexes =
      ((DbGroupScan) groupScan).getSecondaryIndexCollection(scanRel);
  populateStats(condition, secondaryIndexes, scanRel, context);
  logger.info("index_plan_info: initialize: scanRel #{} and groupScan {} got fulltable {}, statsCache: {}, fiStatsCache: {}",
      scanRel.getId(), System.identityHashCode(groupScan), fullTableScanPayload, statsCache, fIStatsCache);
  return true;
}
String firstCol = convertLExToStr(index.getIndexColumns().get(0)); if (firstColIndexMap.get(firstCol) != null) { idxList = firstColIndexMap.get(firstCol); IndexDescriptor selectedIdx = null; for (IndexDescriptor idx : indexesSameFirstCol) { String tabIdxIdentifier = buildUniqueIndexIdentifier(idx); double idxRowSize = fIStatsCache.get(tabIdxIdentifier).getAvgRowSize();
return new Pair<>(1.0, true); String conditionAsStr = convertRexToString(condition, scanRel.getRowType()); if (condition.getKind() == SqlKind.AND) { selectivity = 1.0; for (RexNode pred : RelOptUtil.conjunctions(condition)) { Pair<Double, Boolean> selPayload = computeSelectivity(pred, idx, totalRows, scanRel, baseConditionMap); if (selPayload.left > 0) { selectivity = 0.0; for (RexNode pred : RelOptUtil.disjunctions(condition)) { Pair<Double, Boolean> selPayload = computeSelectivity(pred, idx, totalRows, scanRel, baseConditionMap); if (selPayload.left > 0.0) {
return fullTableScanPayload.getRowCount(); } else if (condition != null) { conditionAsStr = convertRexToString(condition, scanRel.getRowType()); payloadMap = statsCache.get(conditionAsStr); if (payloadMap != null) {
/**
 * Caches the average-row-size payload for a table or index in {@code fIStatsCache}, unless an
 * entry for the same identifier is already present.
 *
 * <p>If the payload's average row size exceeds that of the full table scan payload, a substitute
 * payload is cached instead: it carries the full table scan's average row size and
 * {@code ROWCOUNT_UNKNOWN} for both row counts.
 *
 * @param idx        index descriptor used to build the cache key
 * @param payload    average-row-size statistics to cache
 * @param ftsPayload full table scan statistics providing the upper bound on row size
 */
private void addToCache(IndexDescriptor idx, StatisticsPayload payload, StatisticsPayload ftsPayload) {
  final String cacheKey = buildUniqueIndexIdentifier(idx);

  if (fIStatsCache.get(cacheKey) != null) {
    logger.debug("Statistics: Average row size already exists for :<{}, {}>. Skip!",cacheKey, payload);
    return;
  }

  if (ftsPayload.getAvgRowSize() >= payload.getAvgRowSize()) {
    fIStatsCache.put(cacheKey, payload);
    logger.debug("Statistics: fIStatsCache:<{}, {}>",cacheKey, payload);
    return;
  }

  // The reported row size is larger than the full table's: cap it at the full table's value.
  final StatisticsPayload capped =
      new MapRDBStatisticsPayload(ROWCOUNT_UNKNOWN, ROWCOUNT_UNKNOWN, ftsPayload.getAvgRowSize());
  fIStatsCache.put(cacheKey, capped);
  logger.debug("Statistics: fIStatsCache:<{}, {}> (Capped)",cacheKey, capped);
}
/** * This function computes statistics when there is no query condition * @param jTabGrpScan - The current group scan * @param indexes - The collection of indexes to use for getting statistics * @param scanRel - The current scanRel * @param context - The index plan call context */ private void populateStatsForNoFilter(JsonTableGroupScan jTabGrpScan, IndexCollection indexes, RelNode scanRel, IndexCallContext context) { // Get the stats payload for full table (has total rows in the table) StatisticsPayload ftsPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, null, scanRel); addToCache(null, null, context, ftsPayload, jTabGrpScan, scanRel, scanRel.getRowType()); addToCache(null, jTabGrpScan.getAverageRowSizeStats(null), ftsPayload); // Get the stats for all indexes for (IndexDescriptor idx: indexes) { StatisticsPayload idxPayload = jTabGrpScan.getFirstKeyEstimatedStats(null, idx, scanRel); StatisticsPayload idxRowSizePayload = jTabGrpScan.getAverageRowSizeStats(idx); RelDataType newRowType; FunctionalIndexInfo functionInfo = idx.getFunctionalInfo(); if (functionInfo.hasFunctional()) { newRowType = FunctionalIndexHelper.rewriteFunctionalRowType(scanRel, context, functionInfo); } else { newRowType = scanRel.getRowType(); } addToCache(null, idx, context, idxPayload, jTabGrpScan, scanRel, newRowType); addToCache(idx, idxRowSizePayload, ftsPayload); } }
String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); double rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); double leadingRowCount = stats.getLeadingRowCount(scanSpec.getCondition(), idxIdentifier); double avgRowSize = stats.getAvgRowSize(idxIdentifier, false);
RexBuilder builder = scanRel.getCluster().getRexBuilder(); PlannerSettings settings = PrelUtil.getSettings(scanRel.getCluster()); String conditionAsStr = convertRexToString(condition, scanRel.getRowType()); if (statsCache.get(conditionAsStr) == null && payload.getRowCount() != Statistics.ROWCOUNT_UNKNOWN) { Map<String, StatisticsPayload> payloadMap = new HashMap<>(); payloadMap.put(buildUniqueIndexIdentifier(idx), payload); statsCache.put(conditionAsStr, payloadMap); logger.debug("Statistics: StatsCache:<{}, {}>",conditionAsStr, payload); RexNode preProcIdxCondition = convertToStatsCondition(condition, idx, context, scanRel, Arrays.asList(SqlKind.CAST)); QueryCondition queryCondition = jTabGrpScan.convertToQueryCondition(convertToLogicalExpression(preProcIdxCondition, rowType, settings, builder)); if (queryCondition != null) { Map<String, StatisticsPayload> payloadMap = statsCache.get(conditionAsStr); if (payloadMap != null) { if (payloadMap.get(buildUniqueIndexIdentifier(idx)) == null) { payloadMap.put(buildUniqueIndexIdentifier(idx), payload);
@Override public boolean isDistributed() { // getMaxParallelizationWidth gets information about all regions to scan and is expensive. // This option is meant to be used only for unit tests. boolean useNumRegions = storagePlugin.getContext().getConfig().getBoolean(PluginConstants.JSON_TABLE_USE_NUM_REGIONS_FOR_DISTRIBUTION_PLANNING); double fullTableSize; if (useNumRegions) { return getMaxParallelizationWidth() > 1 ? true: false; } // This function gets called multiple times during planning. To avoid performance // bottleneck, estimate degree of parallelization using stats instead of actually getting information // about all regions. double rowCount, rowSize; double scanRangeSize = storagePlugin.getContext().getConfig().getInt(PluginConstants.JSON_TABLE_SCAN_SIZE_MB) * 1024 * 1024; if (scanSpec.getIndexDesc() != null) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexName()); rowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); rowSize = stats.getAvgRowSize(idxIdentifier, false); } else { rowCount = stats.getRowCount(scanSpec.getCondition(), null); rowSize = stats.getAvgRowSize(null, false); } if (rowCount == ROWCOUNT_UNKNOWN || rowCount == 0 || rowSize == AVG_ROWSIZE_UNKNOWN || rowSize == 0) { fullTableSize = (scanSpec.getSerializedFilter() != null ? .5 : 1) * this.fullTableEstimatedSize; } else { fullTableSize = rowCount * rowSize; } return (long) fullTableSize / scanRangeSize > 1 ? true : false; }
final TreeMap<TabletFragmentInfo, String> regionsToScan = new TreeMap<>(); if (isIndexScan()) { String idxIdentifier = stats.buildUniqueIndexIdentifier(scanSpec.getIndexDesc().getPrimaryTablePath(), scanSpec.getIndexDesc().getIndexName()); if (stats.isStatsAvailable()) { estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), idxIdentifier); if (stats.isStatsAvailable()) { estimatedRowCount = stats.getRowCount(scanSpec.getCondition(), null);