/**
 * Builds a normalized copy of {@code stats}.
 *
 * <p>If the output row count is unknown, the result is {@link PlanNodeStatsEstimate#unknown()}.
 * Otherwise every symbol with known statistics is visited: symbols not listed in
 * {@code outputSymbols} (when present) are dropped, symbols whose normalized statistics turn out
 * to be unknown are dropped, and symbols whose normalized statistics differ from the originals
 * are overwritten with the normalized values.
 *
 * @param stats the estimate to normalize
 * @param outputSymbols when present, the only symbols whose statistics are kept
 * @param types type information passed through to the per-symbol normalization
 * @return the normalized estimate
 */
private PlanNodeStatsEstimate normalize(PlanNodeStatsEstimate stats, Optional<Collection<Symbol>> outputSymbols, TypeProvider types)
{
    if (stats.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    // With no explicit output list every symbol is retained.
    Predicate<Symbol> retained;
    if (outputSymbols.isPresent()) {
        ImmutableSet<Symbol> allowed = ImmutableSet.copyOf(outputSymbols.get());
        retained = allowed::contains;
    }
    else {
        retained = symbol -> true;
    }

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(stats);
    boolean noRows = stats.getOutputRowCount() == 0;

    for (Symbol symbol : stats.getSymbolsWithKnownStatistics()) {
        if (!retained.test(symbol)) {
            result.removeSymbolStatistics(symbol);
            continue;
        }

        SymbolStatsEstimate original = stats.getSymbolStatistics(symbol);
        SymbolStatsEstimate adjusted;
        if (noRows) {
            // An estimate with zero rows gets zeroed symbol statistics.
            adjusted = SymbolStatsEstimate.zero();
        }
        else {
            adjusted = normalizeSymbolStats(symbol, original, stats, types);
        }

        if (adjusted.isUnknown()) {
            result.removeSymbolStatistics(symbol);
        }
        else if (!Objects.equals(adjusted, original)) {
            // Only touch the builder when normalization actually changed something.
            result.addSymbolStatistics(symbol, adjusted);
        }
    }

    return result.build();
}
/**
 * Combines two estimates into one whose row count is the sum of both inputs.
 *
 * <p>Returns {@link PlanNodeStatsEstimate#unknown()} if either input has an unknown row count.
 * Statistics are produced for every symbol known to either side: via {@code addColumnStats}
 * when the combined row count is positive, and as {@link SymbolStatsEstimate#zero()} otherwise.
 *
 * @param left first estimate
 * @param right second estimate
 * @param strategy forwarded unchanged to {@code addColumnStats}
 * @return the combined estimate
 */
private static PlanNodeStatsEstimate addStats(PlanNodeStatsEstimate left, PlanNodeStatsEstimate right, RangeAdditionStrategy strategy)
{
    if (left.isOutputRowCountUnknown() || right.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    double totalRowCount = left.getOutputRowCount() + right.getOutputRowCount();
    PlanNodeStatsEstimate.Builder combined = PlanNodeStatsEstimate.builder();

    // Visit each symbol known to either side exactly once.
    concat(left.getSymbolsWithKnownStatistics().stream(), right.getSymbolsWithKnownStatistics().stream())
            .distinct()
            .forEach(symbol -> combined.addSymbolStatistics(
                    symbol,
                    totalRowCount > 0
                            ? addColumnStats(
                                    left.getSymbolStatistics(symbol),
                                    left.getOutputRowCount(),
                                    right.getSymbolStatistics(symbol),
                                    right.getOutputRowCount(),
                                    totalRowCount,
                                    strategy)
                            : SymbolStatsEstimate.zero()));

    combined.setOutputRowCount(totalRowCount);
    return combined.build();
}
private PlanNodeStatsEstimate estimateLogicalAnd(Expression left, Expression right) { // first try to estimate in the fair way PlanNodeStatsEstimate leftEstimate = process(left); if (!leftEstimate.isOutputRowCountUnknown()) { PlanNodeStatsEstimate logicalAndEstimate = new FilterExpressionStatsCalculatingVisitor(leftEstimate, session, types).process(right); if (!logicalAndEstimate.isOutputRowCountUnknown()) { return logicalAndEstimate; } } // If some of the filters cannot be estimated, take the smallest estimate. // Apply 0.9 filter factor as "unknown filter" factor. PlanNodeStatsEstimate rightEstimate = process(right); PlanNodeStatsEstimate smallestKnownEstimate; if (leftEstimate.isOutputRowCountUnknown()) { smallestKnownEstimate = rightEstimate; } else if (rightEstimate.isOutputRowCountUnknown()) { smallestKnownEstimate = leftEstimate; } else { smallestKnownEstimate = leftEstimate.getOutputRowCount() <= rightEstimate.getOutputRowCount() ? leftEstimate : rightEstimate; } if (smallestKnownEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } return smallestKnownEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT); }
/**
 * Returns the estimated data size, computed as the sum of the per-symbol output sizes
 * of {@code outputSymbols}.
 * Unknown value is represented by {@link Double#NaN}
 *
 * @param outputSymbols symbols whose sizes are summed; must not be null
 * @param types used to look up the type of each symbol
 */
public double getOutputSizeInBytes(Collection<Symbol> outputSymbols, TypeProvider types)
{
    requireNonNull(outputSymbols, "outputSymbols is null");

    // The stream-based sum is kept deliberately so the floating-point result is unchanged.
    return outputSymbols.stream()
            .mapToDouble(symbol -> {
                SymbolStatsEstimate symbolStatistics = getSymbolStatistics(symbol);
                return getOutputSizeForSymbol(symbolStatistics, types.get(symbol));
            })
            .sum();
}
/**
 * Returns a copy of this estimate in which the statistics of {@code symbol} are replaced by
 * the result of applying {@code mappingFunction} to the symbol's current statistics.
 *
 * @param symbol the symbol whose statistics are rewritten
 * @param mappingFunction transformation applied to the current statistics of {@code symbol}
 * @return a new estimate built from this one with the rewritten symbol statistics
 */
public PlanNodeStatsEstimate mapSymbolColumnStatistics(Symbol symbol, Function<SymbolStatsEstimate, SymbolStatsEstimate> mappingFunction)
{
    Builder copy = buildFrom(this);
    SymbolStatsEstimate current = getSymbolStatistics(symbol);
    SymbolStatsEstimate mapped = mappingFunction.apply(current);
    copy.addSymbolStatistics(symbol, mapped);
    return copy.build();
}
/**
 * Re-keys symbol statistics positionally: the statistics that {@code estimate} holds for
 * {@code inputs.get(i)} are stored under {@code outputs.get(i)}. The output row count is
 * carried over unchanged.
 *
 * @throws IllegalArgumentException presumably, via {@code checkArgument}, when the two lists differ in size
 */
private PlanNodeStatsEstimate mapToOutputSymbols(PlanNodeStatsEstimate estimate, List<Symbol> inputs, List<Symbol> outputs)
{
    checkArgument(inputs.size() == outputs.size(), "Input symbols count does not match output symbols count");

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();
    result.setOutputRowCount(estimate.getOutputRowCount());

    int symbolCount = inputs.size();
    for (int position = 0; position < symbolCount; position++) {
        Symbol target = outputs.get(position);
        Symbol source = inputs.get(position);
        result.addSymbolStatistics(target, estimate.getSymbolStatistics(source));
    }

    return result.build();
}
}
SymbolStatsEstimate leftColumnStats = leftStats.getSymbolStatistics(drivingClause.getLeft()); SymbolStatsEstimate rightColumnStats = rightStats.getSymbolStatistics(drivingClause.getRight()); double scaleFactor = nonMatchingLeftValuesFraction + leftColumnStats.getNullsFraction(); double newLeftNullsFraction = leftColumnStats.getNullsFraction() / scaleFactor; result = result.mapSymbolColumnStatistics(drivingClause.getLeft(), columnStats -> SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(leftColumnStats.getLowValue()) .setDistinctValuesCount(leftNDV - matchingRightNDV) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * scaleFactor); result = result.mapSymbolColumnStatistics(drivingClause.getLeft(), columnStats -> SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(NaN) .setDistinctValuesCount(0.0) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * leftColumnStats.getNullsFraction()); return PlanNodeStatsEstimate.unknown(); result = result.mapOutputRowCount(rowCount -> min(leftStats.getOutputRowCount(), rowCount / Math.pow(UNKNOWN_FILTER_COEFFICIENT, numberOfRemainingClauses)));
/**
 * Resolves a symbol reference to the statistics that {@code input} holds for that symbol.
 */
@Override
protected SymbolStatsEstimate visitSymbolReference(SymbolReference node, Void context)
{
    Symbol referenced = Symbol.from(node);
    return input.getSymbolStatistics(referenced);
}
/**
 * Handler for generic expressions: always reports an unknown estimate.
 */
@Override
protected PlanNodeStatsEstimate visitExpression(Expression node, Void context)
{
    return PlanNodeStatsEstimate.unknown();
}
PlanNodeStatsEstimate joinComplementStats) double innerJoinRowCount = innerJoinStats.getOutputRowCount(); double joinComplementRowCount = joinComplementStats.getOutputRowCount(); if (joinComplementRowCount == 0) { return innerJoinStats; PlanNodeStatsEstimate.Builder outputStats = PlanNodeStatsEstimate.buildFrom(innerJoinStats); outputStats.setOutputRowCount(outputRowCount); for (Symbol symbol : joinComplementStats.getSymbolsWithKnownStatistics()) { SymbolStatsEstimate leftSymbolStats = sourceStats.getSymbolStatistics(symbol); SymbolStatsEstimate innerJoinSymbolStats = innerJoinStats.getSymbolStatistics(symbol); SymbolStatsEstimate joinComplementSymbolStats = joinComplementStats.getSymbolStatistics(symbol); for (Symbol symbol : difference(innerJoinStats.getSymbolsWithKnownStatistics(), joinComplementStats.getSymbolsWithKnownStatistics())) { SymbolStatsEstimate innerJoinSymbolStats = innerJoinStats.getSymbolStatistics(symbol); double newNullsFraction = (innerJoinSymbolStats.getNullsFraction() * innerJoinRowCount + joinComplementRowCount) / outputRowCount; outputStats.addSymbolStatistics(symbol, innerJoinSymbolStats.mapNullsFraction(nullsFraction -> newNullsFraction));
if (Double.compare(stats.getOutputRowCount(), normalized.getOutputRowCount()) != 0) { problems.add(format( "Output row count is %s, should be normalized to %s", stats.getOutputRowCount(), normalized.getOutputRowCount())); for (Symbol symbol : stats.getSymbolsWithKnownStatistics()) { if (!Objects.equals(stats.getSymbolStatistics(symbol), normalized.getSymbolStatistics(symbol))) { problems.add(format( "Symbol stats for '%s' are \n\t\t\t\t\t%s, should be normalized to \n\t\t\t\t\t%s", symbol, stats.getSymbolStatistics(symbol), normalized.getSymbolStatistics(symbol))); problems.add(stats.toString());
BiFunction<SymbolStatsEstimate, SymbolStatsEstimate, Double> retainedNdvProvider) SymbolStatsEstimate sourceJoinSymbolStats = sourceStats.getSymbolStatistics(sourceJoinSymbol); SymbolStatsEstimate filteringSourceJoinSymbolStats = filteringSourceStats.getSymbolStatistics(filteringSourceJoinSymbol); return PlanNodeStatsEstimate.buildFrom(sourceStats) .addSymbolStatistics(sourceJoinSymbol, newSourceJoinSymbolStats) .setOutputRowCount(0) double outputRowCount = sourceStats.getOutputRowCount() * filterFactor; return PlanNodeStatsEstimate.buildFrom(sourceStats) .addSymbolStatistics(sourceJoinSymbol, newSourceJoinSymbolStats) .setOutputRowCount(outputRowCount)
/**
 * Asserts that the estimate under test matches {@code expected}: first the output row count,
 * then the statistics of every symbol known to either estimate.
 *
 * @param expected the estimate to compare against
 * @return this assertion, for chaining
 */
public PlanNodeStatsAssertion equalTo(PlanNodeStatsEstimate expected)
{
    assertEstimateEquals(actual.getOutputRowCount(), expected.getOutputRowCount(), "outputRowCount mismatch");

    // Compare over the union so a symbol known to only one side is still checked.
    union(expected.getSymbolsWithKnownStatistics(), actual.getSymbolsWithKnownStatistics())
            .forEach(symbol -> assertSymbolStatsEqual(
                    symbol,
                    actual.getSymbolStatistics(symbol),
                    expected.getSymbolStatistics(symbol)));

    return this;
}
/**
 * Checks row-count handling of {@code subtractSubsetStats}: an unknown row count on either
 * side produces an unknown estimate, and two known row counts are subtracted.
 */
@Test
public void testSubtractRowCount()
{
    PlanNodeStatsEstimate unknownRowCount = statistics(NaN, NaN, NaN, StatisticRange.empty());
    PlanNodeStatsEstimate fortyRows = statistics(40, NaN, NaN, StatisticRange.empty());
    PlanNodeStatsEstimate tenRows = statistics(10, NaN, NaN, StatisticRange.empty());

    // Any unknown operand makes the whole result unknown.
    assertEquals(subtractSubsetStats(unknownRowCount, unknownRowCount), PlanNodeStatsEstimate.unknown());
    assertEquals(subtractSubsetStats(fortyRows, unknownRowCount), PlanNodeStatsEstimate.unknown());
    assertEquals(subtractSubsetStats(unknownRowCount, tenRows), PlanNodeStatsEstimate.unknown());

    // 40 - 10 = 30
    assertEquals(subtractSubsetStats(fortyRows, tenRows).getOutputRowCount(), 30.0);
}
private PlanNodeStatsEstimate computeInnerJoinStats(JoinNode node, PlanNodeStatsEstimate crossJoinStats, Session session, TypeProvider types) { List<EquiJoinClause> equiJoinCriteria = node.getCriteria(); if (equiJoinCriteria.isEmpty()) { if (!node.getFilter().isPresent()) { return crossJoinStats; } // TODO: this might explode stats return filterStatsCalculator.filterStats(crossJoinStats, node.getFilter().get(), session, types); } PlanNodeStatsEstimate equiJoinEstimate = filterByEquiJoinClauses(crossJoinStats, node.getCriteria(), session, types); if (equiJoinEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } if (!node.getFilter().isPresent()) { return equiJoinEstimate; } PlanNodeStatsEstimate filteredEquiJoinEstimate = filterStatsCalculator.filterStats(equiJoinEstimate, node.getFilter().get(), session, types); if (filteredEquiJoinEstimate.isOutputRowCountUnknown()) { return normalizer.normalize(equiJoinEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT), types); } return filteredEquiJoinEstimate; }
/**
 * Renders the estimated rows, data size, and CPU/memory/network cost of {@code node} as a
 * single brace-delimited string. Nodes without a recorded estimate are rendered using the
 * corresponding {@code unknown()} stats or cost.
 */
private String formatPlanNodeStatsAndCost(PlanNode node)
{
    PlanNodeStatsEstimate nodeStats = estimatedStatsAndCosts.getStats()
            .getOrDefault(node.getId(), PlanNodeStatsEstimate.unknown());
    PlanNodeCostEstimate nodeCost = estimatedStatsAndCosts.getCosts()
            .getOrDefault(node.getId(), PlanNodeCostEstimate.unknown());

    return format(
            "{rows: %s (%s), cpu: %s, memory: %s, network: %s}",
            formatAsLong(nodeStats.getOutputRowCount()),
            formatEstimateAsDataSize(nodeStats.getOutputSizeInBytes(node.getOutputSymbols(), types)),
            formatDouble(nodeCost.getCpuCost()),
            formatDouble(nodeCost.getMemoryCost()),
            formatDouble(nodeCost.getNetworkCost()));
}
}
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) { // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator Constraint<ColumnHandle> constraint = new Constraint<>(node.getCurrentConstraint()); TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint); Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>(); for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) { Symbol symbol = entry.getKey(); Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue())); outputSymbolStats.put(symbol, columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics)).orElse(SymbolStatsEstimate.unknown())); } return Optional.of(PlanNodeStatsEstimate.builder() .setOutputRowCount(tableStatistics.getRowCount().getValue()) .addSymbolStatistics(outputSymbolStats) .build()); }
/**
 * Extracts the output row count from {@code planNodeStatsEstimate} and converts it with
 * {@code asOptional}. {@code statsContext} is not used.
 */
@Override
public OptionalDouble getValueFromPlanNodeEstimate(PlanNodeStatsEstimate planNodeStatsEstimate, StatsContext statsContext)
{
    double outputRowCount = planNodeStatsEstimate.getOutputRowCount();
    return asOptional(outputRowCount);
}
/**
 * Estimates the result of filtering {@code inputStatistics} by an equality between two
 * expressions.
 *
 * <p>Returns {@link PlanNodeStatsEstimate#unknown()} when either side's distinct values count
 * is NaN. Otherwise the output row count is the input row count multiplied by the fraction of
 * rows where both sides are non-null and by {@code 1 / max(leftNdv, rightNdv, 1)}. When a side
 * is backed by a symbol, that symbol's statistics are replaced with the shared equality
 * statistics: zero nulls, the intersection of both ranges, and the smaller distinct count.
 *
 * @param inputStatistics statistics before the equality is applied
 * @param leftExpressionStatistics statistics of the left operand
 * @param leftExpressionSymbol symbol of the left operand, if it has one
 * @param rightExpressionStatistics statistics of the right operand
 * @param rightExpressionSymbol symbol of the right operand, if it has one
 */
private static PlanNodeStatsEstimate estimateExpressionEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    if (isNaN(leftExpressionStatistics.getDistinctValuesCount()) || isNaN(rightExpressionStatistics.getDistinctValuesCount())) {
        return PlanNodeStatsEstimate.unknown();
    }

    StatisticRange leftRange = StatisticRange.from(leftExpressionStatistics);
    StatisticRange rightRange = StatisticRange.from(rightExpressionStatistics);
    StatisticRange overlap = leftRange.intersect(rightRange);

    double leftNdv = leftRange.getDistinctValuesCount();
    double rightNdv = rightRange.getDistinctValuesCount();

    // Fraction of rows in which neither operand is null.
    double nonNullFactor = (1 - leftExpressionStatistics.getNullsFraction()) * (1 - rightExpressionStatistics.getNullsFraction());
    // The lower bound of 1 in max() prevents division by zero.
    double equalityFactor = 1.0 / max(leftNdv, rightNdv, 1);

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(inputStatistics);
    result.setOutputRowCount(inputStatistics.getOutputRowCount() * nonNullFactor * equalityFactor);

    SymbolStatsEstimate equalityStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(averageExcludingNaNs(leftExpressionStatistics.getAverageRowSize(), rightExpressionStatistics.getAverageRowSize()))
            .setNullsFraction(0)
            .setStatisticsRange(overlap)
            .setDistinctValuesCount(min(leftNdv, rightNdv))
            .build();

    if (leftExpressionSymbol.isPresent()) {
        result.addSymbolStatistics(leftExpressionSymbol.get(), equalityStats);
    }
    if (rightExpressionSymbol.isPresent()) {
        result.addSymbolStatistics(rightExpressionSymbol.get(), equalityStats);
    }

    return result.build();
}
/**
 * Estimates the statistics of a filter node by applying the node's predicate to the
 * statistics of its source.
 *
 * <p>When the predicate cannot be estimated (unknown output row count) and the default filter
 * factor is enabled for the session, the source statistics are returned with the row count
 * scaled by {@code UNKNOWN_FILTER_COEFFICIENT}.
 *
 * @return the estimate, always present; its row count may still be unknown when the default
 *         filter factor is disabled
 */
@Override
public Optional<PlanNodeStatsEstimate> doCalculate(FilterNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource());
    PlanNodeStatsEstimate estimate = filterStatsCalculator.filterStats(sourceStats, node.getPredicate(), session, types);

    if (isDefaultFilterFactorEnabled(session) && estimate.isOutputRowCountUnknown()) {
        // Scale the row count handed to the mapping function rather than re-reading it from the captured sourceStats.
        estimate = sourceStats.mapOutputRowCount(sourceRowCount -> sourceRowCount * UNKNOWN_FILTER_COEFFICIENT);
    }

    return Optional.of(estimate);
}
}