/**
 * Reads the estimated low value of {@code columnName} from the given plan node estimate.
 *
 * @param planNodeStatsEstimate estimate to read the symbol statistics from
 * @param statsContext context handed to {@code getSymbolStatistics} for the lookup
 * @return the low value wrapped in an {@link OptionalDouble}, or empty when
 *         {@code isInfinite} reports the low value as infinite
 */
@Override
public OptionalDouble getValueFromPlanNodeEstimate(PlanNodeStatsEstimate planNodeStatsEstimate, StatsContext statsContext)
{
    double estimatedLow = getSymbolStatistics(planNodeStatsEstimate, columnName, statsContext).getLowValue();
    // An infinite bound is reported as "no value"; anything else is returned as-is.
    return isInfinite(estimatedLow) ? OptionalDouble.empty() : OptionalDouble.of(estimatedLow);
}
/**
 * Asserts that the low value of the statistics under test matches {@code expected}.
 *
 * @param expected the low value the statistics should report
 * @return this assertion object, so further checks can be chained
 */
public SymbolStatsAssertion lowValue(double expected)
{
    double actual = statistics.getLowValue();
    assertEstimateEquals(actual, expected, "lowValue mismatch");
    return this;
}
/**
 * Builds a {@code StatisticRange} from the low value, high value and distinct values
 * count of the given symbol statistics.
 *
 * @param estimate symbol statistics to copy the range from
 * @return a new range carrying the estimate's bounds and distinct values count
 */
public static StatisticRange from(SymbolStatsEstimate estimate)
{
    double low = estimate.getLowValue();
    double high = estimate.getHighValue();
    double distinctValues = estimate.getDistinctValuesCount();
    return new StatisticRange(low, high, distinctValues);
}
/**
 * Derives an upper bound on the number of distinct values from the low/high bounds.
 *
 * @param symbolStats statistics whose range is inspected
 * @param type type of the symbol; only discrete types yield a bound
 * @return 1 for a zero-length range; NaN when the type is not discrete or the
 *         range width is NaN; otherwise {@code floor(width + 1)}, where the width
 *         is first multiplied by {@code 10^scale} for a {@code DecimalType}
 */
private double maxDistinctValuesByLowHigh(SymbolStatsEstimate symbolStats, Type type)
{
    // Zero-length range: report a single distinct value regardless of type.
    if (symbolStats.statisticRange().length() == 0.0) {
        return 1;
    }
    if (!isDiscrete(type)) {
        return NaN;
    }

    double width = symbolStats.getHighValue() - symbolStats.getLowValue();
    if (isNaN(width)) {
        return NaN;
    }

    // For decimals, scale the width by 10^scale before counting.
    double scaledWidth = (type instanceof DecimalType)
            ? width * pow(10, ((DecimalType) type).getScale())
            : width;
    return floor(scaledWidth + 1);
}
/**
 * Asserts that the statistics under test describe an empty range: both bounds are NaN,
 * the distinct values count and average row size are 0, and the nulls fraction is 1.
 *
 * @return this assertion object, so further checks can be chained
 */
public SymbolStatsAssertion emptyRange()
{
    double low = statistics.getLowValue();
    double high = statistics.getHighValue();
    boolean bothBoundsNaN = isNaN(low) && isNaN(high);
    assertTrue(bothBoundsNaN, "expected empty range (NaN, NaN) but got (" + low + ", " + high + ") instead");

    assertEquals(statistics.getDistinctValuesCount(), 0.0, "expected no distinctValuesCount");
    assertEquals(statistics.getAverageRowSize(), 0.0, "expected 0 average row size");
    assertEquals(statistics.getNullsFraction(), 1.0, "expected all nulls");
    return this;
}
/**
 * Computes statistics for a unary plus/minus expression.
 *
 * <p>Unary plus returns the operand's statistics unchanged. Unary minus copies them and
 * swaps the negated bounds: the new low is {@code -high} and the new high is {@code -low}.
 *
 * @throws IllegalStateException if the sign is neither PLUS nor MINUS
 */
@Override
protected SymbolStatsEstimate visitArithmeticUnary(ArithmeticUnaryExpression node, Void context)
{
    SymbolStatsEstimate operandStats = process(node.getValue());
    switch (node.getSign()) {
        case PLUS:
            return operandStats;
        case MINUS: {
            // Negation flips the range, so the bounds trade places.
            double negatedLow = -operandStats.getHighValue();
            double negatedHigh = -operandStats.getLowValue();
            return SymbolStatsEstimate.buildFrom(operandStats)
                    .setLowValue(negatedLow)
                    .setHighValue(negatedHigh)
                    .build();
        }
        default:
            throw new IllegalStateException("Unexpected sign: " + node.getSign());
    }
}
// Use the right-hand side's low value as the comparison literal; a NaN low value is passed on as "no value" (empty).
// NOTE(review): only rightStats' low value is consulted here — presumably the caller has already established
// that the right side is a single value; confirm against the enclosing method.
OptionalDouble value = isNaN(rightStats.getLowValue()) ? OptionalDouble.empty() : OptionalDouble.of(rightStats.getLowValue()); return estimateExpressionToLiteralComparison(input, leftStats, leftSymbol, value, operator);
/**
 * Creates a builder pre-populated with every field of {@code other}: low value, high value,
 * nulls fraction, average row size and distinct values count.
 *
 * @param other statistics to copy from
 * @return a builder seeded with {@code other}'s values
 */
public static Builder buildFrom(SymbolStatsEstimate other)
{
    Builder seeded = builder();
    seeded = seeded.setLowValue(other.getLowValue());
    seeded = seeded.setHighValue(other.getHighValue());
    seeded = seeded.setNullsFraction(other.getNullsFraction());
    seeded = seeded.setAverageRowSize(other.getAverageRowSize());
    seeded = seeded.setDistinctValuesCount(other.getDistinctValuesCount());
    return seeded;
}
/**
 * Adds the two estimates with {@code addStatsAndMaxDistinctValues} and asserts that the
 * resulting statistics for {@code SYMBOL} have the expected low and high values.
 */
private static void assertAddRange(PlanNodeStatsEstimate first, PlanNodeStatsEstimate second, double expectedLow, double expectedHigh)
{
    SymbolStatsEstimate combined = addStatsAndMaxDistinctValues(first, second).getSymbolStatistics(SYMBOL);

    double actualLow = combined.getLowValue();
    assertEquals(actualLow, expectedLow);

    double actualHigh = combined.getHighValue();
    assertEquals(actualHigh, expectedHigh);
}
/**
 * Caps {@code stats} by {@code cap} with {@code capStats} and asserts that the resulting
 * statistics for {@code SYMBOL} have the expected low and high values.
 */
private static void assertCapRange(PlanNodeStatsEstimate stats, PlanNodeStatsEstimate cap, double expectedLow, double expectedHigh)
{
    PlanNodeStatsEstimate capped = capStats(stats, cap);
    SymbolStatsEstimate cappedSymbol = capped.getSymbolStatistics(SYMBOL);

    double actualLow = cappedSymbol.getLowValue();
    assertEquals(actualLow, expectedLow);

    double actualHigh = cappedSymbol.getHighValue();
    assertEquals(actualHigh, expectedHigh);
}
// Read the low and high bounds of the source statistics into locals.
double lowValue = sourceStats.getLowValue(); double highValue = sourceStats.getHighValue();
/**
 * Returns a consumer that asserts a symbol's low value, high value, distinct values count
 * and nulls fraction match those of {@code estimate}. The estimate's getters are read
 * when the consumer is invoked, not when it is created.
 */
private Consumer<SymbolStatsAssertion> equalTo(SymbolStatsEstimate estimate)
{
    return assertion -> assertion
            .lowValue(estimate.getLowValue())
            .highValue(estimate.getHighValue())
            .distinctValuesCount(estimate.getDistinctValuesCount())
            .nullsFraction(estimate.getNullsFraction());
}
private SymbolStatsEstimate estimateCoalesce(SymbolStatsEstimate left, SymbolStatsEstimate right) { // Question to reviewer: do you have a method to check if fraction is empty or saturated? if (left.getNullsFraction() == 0) { return left; } else if (left.getNullsFraction() == 1.0) { return right; } else { return SymbolStatsEstimate.builder() .setLowValue(min(left.getLowValue(), right.getLowValue())) .setHighValue(max(left.getHighValue(), right.getHighValue())) .setDistinctValuesCount(left.getDistinctValuesCount() + min(right.getDistinctValuesCount(), input.getOutputRowCount() * left.getNullsFraction())) .setNullsFraction(left.getNullsFraction() * right.getNullsFraction()) // TODO check if dataSize estimation method is correct .setAverageRowSize(max(left.getAverageRowSize(), right.getAverageRowSize())) .build(); } } }
public static PlanNodeStatsEstimate capStats(PlanNodeStatsEstimate stats, PlanNodeStatsEstimate cap) { if (stats.isOutputRowCountUnknown() || cap.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder(); double cappedRowCount = min(stats.getOutputRowCount(), cap.getOutputRowCount()); result.setOutputRowCount(cappedRowCount); stats.getSymbolsWithKnownStatistics().forEach(symbol -> { SymbolStatsEstimate symbolStats = stats.getSymbolStatistics(symbol); SymbolStatsEstimate capSymbolStats = cap.getSymbolStatistics(symbol); SymbolStatsEstimate.Builder newSymbolStats = SymbolStatsEstimate.builder(); // for simplicity keep the average row size the same as in the input // in most cases the average row size doesn't change after applying filters newSymbolStats.setAverageRowSize(symbolStats.getAverageRowSize()); newSymbolStats.setDistinctValuesCount(min(symbolStats.getDistinctValuesCount(), capSymbolStats.getDistinctValuesCount())); newSymbolStats.setLowValue(max(symbolStats.getLowValue(), capSymbolStats.getLowValue())); newSymbolStats.setHighValue(min(symbolStats.getHighValue(), capSymbolStats.getHighValue())); double numberOfNulls = stats.getOutputRowCount() * symbolStats.getNullsFraction(); double capNumberOfNulls = cap.getOutputRowCount() * capSymbolStats.getNullsFraction(); double cappedNumberOfNulls = min(numberOfNulls, capNumberOfNulls); double cappedNullsFraction = cappedRowCount == 0 ? 1 : cappedNumberOfNulls / cappedRowCount; newSymbolStats.setNullsFraction(cappedNullsFraction); result.addSymbolStatistics(symbol, newSymbolStats.build()); }); return result.build(); }
/**
 * Builds a superset estimate (30 rows, 10 distinct values) and a subset estimate (20 rows,
 * 5 distinct values) over the given ranges, subtracts the subset with
 * {@code subtractSubsetStats}, and asserts the resulting low and high values for {@code SYMBOL}.
 */
private static void assertSubtractRange(double supersetLow, double supersetHigh, double subsetLow, double subsetHigh, double expectedLow, double expectedHigh)
{
    StatisticRange supersetRange = new StatisticRange(supersetLow, supersetHigh, 10);
    PlanNodeStatsEstimate superset = statistics(30, NaN, NaN, supersetRange);

    StatisticRange subsetRange = new StatisticRange(subsetLow, subsetHigh, 5);
    PlanNodeStatsEstimate subset = statistics(20, NaN, NaN, subsetRange);

    SymbolStatsEstimate remainder = subtractSubsetStats(superset, subset).getSymbolStatistics(SYMBOL);
    assertEquals(remainder.getLowValue(), expectedLow);
    assertEquals(remainder.getHighValue(), expectedHigh);
}
/**
 * Asserts that {@code actual} and {@code expected} agree on nulls fraction, low value,
 * high value, distinct values count and average row size, in that order. Each failure
 * message names the symbol.
 */
private void assertSymbolStatsEqual(Symbol symbol, SymbolStatsEstimate actual, SymbolStatsEstimate expected)
{
    String symbolName = symbol.getName();

    assertEstimateEquals(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction mismatch for %s", symbolName);
    assertEstimateEquals(actual.getLowValue(), expected.getLowValue(), "lowValue mismatch for %s", symbolName);
    assertEstimateEquals(actual.getHighValue(), expected.getHighValue(), "highValue mismatch for %s", symbolName);
    assertEstimateEquals(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinct values count mismatch for %s", symbolName);
    assertEstimateEquals(actual.getAverageRowSize(), expected.getAverageRowSize(), "average row size mismatch for %s", symbolName);
}
}
/**
 * Asserts that the statistics under test match {@code expected} on nulls fraction, low value,
 * high value, distinct values count and average row size, checked in that order.
 *
 * @param expected statistics to compare against
 * @return the assertion object returned by the last check, for further chaining
 */
public SymbolStatsAssertion isEqualTo(SymbolStatsEstimate expected)
{
    SymbolStatsAssertion checked = nullsFraction(expected.getNullsFraction());
    checked = checked.lowValue(expected.getLowValue());
    checked = checked.highValue(expected.getHighValue());
    checked = checked.distinctValuesCount(expected.getDistinctValuesCount());
    return checked.averageRowSize(expected.getAverageRowSize());
}
}
outputStats.addSymbolStatistics(symbol, SymbolStatsEstimate.buildFrom(innerJoinSymbolStats) .setLowValue(leftSymbolStats.getLowValue()) .setHighValue(leftSymbolStats.getHighValue()) .setDistinctValuesCount(leftSymbolStats.getDistinctValuesCount())
@Override protected PlanNodeStatsEstimate visitBetweenPredicate(BetweenPredicate node, Void context) { if (!(node.getValue() instanceof SymbolReference)) { return PlanNodeStatsEstimate.unknown(); } if (!getExpressionStats(node.getMin()).isSingleValue()) { return PlanNodeStatsEstimate.unknown(); } if (!getExpressionStats(node.getMax()).isSingleValue()) { return PlanNodeStatsEstimate.unknown(); } SymbolStatsEstimate valueStats = input.getSymbolStatistics(Symbol.from(node.getValue())); Expression lowerBound = new ComparisonExpression(GREATER_THAN_OR_EQUAL, node.getValue(), node.getMin()); Expression upperBound = new ComparisonExpression(LESS_THAN_OR_EQUAL, node.getValue(), node.getMax()); Expression transformed; if (isInfinite(valueStats.getLowValue())) { // We want to do heuristic cut (infinite range to finite range) ASAP and then do filtering on finite range. // We rely on 'and()' being processed left to right transformed = and(lowerBound, upperBound); } else { transformed = and(upperBound, lowerBound); } return process(transformed); }
result = result.mapSymbolColumnStatistics(drivingClause.getLeft(), columnStats -> SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(leftColumnStats.getLowValue()) .setHighValue(leftColumnStats.getHighValue()) .setNullsFraction(newLeftNullsFraction)