private static boolean isStripeSatisfyPredicate( StripeStatistics stripeStatistics, SearchArgument sarg, int[] filterColumns, final SchemaEvolution evolution) { List<PredicateLeaf> predLeaves = sarg.getLeaves(); TruthValue[] truthValues = new TruthValue[predLeaves.size()]; for (int pred = 0; pred < truthValues.length; pred++) { if (filterColumns[pred] != -1) { if (evolution != null && !evolution.isPPDSafeConversion(filterColumns[pred])) { truthValues[pred] = TruthValue.YES_NO_NULL; } else { // column statistics at index 0 contains only the number of rows ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]]; truthValues[pred] = RecordReaderImpl.evaluatePredicate(stats, predLeaves.get(pred), null); } } else { // parition column case. // partition filter will be evaluated by partition pruner so // we will not evaluate partition filter here. truthValues[pred] = TruthValue.YES_NO_NULL; } } return sarg.evaluate(truthValues).isNeeded(); }
/**
 * Builds the stripe-level statistics from their protobuf representation.
 *
 * @return a new mutable list holding one {@link StripeStatistics} per entry
 *         returned by {@code getStripeStatisticsProto()}, in the same order;
 *         an empty list when that method returns {@code null}
 * @throws IOException declared by this method; propagated from
 *         {@code getStripeStatisticsProto()} if it throws one
 */
public List<StripeStatistics> getStripeStatistics() throws IOException {
  final List<StripeStatistics> converted = new ArrayList<>();
  final List<OrcProto.StripeStatistics> protoStats = getStripeStatisticsProto();

  // Nothing to convert: hand back the empty list.
  if (protoStats == null) {
    return converted;
  }

  for (OrcProto.StripeStatistics protoStat : protoStats) {
    converted.add(new StripeStatistics(protoStat.getColStatsList()));
  }
  return converted;
}
/**
 * Returns the stripe-level statistics, extracting the metadata section on
 * first use.
 *
 * <p>When {@code metadata} has not been set yet it is extracted from the
 * serialized tail using a codec borrowed from {@link OrcCodecPool}; the
 * codec is handed back to the pool even if extraction fails. The protobuf
 * stripe statistics list is then cached in {@code stripeStats}.
 *
 * @return a new list with one {@link StripeStatistics} per protobuf entry,
 *         in the same order
 * @throws IOException declared by this method; propagated from the metadata
 *         extraction if it throws one
 */
@Override
public List<StripeStatistics> getStripeStatistics() throws IOException {
  if (metadata == null) {
    final CompressionCodec borrowedCodec = OrcCodecPool.getCodec(compressionKind);
    try {
      metadata = extractMetadata(
          tail.getSerializedTail(), 0, metadataSize, borrowedCodec, bufferSize);
    } finally {
      // Always give the codec back, whether or not extraction succeeded.
      OrcCodecPool.returnCodec(compressionKind, borrowedCodec);
    }
  }

  if (stripeStats == null) {
    stripeStats = metadata.getStripeStatsList();
  }

  final List<StripeStatistics> converted = new ArrayList<>();
  for (OrcProto.StripeStatistics protoStat : stripeStats) {
    converted.add(new StripeStatistics(protoStat.getColStatsList()));
  }
  return converted;
}
} else { ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred]];
assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues()); assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues()); assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues()); assertEquals(5000, (ss1.getColumnStatistics()[1]).getNumberOfValues()); assertEquals(5000, (ss2.getColumnStatistics()[1]).getNumberOfValues()); assertEquals(1000, (ss3.getColumnStatistics()[1]).getNumberOfValues()); assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMinimum()); assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMinimum()); assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMinimum()); assertEquals(1, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getMaximum()); assertEquals(2, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getMaximum()); assertEquals(3, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getMaximum()); assertEquals(5000, ((IntegerColumnStatistics)ss1.getColumnStatistics()[1]).getSum()); assertEquals(10000, ((IntegerColumnStatistics)ss2.getColumnStatistics()[1]).getSum()); assertEquals(3000, ((IntegerColumnStatistics)ss3.getColumnStatistics()[1]).getSum()); assertEquals(5000, (ss1.getColumnStatistics()[2]).getNumberOfValues()); assertEquals(5000, (ss2.getColumnStatistics()[2]).getNumberOfValues()); assertEquals(1000, (ss3.getColumnStatistics()[2]).getNumberOfValues()); assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMinimum()); assertEquals("two", ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getMinimum()); assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMinimum()); assertEquals("one", ((StringColumnStatistics)ss1.getColumnStatistics()[2]).getMaximum()); assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMaximum()); assertEquals("three", ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getMaximum()); assertEquals(15000, 
((StringColumnStatistics)ss1.getColumnStatistics()[2]).getSum()); assertEquals(15000, ((StringColumnStatistics)ss2.getColumnStatistics()[2]).getSum()); assertEquals(5000, ((StringColumnStatistics)ss3.getColumnStatistics()[2]).getSum());
if(columnStatsPresent) { ColumnStatistics[] colStats = stats.get(firstStripeIndex).getColumnStatistics(); if(columnStatsPresent) { ColumnStatistics[] colStats = stats.get(lastStripeIndex).getColumnStatistics(); IntegerColumnStatistics origWriteId = (IntegerColumnStatistics) colStats[OrcRecordUpdater.ORIGINAL_WRITEID + 1]; .getColumnStatistics(); IntegerColumnStatistics bucketProperty = (IntegerColumnStatistics) colStats[OrcRecordUpdater.BUCKET + 1];
assertEquals(2, ss.getColumnStatistics()[0].getNumberOfValues()); assertEquals(1, ((BooleanColumnStatistics) ss.getColumnStatistics()[1]).getTrueCount()); assertEquals(1024, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMinimum()); assertEquals(2048, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getMaximum()); assertEquals(3072, ((IntegerColumnStatistics) ss.getColumnStatistics()[3]).getSum()); assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum()); assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum());