private static OrcPredicate getPredicate(TupleDomain<RaptorColumnHandle> effectivePredicate, Map<Long, Integer> indexMap) { ImmutableList.Builder<ColumnReference<RaptorColumnHandle>> columns = ImmutableList.builder(); for (RaptorColumnHandle column : effectivePredicate.getDomains().get().keySet()) { Integer index = indexMap.get(column.getColumnId()); if (index != null) { columns.add(new ColumnReference<>(column, index, column.getColumnType())); } } return new TupleDomainOrcPredicate<>(effectivePredicate, columns.build(), false); }
private boolean columnOverlaps(ColumnReference<C> columnReference, Domain predicateDomain, long numberOfRows, ColumnStatistics columnStatistics) Domain stripeDomain = getDomain(columnReference.getType(), numberOfRows, columnStatistics); if (!stripeDomain.overlaps(predicateDomain)) { Optional<Collection<Object>> discreteValues = extractDiscreteValues(predicateDomain.getValues()); if (!discreteValues.isPresent()) { if (discreteValues.get().stream().noneMatch(value -> checkInBloomFilter(bloomFilter, value, stripeDomain.getType()))) { return false;
@Override public boolean matches(long numberOfRows, Map<Integer, ColumnStatistics> statisticsByColumnIndex) { Optional<Map<C, Domain>> optionalEffectivePredicateDomains = effectivePredicate.getDomains(); if (!optionalEffectivePredicateDomains.isPresent()) { // effective predicate is none, so skip this section return false; } Map<C, Domain> effectivePredicateDomains = optionalEffectivePredicateDomains.get(); for (ColumnReference<C> columnReference : columnReferences) { Domain predicateDomain = effectivePredicateDomains.get(columnReference.getColumn()); if (predicateDomain == null) { // no predicate on this column, so we can't exclude this section continue; } ColumnStatistics columnStatistics = statisticsByColumnIndex.get(columnReference.getOrdinal()); if (columnStatistics == null) { // no statistics for this column, so we can't exclude this section continue; } if (!columnOverlaps(columnReference, predicateDomain, numberOfRows, columnStatistics)) { return false; } } // this section was not excluded return true; }
@Test public void testBinary() { assertEquals(getDomain(VARBINARY, 0, null), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 10, null), all(VARBINARY)); assertEquals(getDomain(VARBINARY, 0, binaryColumnStats(null)), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 0, binaryColumnStats(0L)), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 0, binaryColumnStats(0L)), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 10, binaryColumnStats(0L)), onlyNull(VARBINARY)); assertEquals(getDomain(VARBINARY, 10, binaryColumnStats(10L)), notNull(VARBINARY)); assertEquals(getDomain(VARBINARY, 20, binaryColumnStats(10L)), all(VARBINARY)); }
.build(); TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences, true); TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(emptyEffectivePredicate, columnReferences, true); null)); assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex)); assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex)); assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex)); assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex));
@Test public void testBloomFilterPredicateValuesNonExisting() { BloomFilter bloomFilter = new BloomFilter(TEST_VALUES.size() * 10, 0.01); for (Map.Entry<Object, Type> testValue : TEST_VALUES.entrySet()) { boolean matched = checkInBloomFilter(bloomFilter, testValue.getKey(), testValue.getValue()); assertFalse(matched, "type " + testValue.getKey().getClass()); } // test unsupported type: can be supported by ORC but is not implemented yet assertTrue(checkInBloomFilter(bloomFilter, new Date(), DATE), "unsupported type DATE should always return true"); }
@Test public void testExtractValuesFromSingleDomain() { Map<Type, Object> testValues = ImmutableMap.<Type, Object>builder() .put(BOOLEAN, true) .put(INTEGER, 1234L) .put(BIGINT, 4321L) .put(DOUBLE, 0.123) .put(VARCHAR, utf8Slice(TEST_STRING)) .build(); for (Map.Entry<Type, Object> testValue : testValues.entrySet()) { Domain predicateDomain = Domain.singleValue(testValue.getKey(), testValue.getValue()); Optional<Collection<Object>> discreteValues = extractDiscreteValues(predicateDomain.getValues()); assertTrue(discreteValues.isPresent()); Collection<Object> objects = discreteValues.get(); assertEquals(objects.size(), 1); assertEquals(objects.iterator().next(), testValue.getValue()); } }
private static <T extends Comparable<T>> Domain createDomain(Type type, boolean hasNullValue, RangeStatistics<T> rangeStatistics) { return createDomain(type, hasNullValue, rangeStatistics, value -> value); }
@Test public void testBinary() { assertEquals(getDomain(VARBINARY, 0, null), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 10, null), all(VARBINARY)); assertEquals(getDomain(VARBINARY, 0, binaryColumnStats(null)), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 0, binaryColumnStats(0L)), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 0, binaryColumnStats(0L)), none(VARBINARY)); assertEquals(getDomain(VARBINARY, 10, binaryColumnStats(0L)), onlyNull(VARBINARY)); assertEquals(getDomain(VARBINARY, 10, binaryColumnStats(10L)), notNull(VARBINARY)); assertEquals(getDomain(VARBINARY, 20, binaryColumnStats(10L)), all(VARBINARY)); }
.build(); TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences, true); TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(emptyEffectivePredicate, columnReferences, true); null)); assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex)); assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex)); assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex)); assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex));
@Test public void testBloomFilterPredicateValuesNonExisting() { BloomFilter bloomFilter = new BloomFilter(TEST_VALUES.size() * 10, 0.01); for (Map.Entry<Object, Type> testValue : TEST_VALUES.entrySet()) { boolean matched = checkInBloomFilter(bloomFilter, testValue.getKey(), testValue.getValue()); assertFalse(matched, "type " + testValue.getKey().getClass()); } // test unsupported type: can be supported by ORC but is not implemented yet assertTrue(checkInBloomFilter(bloomFilter, new Date(), DATE), "unsupported type DATE should always return true"); }
@Test public void testExtractValuesFromSingleDomain() { Map<Type, Object> testValues = ImmutableMap.<Type, Object>builder() .put(BOOLEAN, true) .put(INTEGER, 1234L) .put(BIGINT, 4321L) .put(DOUBLE, 0.123) .put(VARCHAR, utf8Slice(TEST_STRING)) .build(); for (Map.Entry<Type, Object> testValue : testValues.entrySet()) { Domain predicateDomain = Domain.singleValue(testValue.getKey(), testValue.getValue()); Optional<Collection<Object>> discreteValues = extractDiscreteValues(predicateDomain.getValues()); assertTrue(discreteValues.isPresent()); Collection<Object> objects = discreteValues.get(); assertEquals(objects.size(), 1); assertEquals(objects.iterator().next(), testValue.getValue()); } }
private static <T extends Comparable<T>> Domain createDomain(Type type, boolean hasNullValue, RangeStatistics<T> rangeStatistics) { return createDomain(type, hasNullValue, rangeStatistics, value -> value); }
private boolean columnOverlaps(ColumnReference<C> columnReference, Domain predicateDomain, long numberOfRows, ColumnStatistics columnStatistics) Domain stripeDomain = getDomain(columnReference.getType(), numberOfRows, columnStatistics); if (!stripeDomain.overlaps(predicateDomain)) { Optional<Collection<Object>> discreteValues = extractDiscreteValues(predicateDomain.getValues()); if (!discreteValues.isPresent()) { if (discreteValues.get().stream().noneMatch(value -> checkInBloomFilter(bloomFilter, value, stripeDomain.getType()))) { return false;
@Test public void testBoolean() { assertEquals(getDomain(BOOLEAN, 0, null), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, null), all(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 0, booleanColumnStats(null, null)), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 0, booleanColumnStats(0L, null)), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 0, booleanColumnStats(0L, 0L)), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(0L, 0L)), onlyNull(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(10L, null)), notNull(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(10L, 10L)), singleValue(BOOLEAN, true)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(10L, 0L)), singleValue(BOOLEAN, false)); assertEquals(getDomain(BOOLEAN, 20, booleanColumnStats(10L, 5L)), all(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 20, booleanColumnStats(10L, 10L)), create(ValueSet.ofRanges(Range.equal(BOOLEAN, true)), true)); assertEquals(getDomain(BOOLEAN, 20, booleanColumnStats(10L, 0L)), create(ValueSet.ofRanges(Range.equal(BOOLEAN, false)), true)); }
OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled);
boolean matched = checkInBloomFilter(bloomFilter, testValue.getKey(), testValue.getValue()); assertTrue(matched, "type " + testValue.getClass()); assertTrue(checkInBloomFilter(bloomFilter, new Date(), DATE), "unsupported type DATE should always return true");
return createDomain(type, hasNullValue, columnStatistics.getDecimalStatistics(), value -> rescale(value, (DecimalType) type).unscaledValue().longValue()); return createDomain(type, hasNullValue, columnStatistics.getDecimalStatistics(), value -> encodeUnscaledValue(rescale(value, (DecimalType) type).unscaledValue())); return createDomain(type, hasNullValue, columnStatistics.getStringStatistics(), value -> truncateToLengthAndTrimSpaces(value, type)); return createDomain(type, hasNullValue, columnStatistics.getStringStatistics()); return createDomain(type, hasNullValue, columnStatistics.getDateStatistics(), value -> (long) value); return createDomain(type, hasNullValue, columnStatistics.getIntegerStatistics()); return createDomain(type, hasNullValue, columnStatistics.getDoubleStatistics()); return createDomain(type, hasNullValue, columnStatistics.getDoubleStatistics(), value -> (long) floatToRawIntBits(value.floatValue()));
@Override public boolean matches(long numberOfRows, Map<Integer, ColumnStatistics> statisticsByColumnIndex) { Optional<Map<C, Domain>> optionalEffectivePredicateDomains = effectivePredicate.getDomains(); if (!optionalEffectivePredicateDomains.isPresent()) { // effective predicate is none, so skip this section return false; } Map<C, Domain> effectivePredicateDomains = optionalEffectivePredicateDomains.get(); for (ColumnReference<C> columnReference : columnReferences) { Domain predicateDomain = effectivePredicateDomains.get(columnReference.getColumn()); if (predicateDomain == null) { // no predicate on this column, so we can't exclude this section continue; } ColumnStatistics columnStatistics = statisticsByColumnIndex.get(columnReference.getOrdinal()); if (columnStatistics == null) { // no statistics for this column, so we can't exclude this section continue; } if (!columnOverlaps(columnReference, predicateDomain, numberOfRows, columnStatistics)) { return false; } } // this section was not excluded return true; }
@Test public void testBoolean() { assertEquals(getDomain(BOOLEAN, 0, null), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, null), all(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 0, booleanColumnStats(null, null)), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 0, booleanColumnStats(0L, null)), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 0, booleanColumnStats(0L, 0L)), none(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(0L, 0L)), onlyNull(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(10L, null)), notNull(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(10L, 10L)), singleValue(BOOLEAN, true)); assertEquals(getDomain(BOOLEAN, 10, booleanColumnStats(10L, 0L)), singleValue(BOOLEAN, false)); assertEquals(getDomain(BOOLEAN, 20, booleanColumnStats(10L, 5L)), all(BOOLEAN)); assertEquals(getDomain(BOOLEAN, 20, booleanColumnStats(10L, 10L)), create(ValueSet.ofRanges(Range.equal(BOOLEAN, true)), true)); assertEquals(getDomain(BOOLEAN, 20, booleanColumnStats(10L, 0L)), create(ValueSet.ofRanges(Range.equal(BOOLEAN, false)), true)); }