/**
 * Builds a {@link Domain} from the min/max bounds reported by range statistics.
 * <p>
 * Bounds are inclusive. A missing (null) bound leaves that side of the range open,
 * and when both bounds are missing every value of {@code type} is allowed.
 *
 * @param type the type of the resulting domain
 * @param hasNullValue whether the resulting domain allows null
 * @param rangeStatistics source of the (possibly null) minimum and maximum
 * @param function converts a statistics value into the value handed to {@link Range}
 * @return the domain described by the available bounds
 */
private static <F, T extends Comparable<T>> Domain createDomain(Type type, boolean hasNullValue, RangeStatistics<F> rangeStatistics, Function<F, T> function)
{
    F lowerBound = rangeStatistics.getMin();
    F upperBound = rangeStatistics.getMax();

    ValueSet valueSet;
    if (lowerBound == null && upperBound == null) {
        // no statistics at all: nothing can be excluded
        valueSet = ValueSet.all(type);
    }
    else if (lowerBound == null) {
        valueSet = ValueSet.ofRanges(Range.lessThanOrEqual(type, function.apply(upperBound)));
    }
    else if (upperBound == null) {
        valueSet = ValueSet.ofRanges(Range.greaterThanOrEqual(type, function.apply(lowerBound)));
    }
    else {
        valueSet = ValueSet.ofRanges(Range.range(type, function.apply(lowerBound), true, function.apply(upperBound), true));
    }
    return Domain.create(valueSet, hasNullValue);
}
/**
 * Produces a copy of {@code effectivePredicate}, keyed by {@link HiveColumnHandle}, in which
 * any value set with more than {@code threshold} components is widened:
 * range-based sets collapse to their span and discrete sets become "all values of the type".
 * Null-allowed flags are carried over unchanged. When the predicate has no domains,
 * the result is built from an empty column map.
 *
 * @param effectivePredicate predicate whose column handles are all {@link HiveColumnHandle}s
 * @param threshold maximum number of ranges / discrete values kept as-is
 * @return the compacted tuple domain
 */
private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold)
{
    ImmutableMap.Builder<HiveColumnHandle, Domain> compacted = ImmutableMap.builder();
    effectivePredicate.getDomains().ifPresent(domains -> domains.forEach((column, domain) -> {
        HiveColumnHandle hiveColumn = (HiveColumnHandle) column;
        ValueSet original = domain.getValues();
        Optional<ValueSet> widened = original.getValuesProcessor().<Optional<ValueSet>>transform(
                ranges -> {
                    if (ranges.getRangeCount() > threshold) {
                        return Optional.of(ValueSet.ofRanges(ranges.getSpan()));
                    }
                    return Optional.empty();
                },
                discreteValues -> {
                    if (discreteValues.getValues().size() > threshold) {
                        return Optional.of(ValueSet.all(original.getType()));
                    }
                    return Optional.empty();
                },
                allOrNone -> Optional.empty());
        compacted.put(hiveColumn, Domain.create(widened.orElse(original), domain.isNullAllowed()));
    }));
    return TupleDomain.withColumnDomains(compacted.build());
}
/**
 * Two disjoint ranges on one column (value &lt; 100 or value &gt; 200, null excluded)
 * must be rendered as a {@code $or} of one comparison document per range.
 */
@Test
public void testBuildQueryOr()
{
    Domain outsideBand = Domain.create(ValueSet.ofRanges(lessThan(BIGINT, 100L), greaterThan(BIGINT, 200L)), false);
    TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(COL1, outsideBand));

    Document query = MongoSession.buildQuery(tupleDomain);

    Document belowHundred = new Document(COL1.getName(), new Document("$lt", 100L));
    Document aboveTwoHundred = new Document(COL1.getName(), new Document("$gt", 200L));
    Document expected = new Document("$or", asList(belowHundred, aboveTwoHundred));
    assertEquals(query, expected);
}
/**
 * Checks the predicates, accessors and complement of a domain holding exactly one
 * non-null value of the {@code ID} type, and that {@code getSingleValue()} rejects
 * a domain holding two values.
 */
@Test
public void testEquatableSingleValue()
{
    ValueSet zeroOnly = ValueSet.of(ID, 0L);
    Domain domain = Domain.singleValue(ID, 0L);

    // classification flags
    assertFalse(domain.isNone());
    assertFalse(domain.isAll());
    assertTrue(domain.isSingleValue());
    assertTrue(domain.isNullableSingleValue());
    assertFalse(domain.isOnlyNull());
    assertFalse(domain.isNullAllowed());

    // contents
    assertEquals(domain.getValues(), zeroOnly);
    assertEquals(domain.getType(), ID);
    assertTrue(domain.includesNullableValue(0L));
    assertFalse(domain.includesNullableValue(null));

    // complement flips both the value set and the null flag
    assertEquals(domain.complement(), Domain.create(zeroOnly.complement(), true));

    assertEquals(domain.getSingleValue(), 0L);
    assertEquals(domain.getNullableSingleValue(), 0L);

    Domain twoValues = Domain.create(ValueSet.of(ID, 0L, 1L), false);
    try {
        twoValues.getSingleValue();
        fail();
    }
    catch (IllegalStateException expected) {
        // expected: the domain does not hold a single value
    }
}
/**
 * Exercises union over BIGINT domains: all/none/notNull/onlyNull combinations,
 * a single value against each of those, and merging of equality ranges with
 * differing null-allowed flags.
 */
@Test
public void testUnion()
{
    Domain all = Domain.all(BIGINT);
    Domain none = Domain.none(BIGINT);
    Domain notNull = Domain.notNull(BIGINT);
    Domain onlyNull = Domain.onlyNull(BIGINT);
    Domain zero = Domain.singleValue(BIGINT, 0L);

    ValueSet one = ValueSet.ofRanges(Range.equal(BIGINT, 1L));
    ValueSet two = ValueSet.ofRanges(Range.equal(BIGINT, 2L));
    ValueSet oneAndTwo = ValueSet.ofRanges(Range.equal(BIGINT, 1L), Range.equal(BIGINT, 2L));

    // trivial domains
    assertUnion(all, all, all);
    assertUnion(none, none, none);
    assertUnion(all, none, all);
    assertUnion(notNull, onlyNull, all);

    // single value against the trivial domains
    assertUnion(zero, all, all);
    assertUnion(zero, notNull, notNull);
    assertUnion(zero, onlyNull, Domain.create(ValueSet.ofRanges(Range.equal(BIGINT, 0L)), true));

    // value sets are merged and null is allowed if either side allows it
    assertUnion(Domain.create(one, true), Domain.create(two, true), Domain.create(oneAndTwo, true));
    assertUnion(Domain.create(one, true), Domain.create(oneAndTwo, false), Domain.create(oneAndTwo, true));
}
// Excerpt from testFromBasicComparisonsWithNulls. Only three expected tuple domains are visible:
// an empty VARCHAR value set that disallows null, and not-null domains for the BIGINT and COLOR columns.
// NOTE(review): the method braces and the calls these arguments belong to are missing from this excerpt.
@Test public void testFromBasicComparisonsWithNulls() withColumnDomains(ImmutableMap.of(C_VARCHAR, Domain.create(ValueSet.none(VARCHAR), false)))); withColumnDomains(ImmutableMap.of(C_BIGINT, Domain.notNull(BIGINT)))); withColumnDomains(ImmutableMap.of(C_COLOR, Domain.notNull(COLOR))));
/**
 * A range that also allows null (value &gt; 200 or null) must be rendered as a
 * {@code $or} of the range comparison and a null-matching document.
 */
@Test
public void testBuildQueryNull()
{
    Domain aboveTwoHundredOrNull = Domain.create(ValueSet.ofRanges(greaterThan(BIGINT, 200L)), true);
    TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(COL1, aboveTwoHundredOrNull));

    Document query = MongoSession.buildQuery(tupleDomain);

    Document rangePart = new Document(COL1.getName(), new Document("$gt", 200L));
    Document nullPart = new Document(COL1.getName(), new Document("$exists", true).append("$eq", null));
    Document expected = new Document("$or", asList(rangePart, nullPart));
    assertEquals(query, expected);
}
}
// Excerpt from testLegacyCharComparedToVarcharExpression (the method braces are not part of this excerpt).
// C_CHAR is cast to VARCHAR and compared against a 10-character and an 11-character literal;
// the expected domains are all typed createCharType(10):
//  - notEqual with the 10-character literal: everything below or above that value, null excluded
//  - notEqual with the 11-character literal: any non-null value
//  - isDistinctFrom with the 10-character literal: everything below or above that value, null included
//  - isDistinctFrom with the 11-character literal: the whole domain
@Test public void testLegacyCharComparedToVarcharExpression() testSimpleComparison(notEqual(cast(C_CHAR, VARCHAR), stringLiteral("1234567890", VARCHAR)), C_CHAR, Domain.create(ValueSet.ofRanges( Range.lessThan(createCharType(10), Slices.utf8Slice("1234567890")), Range.greaterThan(createCharType(10), Slices.utf8Slice("1234567890"))), false)); testSimpleComparison(notEqual(cast(C_CHAR, VARCHAR), stringLiteral("12345678901", VARCHAR)), C_CHAR, Domain.notNull(createCharType(10))); testSimpleComparison(isDistinctFrom(cast(C_CHAR, VARCHAR), stringLiteral("1234567890", VARCHAR)), C_CHAR, Domain.create(ValueSet.ofRanges( Range.lessThan(createCharType(10), Slices.utf8Slice("1234567890")), Range.greaterThan(createCharType(10), Slices.utf8Slice("1234567890"))), true)); testSimpleComparison(isDistinctFrom(cast(C_CHAR, VARCHAR), stringLiteral("12345678901", VARCHAR)), C_CHAR, Domain.all(createCharType(10)));
// Excerpt from fixture setup; the enclosing statement begins before this excerpt.
// The tuple domain pins dsColumn to "2012-12-29" and lists four file formats
// (textfile, sequencefile, rctext, rcbinary) and four dummy values (1..4), all with null excluded.
// The DiscretePredicates then enumerate one tuple domain per pairing:
// textfile/1, sequencefile/2, rctext/3, rcbinary/4, each with the same ds value.
// The final statement builds the partition list for the unpartitioned table.
new HiveTableLayoutHandle(tablePartitionFormat, partitionColumns, partitions, tupleDomain, tupleDomain, Optional.empty(), Optional.empty()), Optional.empty(), TupleDomain.withColumnDomains(ImmutableMap.of( dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")), Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")), Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")), Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L), Range.equal(INTEGER, 2L), Range.equal(INTEGER, 3L), Range.equal(INTEGER, 4L)), false))), Optional.empty(), Optional.empty(), Optional.of(new DiscretePredicates(partitionColumns, ImmutableList.of( TupleDomain.withColumnDomains(ImmutableMap.of( dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), false))), TupleDomain.withColumnDomains(ImmutableMap.of( dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))), TupleDomain.withColumnDomains(ImmutableMap.of( dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false), dummyColumn, 
Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))), TupleDomain.withColumnDomains(ImmutableMap.of( dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))), ImmutableList.of()); List<HivePartition> unpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned));
/**
 * Builds a {@link Domain} from the min/max bounds of Parquet range statistics.
 * <p>
 * Both bounds are treated as inclusive. A null bound leaves that side unbounded;
 * with neither bound present the domain covers every value of {@code type}.
 *
 * @param type the type of the resulting domain
 * @param hasNullValue whether the resulting domain allows null
 * @param rangeStatistics source of the (possibly null) minimum and maximum
 * @param function converts a statistics value into the value handed to {@link Range}
 * @return the domain described by the available bounds
 */
private static <F, T extends Comparable<T>> Domain createDomain(Type type, boolean hasNullValue, ParquetRangeStatistics<F> rangeStatistics, Function<F, T> function)
{
    F low = rangeStatistics.getMin();
    F high = rangeStatistics.getMax();
    boolean hasLow = low != null;
    boolean hasHigh = high != null;

    ValueSet allowed;
    if (hasLow && hasHigh) {
        allowed = ValueSet.ofRanges(Range.range(type, function.apply(low), true, function.apply(high), true));
    }
    else if (hasHigh) {
        allowed = ValueSet.ofRanges(Range.lessThanOrEqual(type, function.apply(high)));
    }
    else if (hasLow) {
        allowed = ValueSet.ofRanges(Range.greaterThanOrEqual(type, function.apply(low)));
    }
    else {
        // no bounds recorded: nothing can be excluded
        allowed = ValueSet.all(type);
    }
    return Domain.create(allowed, hasNullValue);
}
}
/**
 * Column-wise union of two tuple domains that share only column A:
 * the expected result contains A alone (as the all-DOUBLE domain); B and C,
 * each present on one side only, are absent from it.
 */
@Test
public void testMismatchedColumnWiseUnion()
{
    Domain allDoubles = Domain.all(DOUBLE);
    Domain nonNegativeOrNull = Domain.create(ValueSet.ofRanges(Range.greaterThanOrEqual(DOUBLE, 0.0)), true);

    TupleDomain<ColumnHandle> tupleDomain1 = TupleDomain.withColumnDomains(
            ImmutableMap.of(
                    A, allDoubles,
                    B, Domain.singleValue(VARCHAR, utf8Slice("value"))));
    TupleDomain<ColumnHandle> tupleDomain2 = TupleDomain.withColumnDomains(
            ImmutableMap.of(
                    A, nonNegativeOrNull,
                    C, Domain.singleValue(BIGINT, 1L)));

    TupleDomain<ColumnHandle> expectedTupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(A, Domain.all(DOUBLE)));

    assertEquals(columnWiseUnion(tupleDomain1, tupleDomain2), expectedTupleDomain);
}
// Excerpt: requests table layouts constrained to ds values in the closed range
// ["2015-07-01", "2015-07-02"], with null excluded. The same tuple domain supplies both
// the constraint summary and, via convertToPredicate, its predicate.
TupleDomain<ColumnHandle> tupleDomain2 = TupleDomain.withColumnDomains( ImmutableMap.of(dsColumnHandle, Domain.create(ValueSet.ofRanges(Range.range(createUnboundedVarcharType(), utf8Slice("2015-07-01"), true, utf8Slice("2015-07-02"), true)), false))); Constraint<ColumnHandle> constraint2 = new Constraint<>(tupleDomain2, convertToPredicate(tupleDomain2)); List<ConnectorTableLayoutResult> tableLayoutResults2 = metadata.getTableLayouts(session, tableHandle, constraint2, Optional.empty());
private static Domain extractOrderableDomain(ComparisonExpression.Operator comparisonOperator, Type type, Object value, boolean complement) { checkArgument(value != null); switch (comparisonOperator) { case EQUAL: return Domain.create(complementIfNecessary(ValueSet.ofRanges(Range.equal(type, value)), complement), false); case GREATER_THAN: return Domain.create(complementIfNecessary(ValueSet.ofRanges(Range.greaterThan(type, value)), complement), false); case GREATER_THAN_OR_EQUAL: return Domain.create(complementIfNecessary(ValueSet.ofRanges(Range.greaterThanOrEqual(type, value)), complement), false); case LESS_THAN: return Domain.create(complementIfNecessary(ValueSet.ofRanges(Range.lessThan(type, value)), complement), false); case LESS_THAN_OR_EQUAL: return Domain.create(complementIfNecessary(ValueSet.ofRanges(Range.lessThanOrEqual(type, value)), complement), false); case NOT_EQUAL: return Domain.create(complementIfNecessary(ValueSet.ofRanges(Range.lessThan(type, value), Range.greaterThan(type, value)), complement), false); case IS_DISTINCT_FROM: // Need to potential complement the whole domain for IS_DISTINCT_FROM since it is null-aware return complementIfNecessary(Domain.create(ValueSet.ofRanges(Range.lessThan(type, value), Range.greaterThan(type, value)), true), complement); default: throw new AssertionError("Unhandled operator: " + comparisonOperator); } }
/**
 * Several equality ranges on one varchar column ("hello", "world", null excluded)
 * must be rendered as a single {@code $in} document.
 */
@Test
public void testBuildQueryIn()
{
    ValueSet helloOrWorld = ValueSet.ofRanges(
            equal(createUnboundedVarcharType(), utf8Slice("hello")),
            equal(createUnboundedVarcharType(), utf8Slice("world")));
    TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(
            COL2, Domain.create(helloOrWorld, false)));

    Document query = MongoSession.buildQuery(tupleDomain);

    Document inClause = new Document("$in", ImmutableList.of("hello", "world"));
    Document expected = new Document(COL2.getName(), inClause);
    assertEquals(query, expected);
}
/**
 * Produces a copy of {@code effectivePredicate}, keyed by {@link HiveColumnHandle}, in which
 * any value set with more than {@code threshold} components is widened:
 * range-based sets collapse to their span and discrete sets become "all values of the type".
 * Null-allowed flags are carried over unchanged.
 *
 * @param effectivePredicate predicate with domains present; rejected by {@code checkArgument} otherwise
 * @param threshold maximum number of ranges / discrete values kept as-is
 * @return the compacted tuple domain
 */
private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold)
{
    checkArgument(effectivePredicate.getDomains().isPresent());

    ImmutableMap.Builder<HiveColumnHandle, Domain> compacted = ImmutableMap.builder();
    effectivePredicate.getDomains().get().forEach((column, domain) -> {
        HiveColumnHandle hiveColumn = checkType(column, HiveColumnHandle.class, "ConnectorColumnHandle");
        ValueSet original = domain.getValues();
        Optional<ValueSet> widened = original.getValuesProcessor().<Optional<ValueSet>>transform(
                ranges -> {
                    if (ranges.getRangeCount() > threshold) {
                        return Optional.of(ValueSet.ofRanges(ranges.getSpan()));
                    }
                    return Optional.empty();
                },
                discreteValues -> {
                    if (discreteValues.getValues().size() > threshold) {
                        return Optional.of(ValueSet.all(original.getType()));
                    }
                    return Optional.empty();
                },
                allOrNone -> Optional.empty());
        compacted.put(hiveColumn, Domain.create(widened.orElse(original), domain.isNullAllowed()));
    });
    return TupleDomain.withColumnDomains(compacted.build());
}
/**
 * Returns a domain with fewer discrete components when this one has more than 32 of them.
 * <p>
 * A range-based value set with more than 32 ranges is replaced by its span, and a
 * discrete value set with more than 32 values is replaced by "all values of the type".
 * Smaller value sets, and all-or-none value sets, are kept as they are.
 * The null-allowed flag is preserved.
 */
public Domain simplify()
{
    Optional<ValueSet> widened = values.getValuesProcessor().<Optional<ValueSet>>transform(
            ranges -> ranges.getOrderedRanges().size() > 32
                    ? Optional.of(ValueSet.ofRanges(ranges.getSpan()))
                    : Optional.empty(),
            discreteValues -> discreteValues.getValues().size() > 32
                    ? Optional.of(ValueSet.all(values.getType()))
                    : Optional.empty(),
            allOrNone -> Optional.empty());
    return Domain.create(widened.orElse(values), nullAllowed);
}
/**
 * Only columns pinned to exactly one nullable value are reported as fixed:
 * B (a single varchar value) and C (only null). A (all values) and
 * D (the value 1 or null) are not.
 */
@Test
public void testExtractFixedValues()
{
    TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(
            ImmutableMap.<ColumnHandle, Domain>builder()
                    .put(A, Domain.all(DOUBLE))
                    .put(B, Domain.singleValue(VARCHAR, utf8Slice("value")))
                    .put(C, Domain.onlyNull(BIGINT))
                    .put(D, Domain.create(ValueSet.ofRanges(Range.equal(BIGINT, 1L)), true))
                    .build());

    assertEquals(
            TupleDomain.extractFixedValues(tupleDomain).get(),
            ImmutableMap.of(
                    B, NullableValue.of(VARCHAR, utf8Slice("value")),
                    C, NullableValue.asNull(BIGINT)));
}
/**
 * Predicates on the two DOUBLE columns must not appear in the generated SQL;
 * only the predicate on the integer column "quantity" is present in the WHERE clause.
 */
@Test
public void testNotPushDoublePredicates()
{
    IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(new TypeRegistry());

    HiveColumnHandle quantity = new HiveColumnHandle("quantity", HIVE_INT, parseTypeSignature(INTEGER), 0, REGULAR, Optional.empty());
    HiveColumnHandle extendedPrice = new HiveColumnHandle("extendedprice", HIVE_DOUBLE, parseTypeSignature(StandardTypes.DOUBLE), 1, REGULAR, Optional.empty());
    HiveColumnHandle discount = new HiveColumnHandle("discount", HIVE_DOUBLE, parseTypeSignature(StandardTypes.DOUBLE), 2, REGULAR, Optional.empty());
    List<HiveColumnHandle> columns = ImmutableList.of(quantity, extendedPrice, discount);

    Domain quantityBelowFifty = Domain.create(ofRanges(Range.lessThan(BIGINT, 50L)), false);
    Domain priceEqualsFiveCents = Domain.create(ofRanges(Range.equal(DOUBLE, 0.05)), false);
    Domain discountUpToTwoPercent = Domain.create(ofRanges(Range.range(DOUBLE, 0.0, true, 0.02, true)), false);
    TupleDomain<HiveColumnHandle> tupleDomain = withColumnDomains(
            ImmutableMap.of(
                    columns.get(0), quantityBelowFifty,
                    columns.get(1), priceEqualsFiveCents,
                    columns.get(2), discountUpToTwoPercent));

    assertEquals("SELECT s._1, s._2, s._3 FROM S3Object s WHERE ((case s._1 when '' then null else CAST(s._1 AS INT) end < 50))",
            queryBuilder.buildSql(columns, tupleDomain));
}
}
/**
 * Creates one split per worker node and per calendar day, for the days from
 * {@code maxHistoryDays - 1} days ago (at midnight in {@code timeZone}) up to now.
 * A day is included only when its time range overlaps both the start-time and the
 * end-time constraint of the layout handle.
 * <p>
 * Each day's range runs from 00:00:00 to 23:59:59 of that day, both inclusive,
 * expressed as epoch seconds multiplied by 1000.
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle, SplitSchedulingStrategy splitSchedulingStrategy)
{
    AtopTableLayoutHandle handle = (AtopTableLayoutHandle) layoutHandle;
    AtopTableHandle table = handle.getTableHandle();

    List<ConnectorSplit> splits = new ArrayList<>();
    ZonedDateTime end = ZonedDateTime.now(timeZone);
    for (Node node : nodeManager.getWorkerNodes()) {
        ZonedDateTime firstDay = end.minusDays(maxHistoryDays - 1).withHour(0).withMinute(0).withSecond(0).withNano(0);
        for (ZonedDateTime dayStart = firstDay;
                dayStart.isBefore(end);
                dayStart = dayStart.plusDays(1).withHour(0).withMinute(0).withSecond(0).withNano(0)) {
            ZonedDateTime dayEnd = dayStart.withHour(23).withMinute(59).withSecond(59).withNano(0);
            long lowMillis = 1000 * dayStart.toEpochSecond();
            long highMillis = 1000 * dayEnd.toEpochSecond();
            Domain dayDomain = Domain.create(ValueSet.ofRanges(Range.range(TIMESTAMP_WITH_TIME_ZONE, lowMillis, true, highMillis, true)), false);

            // skip days that cannot satisfy both time constraints
            if (!handle.getStartTimeConstraint().overlaps(dayDomain)) {
                continue;
            }
            if (!handle.getEndTimeConstraint().overlaps(dayDomain)) {
                continue;
            }
            splits.add(new AtopSplit(table.getTable(), node.getHostAndPort(), dayStart.toEpochSecond(), dayStart.getZone()));
        }
    }
    return new FixedSplitSource(splits);
}
}