@Override public <T> Boolean notNull(BoundReference<T> ref) { // no need to check whether the field is required because binding evaluates that case // if the column has no non-null values, the expression cannot match Integer id = ref.fieldId(); Preconditions.checkNotNull(struct.field(id), "Cannot filter by nested column: %s", schema.findField(id)); if (valueCounts != null && valueCounts.containsKey(id) && nullCounts != null && nullCounts.containsKey(id) && valueCounts.get(id) - nullCounts.get(id) == 0) { return ROWS_CANNOT_MATCH; } return ROWS_MIGHT_MATCH; }
@Override public <T> Boolean isNull(BoundReference<T> ref) { // no need to check whether the field is required because binding evaluates that case // if the column has any non-null values, the expression does not match Integer id = ref.fieldId(); Preconditions.checkNotNull(struct.field(id), "Cannot filter by nested column: %s", schema.findField(id)); if (valueCounts != null && valueCounts.containsKey(id) && nullCounts != null && nullCounts.containsKey(id) && valueCounts.get(id) - nullCounts.get(id) == 0) { return ROWS_MUST_MATCH; } return ROWS_MIGHT_NOT_MATCH; }
@Override public <T> Boolean isNull(BoundReference<T> ref) { // no need to check whether the field is required because binding evaluates that case // if the column has no null values, the expression cannot match Integer id = ref.fieldId(); Preconditions.checkNotNull(struct.field(id), "Cannot filter by nested column: %s", schema.findField(id)); if (nullCounts != null && nullCounts.containsKey(id) && nullCounts.get(id) == 0) { return ROWS_CANNOT_MATCH; } return ROWS_MIGHT_MATCH; }
@Override public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (lowerBounds != null && lowerBounds.containsKey(id)) { T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id)); int cmp = lit.comparator().compare(lower, lit.value()); if (cmp > 0) { return ROWS_CANNOT_MATCH; } } if (upperBounds != null && upperBounds.containsKey(id)) { T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id)); int cmp = lit.comparator().compare(upper, lit.value()); if (cmp < 0) { return ROWS_CANNOT_MATCH; } } return ROWS_MIGHT_MATCH; }
@Override public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) { // Rows must match when: <-------X---Min----Max----------> Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (lowerBounds != null && lowerBounds.containsKey(id)) { T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id)); int cmp = lit.comparator().compare(lower, lit.value()); if (cmp >= 0) { return ROWS_MUST_MATCH; } } return ROWS_MIGHT_NOT_MATCH; }
@Override public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) { // Rows must match when: <-------X---Min----Max----------> Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (lowerBounds != null && lowerBounds.containsKey(id)) { T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id)); int cmp = lit.comparator().compare(lower, lit.value()); if (cmp > 0) { return ROWS_MUST_MATCH; } } return ROWS_MIGHT_NOT_MATCH; }
@Override public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) { // Rows must match when: <----------Min----Max---X-------> Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (upperBounds != null && upperBounds.containsKey(id)) { T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id)); int cmp = lit.comparator().compare(upper, lit.value()); if (cmp < 0) { return ROWS_MUST_MATCH; } } return ROWS_MIGHT_NOT_MATCH; }
@Override public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) { // Rows must match when: <----------Min----Max---X-------> Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (upperBounds != null && upperBounds.containsKey(id)) { T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id)); int cmp = lit.comparator().compare(upper, lit.value()); if (cmp <= 0) { return ROWS_MUST_MATCH; } } return ROWS_MIGHT_NOT_MATCH; }
@Override public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (lowerBounds != null && lowerBounds.containsKey(id)) { T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id)); int cmp = lit.comparator().compare(lower, lit.value()); if (cmp >= 0) { return ROWS_CANNOT_MATCH; } } return ROWS_MIGHT_MATCH; }
@Override public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (upperBounds != null && upperBounds.containsKey(id)) { T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id)); int cmp = lit.comparator().compare(upper, lit.value()); if (cmp <= 0) { return ROWS_CANNOT_MATCH; } } return ROWS_MIGHT_MATCH; }
@Override public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (lowerBounds != null && lowerBounds.containsKey(id)) { T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id)); int cmp = lit.comparator().compare(lower, lit.value()); if (cmp > 0) { return ROWS_CANNOT_MATCH; } } return ROWS_MIGHT_MATCH; }
@Test public void testStructs() throws Exception { Types.StructType struct = Types.StructType.of( Types.NestedField.required(34, "Name!", Types.StringType.get()), Types.NestedField.optional(35, "col", Types.DecimalType.of(38, 2))); Type copy = TestHelpers.roundTripSerialize(struct); Assert.assertEquals("Struct serialization should be equal to starting type", struct, copy); Type stringType = copy.asNestedType().asStructType().fieldType("Name!"); Assert.assertSame("Struct serialization should preserve identity type", Types.StringType.get(), stringType); Type decimalType = copy.asNestedType().asStructType().field(35).type(); Assert.assertEquals("Struct serialization should support id lookup", Types.DecimalType.of(38, 2), decimalType); }
@Override public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); Boolean hasNonDictPage = isFallback.get(id); if (hasNonDictPage == null || hasNonDictPage) { return ROWS_MIGHT_MATCH; } Set<T> dictionary = dict(id, lit.comparator()); if (dictionary.size() > 1) { return ROWS_MIGHT_MATCH; } return dictionary.contains(lit.value()) ? ROWS_CANNOT_MATCH : ROWS_MIGHT_MATCH; }
@Override public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); if (upperBounds != null && upperBounds.containsKey(id)) { T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id)); int cmp = lit.comparator().compare(upper, lit.value()); if (cmp < 0) { return ROWS_CANNOT_MATCH; } } return ROWS_MIGHT_MATCH; }
@Override public Type field(Types.NestedField field, Supplier<Type> future) { Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType); Types.StructType sourceStruct = sourceType.asStructType(); this.sourceType = sourceStruct.field(field.fieldId()).type(); try { return future.get(); } finally { sourceType = sourceStruct; } }
private static <T> List<T> visitFields(Types.StructType struct, GroupType group, TypeWithSchemaVisitor<T> visitor) { List<T> results = Lists.newArrayListWithExpectedSize(group.getFieldCount()); for (Type field : group.getFields()) { int id = -1; if (field.getId() != null) { id = field.getId().intValue(); } Types.NestedField iField = (struct != null && id >= 0) ? struct.field(id) : null; results.add(visitField(iField, field, visitor)); } return results; }
@Override public Type fieldType(String name) { NestedField field = field(name); if (field != null) { return field.type(); } return null; }
@Override public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) { Integer id = ref.fieldId(); Types.NestedField field = struct.field(id); Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id)); Boolean hasNonDictPage = isFallback.get(id); if (hasNonDictPage == null || hasNonDictPage) { return ROWS_MIGHT_MATCH; } Set<T> dictionary = dict(id, lit.comparator()); return dictionary.contains(lit.value()) ? ROWS_MIGHT_MATCH : ROWS_CANNOT_MATCH; }
@Override public Type field(Types.NestedField field, Supplier<Type> future) { Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType); Types.StructType sourceStruct = sourceType.asStructType(); Types.NestedField sourceField = sourceStruct.field(field.name()); this.sourceType = sourceField.type(); try { return future.get(); } finally { sourceType = sourceStruct; } }
@Override public <T> Boolean notNull(BoundReference<T> ref) { // no need to check whether the field is required because binding evaluates that case // if the column has any null values, the expression does not match Integer id = ref.fieldId(); Preconditions.checkNotNull(struct.field(id), "Cannot filter by nested column: %s", schema.findField(id)); if (nullCounts != null && nullCounts.containsKey(id) && nullCounts.get(id) == 0) { return ROWS_MUST_MATCH; } return ROWS_MIGHT_NOT_MATCH; }