/**
 * Checks strict evaluation of {@code id >= literal} for literals above, just inside, and at the
 * low end of the id values described by FILE.
 *
 * <p>The expectations assume FILE's id metrics bound the column to [30, 79]; that fixture is
 * defined outside this method.
 */
@Test
public void testIntegerGtEq() {
  Assert.assertFalse(
      "Should not match: no values in range",
      new StrictMetricsEvaluator(SCHEMA, greaterThanOrEqual("id", 80)).eval(FILE));

  Assert.assertFalse(
      "Should not match: 78 and lower are not in range",
      new StrictMetricsEvaluator(SCHEMA, greaterThanOrEqual("id", 79)).eval(FILE));

  Assert.assertFalse(
      "Should not match: 30 not in range",
      new StrictMetricsEvaluator(SCHEMA, greaterThanOrEqual("id", 31)).eval(FILE));

  // only a literal at or below the smallest id lets every row satisfy the predicate
  Assert.assertTrue(
      "Should match: all values in range",
      new StrictMetricsEvaluator(SCHEMA, greaterThanOrEqual("id", 30)).eval(FILE));
}
/** * Test whether the file may contain records that match the expression. * * @param file a data file * @return false if the file cannot contain rows that match the expression, true otherwise. */ public boolean eval(DataFile file) { // TODO: detect the case where a column is missing from the file using file's max field id. return visitor().eval(file); }
StrictMetricsEvaluator metricsEvaluator = new StrictMetricsEvaluator( ops.current().schema(), deleteExpression);
if (fileDelete || inclusive.eval(file.partition())) { ValidationException.check( fileDelete || strict.eval(file.partition()) || metricsEvaluator.eval(file), "Cannot delete file where some, but not all, rows match filter %s: %s", this.deleteExpression, file.path()); if (fileDelete || inclusive.eval(file.partition())) { ValidationException.check( fileDelete || strict.eval(file.partition()) || metricsEvaluator.eval(file), "Cannot delete file where some, but not all, rows match filter %s: %s", this.deleteExpression, file.path());
/**
 * Checks strict evaluation of {@code id <= literal} for literals below, at the low end of, at the
 * high end of, and above the id values described by FILE.
 *
 * <p>The expectations assume FILE's id metrics bound the column to [30, 79]; that fixture is
 * defined outside this method.
 */
@Test
public void testIntegerLtEq() {
  boolean belowAllValues =
      new StrictMetricsEvaluator(SCHEMA, lessThanOrEqual("id", 29)).eval(FILE);
  Assert.assertFalse("Should not match: always false", belowAllValues);

  boolean atSmallestValue =
      new StrictMetricsEvaluator(SCHEMA, lessThanOrEqual("id", 30)).eval(FILE);
  Assert.assertFalse("Should not match: 31 and greater not in range", atSmallestValue);

  boolean atLargestValue =
      new StrictMetricsEvaluator(SCHEMA, lessThanOrEqual("id", 79)).eval(FILE);
  Assert.assertTrue("Should match: all values in range", atLargestValue);

  boolean aboveAllValues =
      new StrictMetricsEvaluator(SCHEMA, lessThanOrEqual("id", 80)).eval(FILE);
  Assert.assertTrue("Should match: all values in range", aboveAllValues);
}
/**
 * Checks strict evaluation of {@code id < literal} around the id values described by FILE.
 *
 * <p>The expectations assume FILE's id metrics bound the column to [30, 79]; that fixture is
 * defined outside this method.
 */
@Test
public void testIntegerLt() {
  boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, lessThan("id", 30)).eval(FILE);
  Assert.assertFalse("Should not match: always false", shouldRead);

  // id < 31 is satisfied only by 30, so every value from 31 upward fails the predicate
  shouldRead = new StrictMetricsEvaluator(SCHEMA, lessThan("id", 31)).eval(FILE);
  Assert.assertFalse("Should not match: 31 and greater not in range", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, lessThan("id", 79)).eval(FILE);
  Assert.assertFalse("Should not match: 79 not in range", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, lessThan("id", 80)).eval(FILE);
  Assert.assertTrue("Should match: all values in range", shouldRead);
}
/**
 * Checks strict evaluation of {@code id > literal} around the id values described by FILE.
 *
 * <p>The expectations assume FILE's id metrics bound the column to [30, 79]; that fixture is
 * defined outside this method.
 */
@Test
public void testIntegerGt() {
  boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 79)).eval(FILE);
  Assert.assertFalse("Should not match: always false", shouldRead);

  // id > 78 is satisfied only by 79, so every value from 78 downward fails the predicate
  shouldRead = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 78)).eval(FILE);
  Assert.assertFalse("Should not match: 78 and less not in range", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 30)).eval(FILE);
  Assert.assertFalse("Should not match: 30 not in range", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 29)).eval(FILE);
  Assert.assertTrue("Should match: all values in range", shouldRead);
}
/**
 * Checks strict evaluation of {@code notNull} against the all_nulls, some_nulls, and no_nulls
 * columns of FILE.
 *
 * <p>Only the column without nulls is expected to match, because the strict result asserted here
 * is true only when every row satisfies the predicate.
 */
@Test
public void testAllNulls() {
  boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("all_nulls")).eval(FILE);
  Assert.assertFalse("Should not match: no non-null value in all null column", shouldRead);

  // the non-match is caused by the null rows, so the message names them
  shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("some_nulls")).eval(FILE);
  Assert.assertFalse("Should not match: column with some nulls contains a null value", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("no_nulls")).eval(FILE);
  Assert.assertTrue("Should match: non-null column contains no null values", shouldRead);
}
/**
 * Checks strict evaluation of {@code isNull} against the all_nulls, some_nulls, and no_nulls
 * columns of FILE; only the all-null column is expected to match.
 */
@Test
public void testNoNulls() {
  Assert.assertTrue(
      "Should match: all values are null",
      new StrictMetricsEvaluator(SCHEMA, isNull("all_nulls")).eval(FILE));

  Assert.assertFalse(
      "Should not match: not all values are null",
      new StrictMetricsEvaluator(SCHEMA, isNull("some_nulls")).eval(FILE));

  Assert.assertFalse(
      "Should not match: no values are null",
      new StrictMetricsEvaluator(SCHEMA, isNull("no_nulls")).eval(FILE));
}
/**
 * Checks null predicates on the column named "required": {@code notNull} is expected to match and
 * {@code isNull} is expected not to match.
 */
@Test
public void testRequiredColumn() {
  boolean notNullMatches = new StrictMetricsEvaluator(SCHEMA, notNull("required")).eval(FILE);
  Assert.assertTrue("Should match: required columns are always non-null", notNullMatches);

  boolean isNullMatches = new StrictMetricsEvaluator(SCHEMA, isNull("required")).eval(FILE);
  Assert.assertFalse("Should not match: required columns never contain null", isNullMatches);
}
/**
 * Checks strict evaluation of equality predicates.
 *
 * <p>No literal is expected to match the id column, while {@code always_5 == 5} is expected to
 * match. The expectations depend on the metrics in FILE, which is defined outside this method.
 */
@Test
public void testIntegerEq() {
  Assert.assertFalse(
      "Should not match: all values != 5",
      new StrictMetricsEvaluator(SCHEMA, equal("id", 5)).eval(FILE));

  Assert.assertFalse(
      "Should not match: some values != 30",
      new StrictMetricsEvaluator(SCHEMA, equal("id", 30)).eval(FILE));

  Assert.assertFalse(
      "Should not match: some values != 75",
      new StrictMetricsEvaluator(SCHEMA, equal("id", 75)).eval(FILE));

  Assert.assertFalse(
      "Should not match: some values != 79",
      new StrictMetricsEvaluator(SCHEMA, equal("id", 79)).eval(FILE));

  Assert.assertFalse(
      "Should not match: some values != 80",
      new StrictMetricsEvaluator(SCHEMA, equal("id", 80)).eval(FILE));

  Assert.assertTrue(
      "Should match: all values == 5",
      new StrictMetricsEvaluator(SCHEMA, equal("always_5", 5)).eval(FILE));
}
/**
 * Checks strict evaluation of {@code id != literal} for literals outside, on the edges of, and
 * inside the id values described by FILE.
 *
 * <p>The expectations assume FILE's id metrics bound the column to [30, 79]; that fixture is
 * defined outside this method.
 */
@Test
public void testIntegerNotEq() {
  boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 5)).eval(FILE);
  Assert.assertTrue("Should match: no values == 5", shouldRead);

  // the message names the literal actually under test (29)
  shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 29)).eval(FILE);
  Assert.assertTrue("Should match: no values == 29", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 30)).eval(FILE);
  Assert.assertFalse("Should not match: some value may be == 30", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 75)).eval(FILE);
  Assert.assertFalse("Should not match: some value may be == 75", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 79)).eval(FILE);
  Assert.assertFalse("Should not match: some value may be == 79", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 80)).eval(FILE);
  Assert.assertTrue("Should match: no values == 80", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, notEqual("id", 85)).eval(FILE);
  Assert.assertTrue("Should match: no values == 85", shouldRead);
}
/**
 * Expects a {@code ValidationException} mentioning the unknown field when a predicate that
 * references a column named "missing" is evaluated against SCHEMA and FILE.
 */
@Test
public void testMissingColumn() {
  String description = "Should complain about missing column in expression";
  String expectedMessage = "Cannot find field 'missing'";

  TestHelpers.assertThrows(
      description,
      ValidationException.class,
      expectedMessage,
      () -> new StrictMetricsEvaluator(SCHEMA, lessThan("missing", 5)).eval(FILE));
}
@Test public void testNot() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, not(lessThan("id", 5))).eval(FILE); Assert.assertTrue("Should not match: not(false)", shouldRead); shouldRead = new StrictMetricsEvaluator(SCHEMA, not(greaterThan("id", 5))).eval(FILE); Assert.assertFalse("Should match: not(true)", shouldRead); }
/**
 * Checks strict evaluation of {@code not(id == literal)}, mirroring the literals used for the
 * direct {@code notEqual} form.
 *
 * <p>The expectations assume FILE's id metrics bound the column to [30, 79]; that fixture is
 * defined outside this method.
 */
@Test
public void testIntegerNotEqRewritten() {
  boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 5))).eval(FILE);
  Assert.assertTrue("Should match: no values == 5", shouldRead);

  // the message names the literal actually under test (29)
  shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 29))).eval(FILE);
  Assert.assertTrue("Should match: no values == 29", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 30))).eval(FILE);
  Assert.assertFalse("Should not match: some value may be == 30", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 75))).eval(FILE);
  Assert.assertFalse("Should not match: some value may be == 75", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 79))).eval(FILE);
  Assert.assertFalse("Should not match: some value may be == 79", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 80))).eval(FILE);
  Assert.assertTrue("Should match: no values == 80", shouldRead);

  shouldRead = new StrictMetricsEvaluator(SCHEMA, not(equal("id", 85))).eval(FILE);
  Assert.assertTrue("Should match: no values == 85", shouldRead);
}
}
@Test public void testAnd() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, and(greaterThan("id", 5), lessThanOrEqual("id", 30))).eval(FILE); Assert.assertFalse("Should not match: range overlaps data", shouldRead); shouldRead = new StrictMetricsEvaluator(SCHEMA, and(lessThan("id", 5), greaterThanOrEqual("id", 0))).eval(FILE); Assert.assertFalse("Should match: range does not overlap data", shouldRead); shouldRead = new StrictMetricsEvaluator(SCHEMA, and(lessThan("id", 85), greaterThanOrEqual("id", 0))).eval(FILE); Assert.assertTrue("Should match: range includes all data", shouldRead); }
@Test public void testOr() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, or(lessThan("id", 5), greaterThanOrEqual("id", 80))).eval(FILE); Assert.assertFalse("Should not match: no matching values", shouldRead); shouldRead = new StrictMetricsEvaluator(SCHEMA, or(lessThan("id", 5), greaterThanOrEqual("id", 60))).eval(FILE); Assert.assertFalse("Should not match: some values do not match", shouldRead); shouldRead = new StrictMetricsEvaluator(SCHEMA, or(lessThan("id", 5), greaterThanOrEqual("id", 30))).eval(FILE); Assert.assertTrue("Should match: all values match > 30", shouldRead); }
@Override public List<ManifestFile> apply(TableMetadata base) { if (validateAddedFiles) { PartitionSpec spec = writeSpec(); Expression rowFilter = rowFilter(); Expression inclusiveExpr = Projections.inclusive(spec).project(rowFilter); Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr); Expression strictExpr = Projections.strict(spec).project(rowFilter); Evaluator strict = new Evaluator(spec.partitionType(), strictExpr); StrictMetricsEvaluator metrics = new StrictMetricsEvaluator( base.schema(), rowFilter); for (DataFile file : addedFiles()) { // the real test is that the strict or metrics test matches the file, indicating that all // records in the file match the filter. inclusive is used to avoid testing the metrics, // which is more complicated ValidationException.check( inclusive.eval(file.partition()) && (strict.eval(file.partition()) || metrics.eval(file)), "Cannot append file with rows that do not match filter: %s: %s", rowFilter, file.path()); } } return super.apply(base); } }
/**
 * Expects every listed predicate to match a file built with {@code Row.of()} and a trailing
 * argument of 0 (presumably the record count; confirm against TestDataFile).
 */
@Test
public void testZeroRecordFile() {
  DataFile zeroRecords = new TestDataFile("file.parquet", Row.of(), 0);

  Expression[] predicates = {
      lessThan("id", 5),
      lessThanOrEqual("id", 30),
      equal("id", 70),
      greaterThan("id", 78),
      greaterThanOrEqual("id", 90),
      notEqual("id", 101),
      isNull("some_nulls"),
      notNull("some_nulls")
  };

  for (Expression predicate : predicates) {
    boolean matches = new StrictMetricsEvaluator(SCHEMA, predicate).eval(zeroRecords);
    Assert.assertTrue("Should always match 0-record file: " + predicate, matches);
  }
}
/**
 * Expects every listed predicate on the no_stats column to be rejected for a file built with
 * {@code Row.of()} and a trailing argument of 50 (presumably the record count, with no column
 * metrics supplied; confirm against TestDataFile).
 */
@Test
public void testMissingStats() {
  DataFile withoutStats = new TestDataFile("file.parquet", Row.of(), 50);

  Expression[] predicates = {
      lessThan("no_stats", 5),
      lessThanOrEqual("no_stats", 30),
      equal("no_stats", 70),
      greaterThan("no_stats", 78),
      greaterThanOrEqual("no_stats", 90),
      notEqual("no_stats", 101),
      isNull("no_stats"),
      notNull("no_stats")
  };

  for (Expression predicate : predicates) {
    boolean matches = new StrictMetricsEvaluator(SCHEMA, predicate).eval(withoutStats);
    Assert.assertFalse(
        "Should never match when stats are missing for expr: " + predicate, matches);
  }
}