/**
 * Evaluates strict {@code id > N} filters against {@code FILE}: the literals 79, 78 and 30 are
 * expected to evaluate to false, and 29 to true.
 */
@Test
public void testIntegerGt() {
  StrictMetricsEvaluator gt79 = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 79));
  Assert.assertFalse("Should not match: always false", gt79.eval(FILE));

  StrictMetricsEvaluator gt78 = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 78));
  Assert.assertFalse("Should not match: 77 and less not in range", gt78.eval(FILE));

  StrictMetricsEvaluator gt30 = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 30));
  Assert.assertFalse("Should not match: 30 not in range", gt30.eval(FILE));

  StrictMetricsEvaluator gt29 = new StrictMetricsEvaluator(SCHEMA, greaterThan("id", 29));
  Assert.assertTrue("Should match: all values in range", gt29.eval(FILE));
}
/**
 * Runs every comparison and null-check predicate on the {@code non_dict} column through
 * {@link ParquetDictionaryRowGroupFilter} and expects {@code shouldRead} to be true each time.
 */
@Test
public void testColumnFallbackOrNotDictionaryEncoded() {
  Expression[] predicates = {
      lessThan("non_dict", "a"),
      lessThanOrEqual("non_dict", "a"),
      equal("non_dict", "a"),
      greaterThan("non_dict", "a"),
      greaterThanOrEqual("non_dict", "a"),
      notNull("non_dict"),
      isNull("non_dict"),
      notEqual("non_dict", "a")
  };

  for (int i = 0; i < predicates.length; i += 1) {
    ParquetDictionaryRowGroupFilter filter =
        new ParquetDictionaryRowGroupFilter(SCHEMA, predicates[i]);
    boolean result = filter.shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA, DICTIONARY_STORE);
    Assert.assertTrue("Should read: dictionary cannot be found: " + predicates[i], result);
  }
}
/**
 * Evaluates each comparison and null-check predicate on {@code id} with
 * {@link InclusiveManifestEvaluator} against {@code NO_STATS} and expects true for all of them.
 */
@Test
public void testMissingStats() {
  Expression[] predicates = {
      lessThan("id", 5),
      lessThanOrEqual("id", 30),
      equal("id", 70),
      greaterThan("id", 78),
      greaterThanOrEqual("id", 90),
      notEqual("id", 101),
      isNull("id"),
      notNull("id")
  };

  for (int i = 0; i < predicates.length; i += 1) {
    InclusiveManifestEvaluator evaluator = new InclusiveManifestEvaluator(SPEC, predicates[i]);
    Assert.assertTrue(
        "Should read when missing stats for expr: " + predicates[i], evaluator.eval(NO_STATS));
  }
}
/**
 * Evaluates inclusive {@code id > N} filters against {@code FILE}: the literals 85 and 79 are
 * expected to evaluate to false, and 78 and 75 to true.
 *
 * <p>The failure messages treat 79 as the upper bound of {@code id}.
 */
@Test
public void testIntegerGt() {
  boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThan("id", 85)).eval(FILE);
  // message fixed: the literal 85 lies above the bound 79, so the inequality reads 85 > 79
  Assert.assertFalse("Should not read: id range above upper bound (85 > 79)", shouldRead);

  shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThan("id", 79)).eval(FILE);
  Assert.assertFalse("Should not read: id range above upper bound (79 is not > 79)", shouldRead);

  shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThan("id", 78)).eval(FILE);
  Assert.assertTrue("Should read: one possible id", shouldRead);

  shouldRead = new InclusiveMetricsEvaluator(SCHEMA, greaterThan("id", 75)).eval(FILE);
  // message fixed: "may possible ids" was a typo for "many possible ids"
  Assert.assertTrue("Should read: many possible ids", shouldRead);
}
/**
 * Runs each comparison and null-check predicate on the {@code no_stats} column through
 * {@link ParquetMetricsRowGroupFilter} and expects {@code shouldRead} to be true for all of them.
 */
@Test
public void testMissingStats() {
  Expression[] predicates = {
      lessThan("no_stats", "a"),
      lessThanOrEqual("no_stats", "b"),
      equal("no_stats", "c"),
      greaterThan("no_stats", "d"),
      greaterThanOrEqual("no_stats", "e"),
      notEqual("no_stats", "f"),
      isNull("no_stats"),
      notNull("no_stats")
  };

  for (Expression predicate : predicates) {
    ParquetMetricsRowGroupFilter filter = new ParquetMetricsRowGroupFilter(SCHEMA, predicate);
    boolean result = filter.shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA);
    Assert.assertTrue("Should read when missing stats for expr: " + predicate, result);
  }
}
/**
 * Evaluates inclusive {@code id > N} filters with {@link InclusiveManifestEvaluator} against
 * {@code FILE}: the literals 85 and 79 are expected to evaluate to false, and 78 and 75 to true.
 *
 * <p>The failure messages treat 79 as the upper bound of {@code id}.
 */
@Test
public void testIntegerGt() {
  boolean shouldRead = new InclusiveManifestEvaluator(SPEC, greaterThan("id", 85)).eval(FILE);
  // message fixed: the literal 85 lies above the bound 79, so the inequality reads 85 > 79
  Assert.assertFalse("Should not read: id range above upper bound (85 > 79)", shouldRead);

  shouldRead = new InclusiveManifestEvaluator(SPEC, greaterThan("id", 79)).eval(FILE);
  Assert.assertFalse("Should not read: id range above upper bound (79 is not > 79)", shouldRead);

  shouldRead = new InclusiveManifestEvaluator(SPEC, greaterThan("id", 78)).eval(FILE);
  Assert.assertTrue("Should read: one possible id", shouldRead);

  shouldRead = new InclusiveManifestEvaluator(SPEC, greaterThan("id", 75)).eval(FILE);
  // message fixed: "may possible ids" was a typo for "many possible ids"
  Assert.assertTrue("Should read: many possible ids", shouldRead);
}
@Test public void testAnd() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, and(lessThan("id", 5), greaterThanOrEqual("id", 0))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA); Assert.assertFalse("Should skip: and(false, false)", shouldRead); shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, and(greaterThan("id", 5), lessThanOrEqual("id", 30))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA); Assert.assertTrue("Should read: and(true, true)", shouldRead); }
/**
 * Binds {@code (x = 7 AND y < 100) OR z > -100} to {@code STRUCT} and asserts, via
 * {@code TestHelpers.assertAllReferencesBound}, that all references in the result are bound.
 */
@Test
public void testMultipleReferences() {
  Expression xAndY = and(equal("x", 7), lessThan("y", 100));
  Expression zPredicate = greaterThan("z", -100);
  Expression combined = or(xAndY, zPredicate);

  TestHelpers.assertAllReferencesBound("Multiple references", Binder.bind(STRUCT, combined));
}
@Test public void testAnd() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, and(lessThan("id", 5), greaterThanOrEqual("id", 0))).eval(FILE); Assert.assertFalse("Should skip: and(false, false)", shouldRead); shouldRead = new InclusiveMetricsEvaluator(SCHEMA, and(greaterThan("id", 5), lessThanOrEqual("id", 30))).eval(FILE); Assert.assertTrue("Should read: and(true, true)", shouldRead); }
@Test public void testNot() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, not(lessThan("id", 5))).eval(FILE); Assert.assertTrue("Should not match: not(false)", shouldRead); shouldRead = new StrictMetricsEvaluator(SCHEMA, not(greaterThan("id", 5))).eval(FILE); Assert.assertFalse("Should match: not(true)", shouldRead); }
@Test public void testAnd() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, and(lessThan("id", 5), greaterThanOrEqual("id", 0))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA, DICTIONARY_STORE); Assert.assertFalse("Should skip: and(false, false)", shouldRead); shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, and(greaterThan("id", 5), lessThanOrEqual("id", 30))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA, DICTIONARY_STORE); Assert.assertTrue("Should read: and(true, true)", shouldRead); }
@Test public void testNot() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(lessThan("id", 5))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA); Assert.assertTrue("Should read: not(false)", shouldRead); shouldRead = new ParquetMetricsRowGroupFilter(SCHEMA, not(greaterThan("id", 5))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA); Assert.assertFalse("Should skip: not(true)", shouldRead); }
@Test public void testAnd() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new InclusiveManifestEvaluator( SPEC, and(lessThan("id", 5), greaterThanOrEqual("id", 0))).eval(FILE); Assert.assertFalse("Should skip: and(false, false)", shouldRead); shouldRead = new InclusiveManifestEvaluator( SPEC, and(greaterThan("id", 5), lessThanOrEqual("id", 30))).eval(FILE); Assert.assertTrue("Should read: and(true, true)", shouldRead); }
@Test public void testNot() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(lessThan("id", 5))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA, DICTIONARY_STORE); Assert.assertTrue("Should read: not(false)", shouldRead); shouldRead = new ParquetDictionaryRowGroupFilter(SCHEMA, not(greaterThan("id", 5))) .shouldRead(PARQUET_SCHEMA, ROW_GROUP_METADATA, DICTIONARY_STORE); Assert.assertFalse("Should skip: not(true)", shouldRead); }
/**
 * Builds a {@code TestDataFile} with an empty row and a count argument of 50, then evaluates
 * each comparison and null-check predicate on {@code no_stats} with
 * {@link InclusiveMetricsEvaluator}, expecting true for all of them.
 */
@Test
public void testMissingStats() {
  DataFile noStatsFile = new TestDataFile("file.parquet", Row.of(), 50);

  Expression[] predicates = {
      lessThan("no_stats", 5),
      lessThanOrEqual("no_stats", 30),
      equal("no_stats", 70),
      greaterThan("no_stats", 78),
      greaterThanOrEqual("no_stats", 90),
      notEqual("no_stats", 101),
      isNull("no_stats"),
      notNull("no_stats")
  };

  for (int i = 0; i < predicates.length; i += 1) {
    InclusiveMetricsEvaluator evaluator = new InclusiveMetricsEvaluator(SCHEMA, predicates[i]);
    Assert.assertTrue(
        "Should read when missing stats for expr: " + predicates[i],
        evaluator.eval(noStatsFile));
  }
}
@Test public void testNot() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, not(lessThan("id", 5))).eval(FILE); Assert.assertTrue("Should read: not(false)", shouldRead); shouldRead = new InclusiveMetricsEvaluator(SCHEMA, not(greaterThan("id", 5))).eval(FILE); Assert.assertFalse("Should skip: not(true)", shouldRead); }
/**
 * Builds a {@code TestDataFile} with an empty row and a count argument of 0, then evaluates a
 * set of predicates with {@link InclusiveMetricsEvaluator}, expecting false for every one.
 */
@Test
public void testZeroRecordFile() {
  DataFile zeroRecords = new TestDataFile("file.parquet", Row.of(), 0);

  Expression[] predicates = {
      lessThan("id", 5),
      lessThanOrEqual("id", 30),
      equal("id", 70),
      greaterThan("id", 78),
      greaterThanOrEqual("id", 90),
      notEqual("id", 101),
      isNull("some_nulls"),
      notNull("some_nulls")
  };

  for (int i = 0; i < predicates.length; i += 1) {
    InclusiveMetricsEvaluator evaluator = new InclusiveMetricsEvaluator(SCHEMA, predicates[i]);
    Assert.assertFalse(
        "Should never read 0-record file: " + predicates[i], evaluator.eval(zeroRecords));
  }
}
@Test public void testNot() { // this test case must use a real predicate, not alwaysTrue(), or binding will simplify it out boolean shouldRead = new InclusiveManifestEvaluator(SPEC, not(lessThan("id", 5))).eval(FILE); Assert.assertTrue("Should read: not(false)", shouldRead); shouldRead = new InclusiveManifestEvaluator(SPEC, not(greaterThan("id", 5))).eval(FILE); Assert.assertFalse("Should skip: not(true)", shouldRead); }
/**
 * Builds a {@code TestDataFile} with an empty row and a count argument of 0, then evaluates a
 * set of predicates with {@link StrictMetricsEvaluator}, expecting true for every one.
 */
@Test
public void testZeroRecordFile() {
  DataFile zeroRecords = new TestDataFile("file.parquet", Row.of(), 0);

  Expression[] predicates = {
      lessThan("id", 5),
      lessThanOrEqual("id", 30),
      equal("id", 70),
      greaterThan("id", 78),
      greaterThanOrEqual("id", 90),
      notEqual("id", 101),
      isNull("some_nulls"),
      notNull("some_nulls")
  };

  for (int i = 0; i < predicates.length; i += 1) {
    StrictMetricsEvaluator evaluator = new StrictMetricsEvaluator(SCHEMA, predicates[i]);
    Assert.assertTrue(
        "Should always match 0-record file: " + predicates[i], evaluator.eval(zeroRecords));
  }
}
/**
 * Evaluates {@code x > 7} with {@link Evaluator} over three rows whose first value is 7, 6 and
 * 8 respectively; only the row starting with 8 is expected to evaluate to true.
 */
@Test
public void testGreaterThan() {
  Evaluator xAbove7 = new Evaluator(STRUCT, greaterThan("x", 7));

  boolean forSeven = xAbove7.eval(TestHelpers.Row.of(7, 8, null));
  Assert.assertFalse("7 > 7 => false", forSeven);

  boolean forSix = xAbove7.eval(TestHelpers.Row.of(6, 8, null));
  Assert.assertFalse("6 > 7 => false", forSix);

  boolean forEight = xAbove7.eval(TestHelpers.Row.of(8, 8, null));
  Assert.assertTrue("8 > 7 => true", forEight);
}