com.netflix.iceberg.spark.source.TestFilteredScan.pushFilters Java code examples

/**
 * Converts every Iceberg filter with {@code SparkExpressions.convert(filter, SCHEMA)} and hands
 * the converted array to the {@code pushFilters} overload that accepts those expressions.
 *
 * @param reader  the reader that receives the converted filters
 * @param filters Iceberg expressions to convert, in the order they should be pushed
 */
private void pushFilters(DataSourceReader reader,
                         com.netflix.iceberg.expressions.Expression... filters) {
  Expression[] converted = new Expression[filters.length];
  int slot = 0;
  for (com.netflix.iceberg.expressions.Expression filter : filters) {
    converted[slot] = SparkExpressions.convert(filter, SCHEMA);
    slot += 1;
  }
  pushFilters(reader, converted);
}

/**
 * Pushes a {@code ts < 2017-12-22T00:00:00+00:00} filter to a reader over the unpartitioned
 * table, checks that exactly one task is planned, and compares the rows read back through the
 * equivalent SQL predicate with {@code expected(5, 6, 7, 8, 9)}.
 */
@Test
public void testUnpartitionedTimestampFilter() {
  String tablePath = unpartitioned.toString();
  DataSourceReader reader = new IcebergSource()
      .createReader(new DataSourceOptions(ImmutableMap.of("path", tablePath)));

  pushFilters(reader, Expressions.lessThan("ts", "2017-12-22T00:00:00+00:00"));

  List<DataReaderFactory<UnsafeRow>> planned = planTasks(reader);
  Assert.assertEquals("Should only create one task for a small file", 1, planned.size());

  assertEqualsSafe(SCHEMA.asStruct(), expected(5, 6, 7, 8, 9),
      read(tablePath, "ts < cast('2017-12-22 00:00:00+00:00' as timestamp)"));
}

/**
 * For each id in 0..9, pushes an {@code id = i} filter to a fresh reader over the unpartitioned
 * table, checks that a single task is planned, and compares the filtered rows with
 * {@code expected(i)}.
 */
@Test
public void testUnpartitionedIDFilters() {
  String tablePath = unpartitioned.toString();
  DataSourceOptions readOptions = new DataSourceOptions(ImmutableMap.of("path", tablePath));
  IcebergSource icebergSource = new IcebergSource();

  for (int id = 0; id < 10; id++) {
    // a new reader per id so previously pushed filters cannot carry over
    DataSourceReader filtered = icebergSource.createReader(readOptions);
    pushFilters(filtered, Expressions.equal("id", id));

    int taskCount = planTasks(filtered).size();
    Assert.assertEquals("Should only create one task for a small file", 1, taskCount);

    // row-level check of the same predicate expressed as SQL
    assertEqualsSafe(SCHEMA.asStruct(), expected(id), read(tablePath, "id = " + id));
  }
}

/**
 * Checks filter push-down on the table built by
 * {@code buildPartitionedTable("bucketed_by_id", BUCKET_BY_ID, "bucket4", "id")}: an unfiltered
 * reader must plan 4 tasks, while each {@code id = i} filter (i in 0..9) must plan exactly one
 * task and return the rows of {@code expected(i)}.
 */
@Test
public void testBucketPartitionedIDFilters() {
  File location = buildPartitionedTable("bucketed_by_id", BUCKET_BY_ID, "bucket4", "id");
  DataSourceOptions options = new DataSourceOptions(ImmutableMap.of(
      "path", location.toString())
  );
  IcebergSource source = new IcebergSource();

  // baseline: with no filter pushed, every task is planned
  DataSourceReader unfiltered = source.createReader(options);
  Assert.assertEquals("Unfiltered table should have created 4 read tasks",
      4, planTasks(unfiltered).size());

  for (int i = 0; i < 10; i += 1) {
    DataSourceReader reader = source.createReader(options);
    pushFilters(reader, Expressions.equal("id", i));
    List<DataReaderFactory<UnsafeRow>> tasks = planTasks(reader);

    // validate predicate push-down
    Assert.assertEquals("Should create one task for a single bucket", 1, tasks.size());

    // validate row filtering
    assertEqualsSafe(SCHEMA.asStruct(), expected(i), read(location.toString(), "id = " + i));
  }
}

/**
 * Checks filter push-down on the table built by
 * {@code buildPartitionedTable("trunc", PARTITION_BY_FIRST_LETTER, "trunc1", "data")}: an
 * unfiltered reader must plan 9 tasks, and a {@code data = "goldfish"} filter must plan one task
 * whether it is supplied through {@code Expressions.equal} or through a {@code col(...)}
 * expression. The filtered rows are finally compared with {@code expected(9)}.
 */
@Test
public void testTrunctateDataPartitionedFilters() {
  File location = buildPartitionedTable("trunc", PARTITION_BY_FIRST_LETTER, "trunc1", "data");
  DataSourceOptions readOptions = new DataSourceOptions(
      ImmutableMap.of("path", location.toString()));
  IcebergSource icebergSource = new IcebergSource();

  DataSourceReader unfiltered = icebergSource.createReader(readOptions);
  int unfilteredTaskCount = planTasks(unfiltered).size();
  Assert.assertEquals("Unfiltered table should have created 9 read tasks",
      9, unfilteredTaskCount);

  // filter supplied through Expressions.equal
  DataSourceReader expressionFiltered = icebergSource.createReader(readOptions);
  pushFilters(expressionFiltered, Expressions.equal("data", "goldfish"));
  int expressionTaskCount = planTasks(expressionFiltered).size();
  Assert.assertEquals("Should create 1 task for 'goldfish' (g)", 1, expressionTaskCount);

  // same predicate supplied through col(...).$eq$eq$eq(...).expr()
  DataSourceReader columnFiltered = icebergSource.createReader(readOptions);
  pushFilters(columnFiltered, col("data").$eq$eq$eq("goldfish").expr());
  int columnTaskCount = planTasks(columnFiltered).size();
  Assert.assertEquals("Should create 1 task for 'goldfish' (g)", 1, columnTaskCount);

  assertEqualsSafe(SCHEMA.asStruct(), expected(9),
      read(location.toString(), "data = 'goldfish'"));
}

/**
 * Checks filter push-down on the table built by
 * {@code buildPartitionedTable("partitioned_by_hour", PARTITION_BY_HOUR, "ts_hour", "ts")}: an
 * unfiltered reader must plan 9 tasks, a {@code ts < 2017-12-22T00:00:00+00:00} filter must plan
 * 4 tasks, and a {@code 06:00 < ts < 08:00} range on 2017-12-22 must plan 2 tasks. Each filtered
 * case also compares the rows read back through the equivalent SQL predicate with the expected
 * records.
 */
@Test
public void testHourPartitionedTimestampFilters() {
  File location = buildPartitionedTable("partitioned_by_hour", PARTITION_BY_HOUR, "ts_hour", "ts");
  DataSourceOptions options = new DataSourceOptions(ImmutableMap.of(
      "path", location.toString())
  );
  IcebergSource source = new IcebergSource();

  // baseline: with no filter pushed, every task is planned
  DataSourceReader unfiltered = source.createReader(options);
  Assert.assertEquals("Unfiltered table should have created 9 read tasks",
      9, planTasks(unfiltered).size());

  {
    // open-ended upper bound: everything before 2017-12-22
    DataSourceReader reader = source.createReader(options);
    pushFilters(reader, Expressions.lessThan("ts", "2017-12-22T00:00:00+00:00"));
    List<DataReaderFactory<UnsafeRow>> tasks = planTasks(reader);
    Assert.assertEquals("Should create 4 tasks for 2017-12-21: 15, 17, 21, 22", 4, tasks.size());
    assertEqualsSafe(SCHEMA.asStruct(), expected(8, 9, 7, 6, 5),
        read(location.toString(), "ts < cast('2017-12-22 00:00:00+00:00' as timestamp)"));
  }

  {
    // bounded range: strictly between 06:00 and 08:00 on 2017-12-22
    DataSourceReader reader = source.createReader(options);
    pushFilters(reader, Expressions.and(
        Expressions.greaterThan("ts", "2017-12-22T06:00:00+00:00"),
        Expressions.lessThan("ts", "2017-12-22T08:00:00+00:00")));
    List<DataReaderFactory<UnsafeRow>> tasks = planTasks(reader);
    Assert.assertEquals("Should create 2 tasks for 2017-12-22: 6, 7", 2, tasks.size());
    assertEqualsSafe(SCHEMA.asStruct(), expected(2, 1), read(location.toString(),
        "ts > cast('2017-12-22 06:00:00+00:00' as timestamp) and " +
            "ts < cast('2017-12-22 08:00:00+00:00' as timestamp)"));
  }
}

// Detached pushFilters call sites, shown without their enclosing methods.
// NOTE(review): `reader` and `day` are not declared in this excerpt; they come from code not shown here.
pushFilters(reader, Expressions.lessThan("ts", "2017-12-22T00:00:00+00:00"));
pushFilters(reader, col("ts").cast(DateType$.MODULE$).$eq$eq$eq(lit(day)).expr());
pushFilters(reader, to_date(col("ts")).$eq$eq$eq(lit(day)).expr());
pushFilters(reader, Expressions.and(
  Expressions.greaterThan("ts", "2017-12-22T06:00:00+00:00"),
  Expressions.lessThan("ts", "2017-12-22T08:00:00+00:00")));

Popular methods of TestFilteredScan

Popular in Java

Creating JSON documents from java classes using gson
addToBackStack (FragmentTransaction)
setContentView (Activity)
getContentResolver (Context)
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
Comparator (java.util)
A Comparator is used to compare two objects to determine their ordering with respect to each other.
Stack (java.util)
Stack is a Last-In/First-Out (LIFO) data structure which represents a stack of objects. It enables users to pop to and push from the stack.
SSLHandshakeException (javax.net.ssl)
The exception that is thrown when a handshake could not be completed successfully.
VirtualMachine (com.sun.tools.attach)
A Java virtual machine. A VirtualMachine represents a Java virtual machine to which this Java virtual machine has attached.
Table (org.hibernate.mapping)
A relational table
Top 12 Jupyter Notebook extensions

How to use the pushFilters method in com.netflix.iceberg.spark.source.TestFilteredScan

Best Java code snippets using com.netflix.iceberg.spark.source.TestFilteredScan.pushFilters (Showing top 7 results out of 315)

How to use the pushFilters method in com.netflix.iceberg.spark.source.TestFilteredScan