/**
 * Reads {@code sharedTable} selecting only the "id" column, then checks that every returned
 * record has exactly one field and that the set of ids read equals the ids found in
 * {@code file1Records}, {@code file2Records} and {@code file3Records}.
 */
@Test
public void testProject() {
  Iterable<Record> results = IcebergGenerics.read(sharedTable).select("id").build();

  // Gather the ids expected from each of the three source record lists.
  Set<Long> expected = Sets.newHashSet();
  for (Record source : file1Records) {
    expected.add((Long) source.getField("id"));
  }
  for (Record source : file2Records) {
    expected.add((Long) source.getField("id"));
  }
  for (Record source : file3Records) {
    expected.add((Long) source.getField("id"));
  }

  // First pass over the scan: each projected record must carry a single field.
  for (Record projected : results) {
    Assert.assertEquals("Record should have one projected field", 1, projected.size());
  }

  // Second pass over the scan: collect the ids that were actually read.
  Set<Long> actual = Sets.newHashSet();
  for (Record projected : results) {
    actual.add((Long) projected.getField("id"));
  }

  Assert.assertEquals("Should project only id columns", expected, actual);
}
/**
 * Reads {@code sharedTable} with two filters on "id" (id >= 1 and id < 21) while selecting only
 * the "data" column. Verifies that each returned record reports two fields and that the "data"
 * values read match those of the source records whose id falls in the filtered range.
 */
@Test
public void testProjectWithMissingFilterColumn() {
  Iterable<Record> results = IcebergGenerics.read(sharedTable)
      .where(Expressions.greaterThanOrEqual("id", 1))
      .where(Expressions.lessThan("id", 21))
      .select("data")
      .build();

  // Compute the expected "data" strings by applying the same id range to the source records.
  Set<String> expected = Sets.newHashSet();
  for (Record source : concat(file1Records, file2Records, file3Records)) {
    Long id = (Long) source.getField("id");
    boolean inRange = id >= 1 && id < 21;
    if (!inRange) {
      continue;
    }
    expected.add(source.getField("data").toString());
  }

  // NOTE(review): two fields are expected although only "data" is selected — presumably the
  // filter column "id" is read alongside it; confirm against the reader implementation.
  for (Record projected : results) {
    Assert.assertEquals("Record should have two projected fields", 2, projected.size());
  }

  // Second pass over the scan: collect the "data" values that were actually read.
  Set<String> actual = Sets.newHashSet();
  for (Record projected : results) {
    actual.add(projected.getField("data").toString());
  }

  Assert.assertEquals("Should project correct rows", expected, actual);
}
/**
 * Runs two filtered reads of {@code sharedTable}: one with id < 3, which must return exactly
 * the records of {@code file1Records}, and one with id <= 1, which must return the records of
 * {@code file1Records} whose id is at most 1.
 */
@Test
public void testFilter() {
  // Scan 1: id < 3 should yield every record of file 1.
  Iterable<Record> belowThree =
      IcebergGenerics.read(sharedTable).where(lessThan("id", 3)).build();
  Set<Record> allOfFile1 = Sets.newHashSet(file1Records);
  Assert.assertEquals("Records should match file 1", allOfFile1, Sets.newHashSet(belowThree));

  // Scan 2: id <= 1 should yield file 1 minus the records with a larger id.
  Iterable<Record> atMostOne =
      IcebergGenerics.read(sharedTable).where(lessThanOrEqual("id", 1)).build();
  Set<Record> file1AtMostOne = Sets.newHashSet();
  for (Record candidate : file1Records) {
    if ((Long) candidate.getField("id") <= 1) {
      file1AtMostOne.add(candidate);
    }
  }
  Assert.assertEquals(
      "Records should match file 1 without id 2", file1AtMostOne, Sets.newHashSet(atMostOne));
}
Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project points list", projected.getField("points")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project points list", record.getField("points"), projected.getField("points")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project points list", projected.getField("points")); List<Record> points = (List<Record>) projected.getField("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); Record projectedP1 = points.get(0); Assert.assertEquals("Should project x", 1, (int) projectedP1.getField("x")); Assert.assertNull("Should not project y", projectedP1.getField("y")); Record projectedP2 = points.get(1); Assert.assertEquals("Should project x", 3, (int) projectedP2.getField("x")); Assert.assertNull("Should not project y", projectedP2.getField("y")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project points list", projected.getField("points")); points = (List<Record>) projected.getField("points"); Assert.assertEquals("Should read 2 points", 2, points.size()); projectedP1 = points.get(0); Assert.assertNull("Should not project x", projectedP1.getField("x")); Assert.assertEquals("Should project y", 2, (int) projectedP1.getField("y")); projectedP2 = points.get(1); Assert.assertNull("Should not project x", projectedP2.getField("x")); Assert.assertEquals("Should project null y", null, projectedP2.getField("y"));
/**
 * Builds a record with a long "id" (34) and an optional list "values" of longs (56, 57, 58),
 * then passes it through {@code writeAndRead} (defined elsewhere) with three different read
 * schemas: id only, {@code select("values.element")}, and {@code select("values")}. The id-only
 * read must not contain the list; the other two must contain the whole list and no id.
 */
@Test
public void testListProjection() throws IOException {
  List<Long> values = ImmutableList.of(56L, 57L, 58L);

  Types.NestedField idField = Types.NestedField.required(0, "id", Types.LongType.get());
  Types.NestedField valuesField = Types.NestedField.optional(
      10, "values", Types.ListType.ofOptional(11, Types.LongType.get()));
  Schema writeSchema = new Schema(idField, valuesField);

  Record record = GenericRecord.create(writeSchema.asStruct());
  record.setField("id", 34L);
  record.setField("values", values);

  // Projection 1: only the id column.
  Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()));
  Record idOnlyResult = writeAndRead("id_only", writeSchema, idOnly, record);
  Assert.assertEquals(
      "Should contain the correct id value", 34L, (long) idOnlyResult.getField("id"));
  Assert.assertNull("Should not project values list", idOnlyResult.getField("values"));

  // Projection 2: selecting the list element is expected to bring back the entire list.
  Schema elementOnly = writeSchema.select("values.element");
  Record elementResult = writeAndRead("element_only", writeSchema, elementOnly, record);
  Assert.assertNull("Should not project id", elementResult.getField("id"));
  Assert.assertEquals("Should project entire list", values, elementResult.getField("values"));

  // Projection 3: selecting the list column itself.
  Schema listOnly = writeSchema.select("values");
  Record listResult = writeAndRead("list_only", writeSchema, listOnly, record);
  Assert.assertNull("Should not project id", listResult.getField("id"));
  Assert.assertEquals("Should project entire list", values, listResult.getField("values"));
}
Record projectedLocation = (Record) projected.getField("location"); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project location", projectedLocation); projectedLocation = (Record) projected.getField("location"); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project location", projected.getField("location")); Assert.assertNull("Should not project longitude", projectedLocation.getField("long")); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.getField("lat"), 0.000001f); projectedLocation = (Record) projected.getField("location"); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project location", projected.getField("location")); Assert.assertNull("Should not project latitutde", projectedLocation.getField("lat")); Assert.assertEquals("Should project longitude", -1.539054f, (float) projectedLocation.getField("long"), 0.000001f); projectedLocation = (Record) projected.getField("location"); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertNotNull("Should project location", projected.getField("location")); Assert.assertEquals("Should project latitude", 52.995143f, (float) projectedLocation.getField("lat"), 0.000001f); Assert.assertEquals("Should project longitude", -1.539054f, (float) projectedLocation.getField("long"), 0.000001f);
Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project properties map", projected.getField("properties")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.getField("properties"))); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.getField("properties"))); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project entire map", properties, toStringMap((Map) projected.getField("properties")));
/**
 * Builds a record with id 34 and data "test", passes it through {@code writeAndRead} (defined
 * elsewhere) using the same schema for both schema arguments, and checks that both fields come
 * back with their original values.
 */
@Test
public void testFullProjection() throws Exception {
  Schema schema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()));

  Record written = GenericRecord.create(schema.asStruct());
  written.setField("id", 34L);
  written.setField("data", "test");

  Record readBack = writeAndRead("full_projection", schema, schema, written);

  long actualId = (long) readBack.getField("id");
  Assert.assertEquals("Should contain the correct id value", 34L, actualId);

  // The data field is compared as a CharSequence via Comparators.charSequences() rather than
  // with equals().
  CharSequence actualData = (CharSequence) readBack.getField("data");
  boolean sameData = Comparators.charSequences().compare("test", actualData) == 0;
  Assert.assertTrue("Should contain the correct data value", sameData);
}
Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project locations map", projected.getField("locations")); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project locations map", record.getField("locations"), toStringMap((Map) projected.getField("locations"))); Assert.assertNull("Should not project id", projected.getField("id")); Map<String, ?> locations = toStringMap((Map) projected.getField("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", Assert.assertNotNull("L1 should not be null", projectedL1); Assert.assertEquals("L1 should contain lat", 53.992811f, (float) projectedL1.getField("lat"), 0.000001); Assert.assertNull("L1 should not contain long", projectedL1.getField("long")); Record projectedL2 = (Record) locations.get("L2"); Assert.assertNotNull("L2 should not be null", projectedL2); Assert.assertEquals("L2 should contain lat", 52.995143f, (float) projectedL2.getField("lat"), 0.000001); Assert.assertNull("L2 should not contain long", projectedL2.getField("long")); Assert.assertNull("Should not project id", projected.getField("id")); locations = toStringMap((Map) projected.getField("locations")); Assert.assertNotNull("Should project locations map", locations); Assert.assertEquals("Should contain L1 and L2", projectedL1 = (Record) locations.get("L1"); Assert.assertNotNull("L1 should not be null", projectedL1);
/**
 * Builds a record with id 34 and data "test" and passes it through {@code writeAndRead}
 * (defined elsewhere) twice: once with a read schema holding only "id" and once with a read
 * schema holding only "data". Each result must contain the requested field and return null for
 * the other one.
 */
@Test
public void testBasicProjection() throws Exception {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()));

  Record record = GenericRecord.create(writeSchema.asStruct());
  record.setField("id", 34L);
  record.setField("data", "test");

  // Projection 1: id only.
  Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()));
  Record idResult = writeAndRead("basic_projection_id", writeSchema, idOnly, record);
  Assert.assertNull("Should not project data", idResult.getField("data"));
  Assert.assertEquals(
      "Should contain the correct id value", 34L, (long) idResult.getField("id"));

  // Projection 2: data only.
  Schema dataOnly = new Schema(Types.NestedField.optional(1, "data", Types.StringType.get()));
  Record dataResult = writeAndRead("basic_projection_data", writeSchema, dataOnly, record);
  Assert.assertNull("Should not project id", dataResult.getField("id"));

  // The data field is compared as a CharSequence via Comparators.charSequences().
  CharSequence actualData = (CharSequence) dataResult.getField("data");
  boolean sameData = Comparators.charSequences().compare("test", actualData) == 0;
  Assert.assertTrue("Should contain the correct data value", sameData);
}
/**
 * Builds a record with id 34 and data "test" under a schema whose field id 1 is named "data",
 * then passes it through {@code writeAndRead} (defined elsewhere) with a read schema in which
 * field id 1 is named "renamed". The value must be readable under the new name.
 */
@Test
public void testRename() throws Exception {
  Schema writeSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "data", Types.StringType.get()));

  // Same field ids as the write schema; only the name of field 1 differs.
  Schema readSchema = new Schema(
      Types.NestedField.required(0, "id", Types.LongType.get()),
      Types.NestedField.optional(1, "renamed", Types.StringType.get()));

  Record record = GenericRecord.create(writeSchema.asStruct());
  record.setField("id", 34L);
  record.setField("data", "test");

  Record readBack = writeAndRead("project_and_rename", writeSchema, readSchema, record);

  long actualId = (long) readBack.getField("id");
  Assert.assertEquals("Should contain the correct id value", 34L, actualId);

  CharSequence renamedValue = (CharSequence) readBack.getField("renamed");
  boolean sameData = Comparators.charSequences().compare("test", renamedValue) == 0;
  Assert.assertTrue("Should contain the correct data/renamed value", sameData);
}