@Test
public void testFullScan() {
  // Scan the shared table with no projection or filter; the result should contain
  // every record from all three data files.
  Iterable<Record> results = IcebergGenerics.read(sharedTable).build();

  Set<Record> expected = Sets.newHashSet();
  expected.addAll(file1Records);
  expected.addAll(file2Records);
  expected.addAll(file3Records);

  Set<Record> records = Sets.newHashSet(results);
  Assert.assertEquals("Should produce correct number of records",
      expected.size(), records.size());
  // expected is already a Set; comparing it directly avoids a redundant copy
  Assert.assertEquals("Random record set should match", expected, records);
}
@Test
public void testProject() {
  // Project only the id column; verify both the projected record width and the values.
  Iterable<Record> results = IcebergGenerics.read(sharedTable).select("id").build();

  // Gather the expected id values from each of the three source files.
  Set<Long> expectedIds = Sets.newHashSet();
  expectedIds.addAll(Lists.transform(file1Records, rec -> (Long) rec.getField("id")));
  expectedIds.addAll(Lists.transform(file2Records, rec -> (Long) rec.getField("id")));
  expectedIds.addAll(Lists.transform(file3Records, rec -> (Long) rec.getField("id")));

  // Every projected record should carry exactly one field.
  for (Record rec : results) {
    Assert.assertEquals("Record should have one projected field", 1, rec.size());
  }

  Assert.assertEquals("Should project only id columns", expectedIds,
      Sets.newHashSet(transform(results, rec -> (Long) rec.getField("id"))));
}
@Test
public void testRandomData() throws IOException {
  List<Record> expected = RandomGenericData.generate(SCHEMA, 1000, 435691832918L);

  // Use a fresh, non-existent directory as the table location.
  File location = temp.newFolder(format.name());
  Assert.assertTrue(location.delete());
  Table table = TABLES.create(
      SCHEMA,
      PartitionSpec.unpartitioned(),
      ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
      location.toString());

  // Split the generated records into files of up to 200 records each and append
  // all of them to the table in a single commit.
  AppendFiles append = table.newAppend();
  int recordsPerFile = 200;
  int fileNum = 0;
  Iterator<Record> iter = expected.iterator();
  while (iter.hasNext()) {
    String filename = format.addExtension("file-" + fileNum);

    List<Record> fileRecords = Lists.newArrayList();
    while (fileRecords.size() < recordsPerFile && iter.hasNext()) {
      fileRecords.add(iter.next());
    }

    writeFile(location.toString(), filename, fileRecords);

    Path path = new Path(location.toString(), filename);
    append.appendFile(fromInputFile(HadoopInputFile.fromPath(path, CONF), fileRecords.size()));
    fileNum += 1;
  }
  append.commit();

  Set<Record> records = Sets.newHashSet(IcebergGenerics.read(table).build());
  Assert.assertEquals("Should produce correct number of records",
      expected.size(), records.size());
  Assert.assertEquals("Random record set should match", Sets.newHashSet(expected), records);
}
@Test
public void testProjectWithMissingFilterColumn() {
  // Select only "data" while filtering on "id", which is not in the projection.
  Iterable<Record> results = IcebergGenerics.read(sharedTable)
      .where(Expressions.greaterThanOrEqual("id", 1))
      .where(Expressions.lessThan("id", 21))
      .select("data")
      .build();

  // Compute the expected data values for ids in the half-open range [1, 21).
  Set<String> expected = Sets.newHashSet();
  for (Record rec : concat(file1Records, file2Records, file3Records)) {
    Long id = (Long) rec.getField("id");
    if (id >= 1 && id < 21) {
      expected.add(rec.getField("data").toString());
    }
  }

  // NOTE(review): only "data" is selected, but the test expects two fields per
  // record — presumably the filter column is retained in the projection.
  for (Record rec : results) {
    Assert.assertEquals("Record should have two projected fields", 2, rec.size());
  }

  Assert.assertEquals("Should project correct rows", expected,
      Sets.newHashSet(transform(results, rec -> rec.getField("data").toString())));
}
@Test
public void testFilter() {
  // id < 3 should match exactly the contents of file 1.
  Iterable<Record> result = IcebergGenerics.read(sharedTable).where(lessThan("id", 3)).build();
  Assert.assertEquals("Records should match file 1",
      Sets.newHashSet(file1Records), Sets.newHashSet(result));

  // id <= 1 should drop the id == 2 record from file 1.
  result = IcebergGenerics.read(sharedTable).where(lessThanOrEqual("id", 1)).build();
  Set<Record> expected =
      Sets.newHashSet(filter(file1Records, r -> (Long) r.getField("id") <= 1));
  Assert.assertEquals("Records should match file 1 without id 2",
      expected, Sets.newHashSet(result));
}
/**
 * Returns a builder to configure a read of the given table that produces generic records.
 *
 * @param table an Iceberg table to read
 * @return a builder to configure the scan
 * @throws NullPointerException if {@code table} is null
 */
public static ScanBuilder read(Table table) {
  // Fail fast with a clear message instead of deferring the NPE to scan time.
  return new ScanBuilder(java.util.Objects.requireNonNull(table, "table cannot be null"));
}