@Test
public void testHoodieWriteSupport() throws Exception {
  List<String> rowKeys = new ArrayList<>();
  for (int i = 0; i < 1000; i++) {
    rowKeys.add(UUID.randomUUID().toString());
  }
  String filePath = basePath + "/test.parquet";
  writeParquetFile(filePath, rowKeys);

  // Read and verify
  List<String> rowKeysInFile = new ArrayList<>(
      ParquetUtils.readRowKeysFromParquet(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
  Collections.sort(rowKeysInFile);
  Collections.sort(rowKeys);
  assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile);

  BloomFilter filterInFile =
      ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
  for (String rowKey : rowKeys) {
    assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey));
  }
}
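// The test above calls a writeParquetFile helper that is not part of this excerpt.
// Below is a minimal sketch of what such a helper could look like, assuming a single-field
// record-key schema (e.g. HoodieAvroUtils.getRecordKeySchema()), the
// HoodieAvroWriteSupport(MessageType, Schema, BloomFilter) constructor, the
// BloomFilter(numEntries, errorRate) constructor, and the usual parquet-avro imports
// (AvroSchemaConverter, ParquetWriter, CompressionCodecName, GenericData). Exact
// signatures vary across Hudi versions, so treat this as an illustration only.
private void writeParquetFile(String filePath, List<String> rowKeys) throws Exception {
  Schema schema = HoodieAvroUtils.getRecordKeySchema();
  // Size the bloom filter for the keys being written; HoodieAvroWriteSupport serializes it
  // into the parquet footer, which is what readBloomFilterFromParquetMetadata reads back.
  BloomFilter filter = new BloomFilter(1000, 0.0001);
  HoodieAvroWriteSupport writeSupport =
      new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
  ParquetWriter<GenericRecord> writer = new ParquetWriter<>(new Path(filePath), writeSupport,
      CompressionCodecName.GZIP, ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE);
  for (String rowKey : rowKeys) {
    GenericRecord record = new GenericData.Record(schema);
    record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey);
    writer.write(record);
    // Add each key to the filter before close() so the footer metadata is complete.
    filter.add(rowKey);
  }
  writer.close();
}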
assertEquals("file should contain 100 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), 100); Path newFile = new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1)); assertEquals("file should contain 140 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);
assertEquals("file should contain 100 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), 100); Path newFile = new Path(basePath, testPartitionPath + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1)); assertEquals("file should contain 140 records", ParquetUtils.readRowKeysFromParquet(jsc.hadoopConfiguration(), newFile).size(), 140);