@Test(dataProvider = "rowCount") public void testTextFile(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(column -> !column.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(TEXTFILE) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); }
@Test(dataProvider = "rowCount") public void testRCBinary(int rowCount) throws Exception { // RCBinary does not support complex type as key of a map and interprets empty VARCHAR as nulls List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> { String name = testColumn.getName(); return !name.equals("t_map_null_key_complex_key_value") && !name.equals("t_empty_varchar"); }).collect(toList()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); }
@Test(dataProvider = "rowCount") public void testRcBinaryPageSource(int rowCount) throws Exception { // RCBinary does not support complex type as key of a map and interprets empty VARCHAR as nulls List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) .collect(toList()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testOrc(int rowCount) throws Exception { assertThatFileFormat(ORC) .withColumns(TEST_COLUMNS) .withRowsCount(rowCount) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcTextPageSource(int rowCount) throws Exception { assertThatFileFormat(RCTEXT) .withColumns(TEST_COLUMNS) .withRowsCount(rowCount) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testTextFile(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(column -> !column.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(TEXTFILE) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); }
@Test(dataProvider = "rowCount") public void testDwrf(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .collect(Collectors.toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testParquetPageSource(int rowCount) throws Exception { List<TestColumn> testColumns = getTestColumnsSupportedByParquet(); assertThatFileFormat(PARQUET) .withColumns(testColumns) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
        .withColumns(columns)
        .isFailingForPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage)
        .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

        .withColumns(columns)
        .isFailingForPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage)
        .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

        .withColumns(columns)
        .isFailingForPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);

        .withColumns(columns)
        .withSession(parquetPageSourceSession)
        .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);

        .withColumns(columns)
        .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

        .withColumns(columns)
        .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
@Test(dataProvider = "rowCount") public void testJson(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // binary is not supported .filter(column -> !column.getName().equals("t_binary")) // non-string map keys are not supported .filter(column -> !column.getName().equals("t_map_tinyint")) .filter(column -> !column.getName().equals("t_map_smallint")) .filter(column -> !column.getName().equals("t_map_int")) .filter(column -> !column.getName().equals("t_map_bigint")) .filter(column -> !column.getName().equals("t_map_float")) .filter(column -> !column.getName().equals("t_map_double")) // null map keys are not supported .filter(column -> !column.getName().equals("t_map_null_key")) .filter(column -> !column.getName().equals("t_map_null_key_complex_key_value")) .filter(column -> !column.getName().equals("t_map_null_key_complex_value")) // decimal(38) is broken or not supported .filter(column -> !column.getName().equals("t_decimal_precision_38")) .filter(column -> !column.getName().equals("t_map_decimal_precision_38")) .filter(column -> !column.getName().equals("t_array_decimal_precision_38")) .collect(toList()); assertThatFileFormat(JSON) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); }
@Test(dataProvider = "rowCount") public void testOrcOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(ORC) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testDwrfOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // DWRF does not support modern Hive types // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcBinaryOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // RCBinary interprets empty VARCHAR as nulls .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcTextOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toImmutableList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRCText(int rowCount) throws Exception { List<TestColumn> testColumns = ImmutableList.copyOf(filter(TEST_COLUMNS, testColumn -> { // TODO: This is a bug in the RC text reader // RC file does not support complex type as key of a map return !testColumn.getName().equals("t_struct_null") && !testColumn.getName().equals("t_map_null_key_complex_key_value"); })); assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); }
@Test(dataProvider = "rowCount") public void testRcTextPageSource(int rowCount) throws Exception { assertThatFileFormat(RCTEXT) .withColumns(TEST_COLUMNS) .withRowsCount(rowCount) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testDwrf(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .collect(Collectors.toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testAvro(int rowCount) throws Exception { assertThatFileFormat(AVRO) .withColumns(getTestColumnsSupportedByAvro()) .withRowsCount(rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); }
@Test(dataProvider = "rowCount") public void testParquetPageSource(int rowCount) throws Exception { List<TestColumn> testColumns = getTestColumnsSupportedByParquet(); assertThatFileFormat(PARQUET) .withColumns(testColumns) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testOrc(int rowCount) throws Exception { assertThatFileFormat(ORC) .withColumns(TEST_COLUMNS) .withRowsCount(rowCount) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }