@Test(dataProvider = "rowCount") public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception { List<TestColumn> writeColumns = getTestColumnsSupportedByParquet(); // test index-based access List<TestColumn> readColumns = writeColumns.stream() .map(column -> new TestColumn( column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey())) .collect(toList()); assertThatFileFormat(PARQUET) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); // test name-based access readColumns = Lists.reverse(writeColumns); assertThatFileFormat(PARQUET) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withSession(parquetPageSourceSessionUseName) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
        .withReadColumns(ImmutableList.of(readColumn))
        .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS))
        .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

        .withReadColumns(ImmutableList.of(readColumn))
        .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS))
        .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

        .withReadColumns(ImmutableList.of(readColumn))
        .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS));

        .withReadColumns(ImmutableList.of(readColumn))
        .withSession(parquetPageSourceSession)
        .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));

        .withReadColumns(ImmutableList.of(readColumn))
        .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

        .withReadColumns(ImmutableList.of(readColumn))
        .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

        .withReadColumns(ImmutableList.of(readColumn))
        .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
@Test(dataProvider = "rowCount") public void testOrcUseColumnNames(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(ORC) .withWriteColumns(TEST_COLUMNS) .withRowsCount(rowCount) .withReadColumns(Lists.reverse(TEST_COLUMNS)) .withSession(session) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, true, HDFS_ENVIRONMENT, STATS)); }
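// Convenience setter: applies the same column definitions to both the write and read side of the assertion.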
public FileFormatAssertion withColumns(List<TestColumn> inputColumns)
{
    withWriteColumns(inputColumns);
    withReadColumns(inputColumns);
    return this;
}
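// Hedged usage sketch (not from the original class): shows how withColumns composes with the
// other fluent setters when the write and read schemas are identical. The column definition is
// illustrative only; javaLongObjectInspector is assumed to come from Hive's
// PrimitiveObjectInspectorFactory, as used elsewhere in this test class.
@Test(dataProvider = "rowCount")
public void testOrcRoundTripSketch(int rowCount)
        throws Exception
{
    // write 42L and expect to read 42L back through the ORC page source
    TestColumn column = new TestColumn("test_bigint", javaLongObjectInspector, 42L, 42L);
    assertThatFileFormat(ORC)
            .withColumns(ImmutableList.of(column))
            .withRowsCount(rowCount)
            .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS));
}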