private void assertCurrentDateAtInstant(TimeZoneKey timeZoneKey, long instant)
{
    // Expected result: the epoch-day count of this instant evaluated in the given zone.
    long expectedDays = epochDaysInZone(timeZoneKey, instant);
    // NOTE(review): isLegacyTimestamp(session) reads an enclosing-class field named
    // "session" — confirm that field is initialized before this helper runs.
    TestingConnectorSession testSession = new TestingConnectorSession(
            "test",
            Optional.empty(),
            Optional.empty(),
            timeZoneKey,
            US,
            instant,
            ImmutableList.of(),
            ImmutableMap.of(),
            isLegacyTimestamp(session));
    assertEquals(currentDate(testSession), expectedDays);
}
@Test
public void testGetTableStatisticsEmpty()
{
    // A partition whose statistics are empty must produce empty table statistics.
    String partitionName = "p1=string1/p2=1234";
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (table, hivePartitions) -> ImmutableMap.of(partitionName, PartitionStatistics.empty()));
    HiveSessionProperties sessionProperties =
            new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(partition(partitionName)));
    assertEquals(actual, TableStatistics.empty());
}
@Test
public void testGetTableStatisticsSampling()
{
    // With a sample size of 1, only one of the two partitions below should reach
    // the statistics fetcher; the assertions inside the lambda verify that.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((table, hivePartitions) -> {
        assertEquals(table, TABLE);
        assertEquals(hivePartitions.size(), 1);
        return ImmutableMap.of();
    });
    HiveSessionProperties sessionProperties = new HiveSessionProperties(
            new HiveClientConfig().setPartitionStatisticsSampleSize(1),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(partition("p1=string1/p2=1234"), partition("p1=string1/p2=1235")));
}
private ConnectorSession sampleSize(int sampleSize)
{
    // Build a session whose partition-statistics sample size is overridden.
    HiveClientConfig config = getHiveClientConfig().setPartitionStatisticsSampleSize(sampleSize);
    return new TestingConnectorSession(
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig())
                    .getSessionProperties());
}
@Test(dataProvider = "rowCount") public void testRcBinaryOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // RCBinary interprets empty VARCHAR as nulls .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test
public void testGetTableStatisticsUnpartitioned()
{
    // Metastore reports 1000 rows, integer column in [-100, 100] with 500 nulls
    // and 300 distinct values; the provider should translate that into a row
    // count estimate, a range, a nulls fraction of 0.5, and the NDV estimate.
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty()))
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    HiveSessionProperties sessionProperties =
            new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());
    ColumnStatistics expectedColumnStatistics = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, expectedColumnStatistics)
            .build();
    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));
    assertEquals(actual, expected);
}
private static TestingConnectorSession getSession(HiveClientConfig config)
{
    // Wrap the supplied Hive config in a session using default writer configs.
    HiveSessionProperties properties =
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    return new TestingConnectorSession(properties.getSessionProperties());
}
String partitionName = "p1=string1/p2=1234"; MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((table, hivePartitions) -> ImmutableMap.of(partitionName, corruptedStatistics)); TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties( new HiveClientConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), .isInstanceOf(PrestoException.class) .hasFieldOrPropertyWithValue("errorCode", HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode()); TestingConnectorSession ignoreSession = new TestingConnectorSession(new HiveSessionProperties( new HiveClientConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(),
protected ConnectorSession newSession()
{
    // Fresh session built from the test's Hive config and default writer configs.
    HiveSessionProperties properties =
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    return new TestingConnectorSession(properties.getSessionProperties());
}
@Test(dataProvider = "rowCount") public void testOrcOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(ORC) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testDwrfOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // DWRF does not support modern Hive types // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount")
public void testOrcUseColumnNames(int rowCount)
        throws Exception
{
    // Write columns in one order and read them reversed; the name-based ORC
    // reader (useOrcColumnNames = true) must resolve columns by name, not index.
    HiveSessionProperties sessionProperties =
            new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    assertThatFileFormat(ORC)
            .withWriteColumns(TEST_COLUMNS)
            .withRowsCount(rowCount)
            .withReadColumns(Lists.reverse(TEST_COLUMNS))
            .withSession(session)
            .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, true, HDFS_ENVIRONMENT, STATS));
}
protected ConnectorSession newSession()
{
    // Fresh session built from this test's Hive client config with default writer configs.
    HiveSessionProperties properties = new HiveSessionProperties(
            getHiveClientConfig(),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    return new TestingConnectorSession(properties.getSessionProperties());
}
@Test(dataProvider = "rowCount") public void testRcTextOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toImmutableList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
private static BackgroundHiveSplitLoader backgroundHiveSplitLoaderOfflinePartitions()
{
    // Loader over SIMPLE_TABLE whose partition metadata includes offline partitions.
    HiveSessionProperties sessionProperties = new HiveSessionProperties(
            new HiveClientConfig().setMaxSplitSize(new DataSize(1.0, GIGABYTE)),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    ConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    return new BackgroundHiveSplitLoader(
            SIMPLE_TABLE,
            createPartitionMetadataWithOfflinePartitions(),
            TupleDomain.all(),
            createBucketSplitInfo(Optional.empty(), Optional.empty()),
            session,
            new TestingHdfsEnvironment(),
            new NamenodeStats(),
            new TestingDirectoryLister(TEST_FILES),
            directExecutor(),
            2,
            false);
}
.build(); MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((table, hivePartitions) -> ImmutableMap.of(partitionName, statistics)); TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty()); TableStatistics expected = TableStatistics.builder()
private static BackgroundHiveSplitLoader backgroundHiveSplitLoader(
        List<LocatedFileStatus> files,
        TupleDomain<HiveColumnHandle> compactEffectivePredicate,
        Optional<HiveBucketFilter> hiveBucketFilter,
        Table table,
        Optional<HiveBucketHandle> bucketHandle)
{
    // Single synthetic partition for the fixed schema/table used by these tests.
    HivePartitionMetadata partitionMetadata = new HivePartitionMetadata(
            new HivePartition(new SchemaTableName("testSchema", "table_name")),
            Optional.empty(),
            ImmutableMap.of());
    HiveSessionProperties sessionProperties = new HiveSessionProperties(
            new HiveClientConfig().setMaxSplitSize(new DataSize(1.0, GIGABYTE)),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    ConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    return new BackgroundHiveSplitLoader(
            table,
            ImmutableList.of(partitionMetadata),
            compactEffectivePredicate,
            createBucketSplitInfo(bucketHandle, hiveBucketFilter),
            session,
            new TestingHdfsEnvironment(),
            new NamenodeStats(),
            new TestingDirectoryLister(files),
            EXECUTOR,
            2,
            false);
}
private static void assertCurrentDateAtInstant(TimeZoneKey timeZoneKey, long instant)
{
    // Expected result: the epoch-day count of this instant evaluated in the given zone.
    long expectedDays = epochDaysInZone(timeZoneKey, instant);
    TestingConnectorSession testSession =
            new TestingConnectorSession("test", timeZoneKey, US, instant, ImmutableList.of(), ImmutableMap.of());
    assertEquals(currentDate(testSession), expectedDays);
}
private ConnectorSession sampleSize(int sampleSize)
{
    // Session whose partition-statistics sample size is set to the given value.
    return new TestingConnectorSession(
            new HiveSessionProperties(
                    getHiveClientConfig().setPartitionStatisticsSampleSize(sampleSize),
                    new OrcFileWriterConfig(),
                    new ParquetFileWriterConfig())
                    .getSessionProperties());
}
@Test(dataProvider = "rowCount")
public void testOrcUseColumnNames(int rowCount)
        throws Exception
{
    // Columns are read in reverse of the write order, so the page source must
    // match ORC columns by name (useOrcColumnNames = true), not by position.
    TestingConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig())
                    .getSessionProperties());
    List<TestColumn> readColumns = Lists.reverse(TEST_COLUMNS);
    assertThatFileFormat(ORC)
            .withWriteColumns(TEST_COLUMNS)
            .withRowsCount(rowCount)
            .withReadColumns(readColumns)
            .withSession(session)
            .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, true, HDFS_ENVIRONMENT, STATS));
}