HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
        table,
        hivePartitions,
        // ... remaining constructor arguments elided ...
private ListenableFuture<?> loadPartition(HivePartitionMetadata partition)
        throws IOException
{
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = (TupleDomain<HiveColumnHandle>) compactEffectivePredicate;
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

    // Fragment: fs, splitFactory, targetSplits, jobConf, bucketConversion and
    // splittable are constructed in code elided from this snippet.

    // Symlink tables: expand each symlink target and add its splits one target at a time.
    if (inputFormat instanceof SymlinkTextInputFormat) {
        ListenableFuture<?> lastResult = COMPLETED_FUTURE;
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // ... per-target splitFactory construction elided; its trailing arguments were:
            //         isForceLocalScheduling(session),
            //         s3SelectPushdownEnabled);
            lastResult = addSplitsToSource(targetSplits, splitFactory);
            if (stopped) {
                return COMPLETED_FUTURE;
            }
        }
        return lastResult;
    }

    // Custom input formats annotated with UseFileSplitsFromInputFormat compute their own splits.
    if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
        }
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        return addSplitsToSource(splits, splitFactory);
    }

    // Bucketed partitions are enumerated eagerly; everything else goes through the file iterator.
    if (tableBucketInfo.isPresent()) {
        return hiveSplitSource.addToQueue(getBucketedSplits(path, fs, splitFactory, tableBucketInfo.get(), bucketConversion));
    }
    fileIterators.addLast(createInternalHiveSplitIterator(path, fs, splitFactory, splittable));
    return COMPLETED_FUTURE;
}
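The guard above keys off the UseFileSplitsFromInputFormat marker annotation named in the error message. A plausible self-contained sketch of such a check, matching the annotation by simple name so no compile-time dependency on it is needed (InputFormatChecks is a hypothetical wrapper, not from this codebase):

import java.lang.annotation.Annotation;
import java.util.Arrays;

final class InputFormatChecks
{
    // Walk the input format's annotations and match the marker by simple name,
    // so custom input formats can opt in without a shared annotation jar.
    static boolean shouldUseFileSplitsFromInputFormat(Object inputFormat)
    {
        return Arrays.stream(inputFormat.getClass().getAnnotations())
                .map(Annotation::annotationType)
                .map(Class::getSimpleName)
                .anyMatch(name -> name.equals("UseFileSplitsFromInputFormat"));
    }
}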
private ListenableFuture<?> loadSplits()
        throws IOException
{
    Iterator<InternalHiveSplit> splits = fileIterators.poll();
    if (splits == null) {
        HivePartitionMetadata partition = partitions.poll();
        if (partition == null) {
            return COMPLETED_FUTURE;
        }
        return loadPartition(partition);
    }

    while (splits.hasNext() && !stopped) {
        ListenableFuture<?> future = hiveSplitSource.addToQueue(splits.next());
        if (!future.isDone()) {
            fileIterators.addFirst(splits);
            return future;
        }
    }

    // No need to put the iterator back, since it's either empty or we've stopped
    return COMPLETED_FUTURE;
}
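The loop above is a yield-and-resume pattern: when the queue reports backpressure (the returned future is not done), the iterator is pushed back onto the deque and the pending future is handed to the caller, which reschedules the task once the future completes. A minimal self-contained sketch of the same pattern; YieldingLoader and BackpressureQueue are hypothetical names, not from this codebase:

import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;

final class YieldingLoader<T>
{
    private final Deque<Iterator<T>> iterators = new ArrayDeque<>();
    private final BackpressureQueue<T> queue;  // hypothetical queue whose add() returns a future

    YieldingLoader(BackpressureQueue<T> queue)
    {
        this.queue = queue;
    }

    // Drain one iterator, yielding (returning a pending future) when the queue pushes back.
    ListenableFuture<?> drainOnce()
    {
        Iterator<T> it = iterators.poll();
        if (it == null) {
            return Futures.immediateFuture(null);
        }
        while (it.hasNext()) {
            ListenableFuture<?> future = queue.add(it.next());
            if (!future.isDone()) {
                iterators.addFirst(it);  // resume this iterator on the next call
                return future;           // caller reschedules when the future completes
            }
        }
        return Futures.immediateFuture(null);
    }

    interface BackpressureQueue<T>
    {
        ListenableFuture<?> add(T item);
    }
}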
// Earlier revision of the same partition-loading logic: the effective predicate
// comes from the partition itself, and getConfiguration() takes no HdfsContext.
Properties schema = getPartitionSchema(table, partition.getPartition());
List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();
Path path = new Path(getPartitionLocation(table, partition.getPartition()));
Configuration configuration = hdfsEnvironment.getConfiguration(path);
InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

// Per-file case (surrounding loop elided):
hiveSplitSource.addToQueue(createHiveSplits(
        partitionName,
        file.getPath().toString(),
        // ... remaining createHiveSplits arguments elided ...

// Bucketed case: only the single file backing the requested bucket is read.
Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
if (bucketFile.isPresent()) {
    FileStatus file = bucketFile.get();
    boolean splittable = isSplittable(inputFormat, fs, file.getPath());
    hiveSplitSource.addToQueue(createHiveSplits(
            partitionName,
            file.getPath().toString(),
            // ... remaining createHiveSplits arguments elided ...
// Fragments from an older revision that returned CompletableFuture and pulled
// file metadata from the HiveFileIterator ("files").
if (partition == null) {
    return COMPLETED_FUTURE;
}
// loadPartition did not yet return a future in this revision.
loadPartition(partition);
return COMPLETED_FUTURE;

boolean splittable = isSplittable(
        files.getInputFormat(),
        hdfsEnvironment.getFileSystem(file.getPath()),
        file.getPath());
CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(
        files.getPartitionName(),
        file.getPath().toString(),
        // ... remaining createHiveSplits arguments elided ...
private static BackgroundHiveSplitLoader backgroundHiveSplitLoader(
        List<LocatedFileStatus> files,
        TupleDomain<HiveColumnHandle> compactEffectivePredicate,
        Optional<HiveBucketFilter> hiveBucketFilter,
        Table table,
        Optional<HiveBucketHandle> bucketHandle)
{
    List<HivePartitionMetadata> hivePartitionMetadatas = ImmutableList.of(
            new HivePartitionMetadata(
                    new HivePartition(new SchemaTableName("testSchema", "table_name")),
                    Optional.empty(),
                    ImmutableMap.of()));

    ConnectorSession connectorSession = new TestingConnectorSession(
            new HiveSessionProperties(
                    new HiveClientConfig().setMaxSplitSize(new DataSize(1.0, GIGABYTE)),
                    new OrcFileWriterConfig(),
                    new ParquetFileWriterConfig())
                    .getSessionProperties());

    return new BackgroundHiveSplitLoader(
            table,
            hivePartitionMetadatas,
            compactEffectivePredicate,
            createBucketSplitInfo(bucketHandle, hiveBucketFilter),
            connectorSession,
            new TestingHdfsEnvironment(),
            new NamenodeStats(),
            new TestingDirectoryLister(files),
            EXECUTOR,
            2,
            false);
}
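A hypothetical sketch of how a test might exercise this helper; SIMPLE_TABLE and hiveSplitSource are assumed fixtures that are not defined in the snippet above, with start(...) as the loader's entry point:

// Hypothetical test usage; SIMPLE_TABLE and hiveSplitSource are assumed fixtures.
BackgroundHiveSplitLoader loader = backgroundHiveSplitLoader(
        ImmutableList.of(),   // no files listed for this partition
        TupleDomain.all(),    // no effective predicate
        Optional.empty(),     // no bucket filter
        SIMPLE_TABLE,         // assumed Table fixture
        Optional.empty());    // no bucket handle
loader.start(hiveSplitSource);  // kick off background split loading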
HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
        connectorId,
        table.get(),
        // ... remaining constructor arguments elided ...