/**
 * Builds the batched list of partition locations for the Hive scan.
 *
 * <p>One {@code HivePartitionLocation} is created per partition of the scan's
 * {@code HiveReadEntry}, from the partition's values and its storage-descriptor
 * location. The resulting list is split into sublists of at most
 * {@code PartitionDescriptor.PARTITION_BATCH_SIZE} elements, stored in
 * {@code locationSuperList}, and {@code sublistsCreated} is set.
 */
@Override
protected void createPartitionSublists() {
  final HiveScan hiveScan = (HiveScan) scanRel.getGroupScan();
  final HiveReadEntry readEntry = hiveScan.getHiveReadEntry();

  final List<PartitionLocation> partitionLocations = new LinkedList<>();
  for (final Partition hivePartition : readEntry.getPartitions()) {
    final PartitionLocation location =
        new HivePartitionLocation(hivePartition.getValues(), hivePartition.getSd().getLocation());
    partitionLocations.add(location);
  }

  locationSuperList = Lists.partition(partitionLocations, PartitionDescriptor.PARTITION_BATCH_SIZE);
  sublistsCreated = true;
}
/**
 * Builds the batched list of partition locations for the Parquet group scan.
 *
 * <p>Each entry of {@code groupScan.getFileSet()} becomes one
 * {@code ParquetPartitionLocation}. The resulting list is split into sublists of
 * at most {@code PartitionDescriptor.PARTITION_BATCH_SIZE} elements, stored in
 * {@code locationSuperList}, and {@code sublistsCreated} is set.
 */
@Override
protected void createPartitionSublists() {
  final List<PartitionLocation> partitionLocations = new LinkedList<>();
  for (final String filePath : groupScan.getFileSet()) {
    final PartitionLocation location = new ParquetPartitionLocation(filePath);
    partitionLocations.add(location);
  }

  locationSuperList = Lists.partition(partitionLocations, PartitionDescriptor.PARTITION_BATCH_SIZE);
  sublistsCreated = true;
}
@Override protected void createPartitionSublists() { final Pair<Collection<String>, Boolean> fileLocationsAndStatus = getFileLocationsAndStatus(); List<PartitionLocation> locations = new LinkedList<>(); boolean hasDirsOnly = fileLocationsAndStatus.right; final String selectionRoot = getBaseTableLocation(); // map used to map the partition keys (dir0, dir1, ..), to the list of partitions that share the same partition keys. // For example, // 1990/Q1/1.parquet, 2.parquet // would have <1990, Q1> as key, and value as list of partition location for 1.parquet and 2.parquet. HashMap<List<String>, List<PartitionLocation>> dirToFileMap = new HashMap<>(); // Figure out the list of leaf subdirectories. For each leaf subdirectory, find the list of files (DFSFilePartitionLocation) // it contains. for (String file: fileLocationsAndStatus.left) { DFSFilePartitionLocation dfsFilePartitionLocation = new DFSFilePartitionLocation(MAX_NESTED_SUBDIRS, selectionRoot, file, hasDirsOnly); final String[] dirs = dfsFilePartitionLocation.getDirs(); final List<String> dirList = Arrays.asList(dirs); if (!dirToFileMap.containsKey(dirList)) { dirToFileMap.put(dirList, new ArrayList<PartitionLocation>()); } dirToFileMap.get(dirList).add(dfsFilePartitionLocation); } // build a list of DFSDirPartitionLocation. for (final List<String> dirs : dirToFileMap.keySet()) { locations.add( new DFSDirPartitionLocation(dirs.toArray(new String[dirs.size()]), dirToFileMap.get(dirs))); } locationSuperList = Lists.partition(locations, PartitionDescriptor.PARTITION_BATCH_SIZE); sublistsCreated = true; }