/**
 * Reads the footers of every file found (recursively) under the given path,
 * using summary files if possible.
 *
 * @param configuration the configuration to access the FS
 * @param pathStatus the root dir; when it is not a directory it is treated as the only file
 * @param skipRowGroups forwarded unchanged to
 *        {@code readAllFootersInParallelUsingSummaryFiles}
 * @return all the footers
 * @throws IOException propagated from listing the files or from the footer reader
 */
public static List<Footer> readFooters(Configuration configuration, FileStatus pathStatus, boolean skipRowGroups) throws IOException {
  // The file listing is computed first, then handed straight to the summary-aware reader.
  return readAllFootersInParallelUsingSummaryFiles(
      configuration, listFiles(configuration, pathStatus), skipRowGroups);
}
/**
 * Collects every non-directory entry reachable from {@code fileStatus},
 * descending into sub-directories recursively in listing order.
 *
 * @param conf the configuration used to resolve the file system of the path
 * @param fileStatus a directory to walk, or a single file
 * @return the files found; a one-element fixed-size list when {@code fileStatus}
 *         is not a directory
 * @throws IOException if the file system cannot be obtained or a listing fails
 */
private static List<FileStatus> listFiles(Configuration conf, FileStatus fileStatus) throws IOException {
  // Base case of the recursion: a plain file is its own (single) result.
  if (!fileStatus.isDir()) {
    return Arrays.asList(fileStatus);
  }

  FileSystem fileSystem = fileStatus.getPath().getFileSystem(conf);
  List<FileStatus> collected = new ArrayList<FileStatus>();
  // Entries are filtered through HiddenFileFilter.INSTANCE
  // (presumably excludes hidden files -- confirm against HiddenFileFilter).
  for (FileStatus child : fileSystem.listStatus(fileStatus.getPath(), HiddenFileFilter.INSTANCE)) {
    collected.addAll(listFiles(conf, child));
  }
  return collected;
}
/**
 * Reads the footers of every file found (recursively) under the given path,
 * without using summary files. Row groups are not skipped: {@code false} is
 * passed as the last argument of the list-based overload.
 *
 * @param configuration the configuration to access the FS
 * @param fileStatus the root dir
 * @return all the footers
 * @throws IOException propagated from listing the files or from the footer reader
 */
public static List<Footer> readAllFootersInParallel(Configuration configuration, FileStatus fileStatus) throws IOException {
  return readAllFootersInParallel(
      configuration, listFiles(configuration, fileStatus), false);
}