/**
 * Lists all files under {@code path}, delegating to
 * {@link #listFilesRecursively(FileSystem, Path, PathFilter)} with {@code NO_OP_PATH_FILTER}
 * as the file filter.
 *
 * @param fs file system used to resolve and list {@code path}
 * @param path root of the listing
 * @return the {@link FileStatus} entries collected by the delegate
 * @throws IOException if the delegate fails
 */
public static List<FileStatus> listFilesRecursively(FileSystem fs, Path path) throws IOException {
  List<FileStatus> listed = listFilesRecursively(fs, path, NO_OP_PATH_FILTER);
  return listed;
}
/**
 * Lists every file under {@code path}, or the directory itself where a directory has no
 * entries, delegating to {@link #listMostNestedPathRecursively(FileSystem, Path, PathFilter)}
 * with {@code NO_OP_PATH_FILTER} as the file filter.
 *
 * @param fs file system used to resolve and list {@code path}
 * @param path root of the listing
 * @return the {@link FileStatus} entries collected by the delegate
 * @throws IOException if the delegate fails
 */
public static List<FileStatus> listMostNestedPathRecursively(FileSystem fs, Path path) throws IOException {
  List<FileStatus> mostNested = listMostNestedPathRecursively(fs, path, NO_OP_PATH_FILTER);
  return mostNested;
}
/**
 * Lists all files under {@code path}. The supplied {@link PathFilter} acts as a file filter only:
 * the recursion is started with {@code applyFilterToDirectories} set to {@code false}.
 *
 * @param fs file system used to resolve and list {@code path}
 * @param path root of the listing
 * @param fileFilter filter applied to file {@link Path}s
 * @return the accumulated list of {@link FileStatus} entries
 * @throws IOException if resolving {@code path} or listing beneath it fails
 */
public static List<FileStatus> listFilesRecursively(FileSystem fs, Path path, PathFilter fileFilter)
    throws IOException {
  List<FileStatus> accumulator = Lists.newArrayList();
  FileStatus rootStatus = fs.getFileStatus(path);
  return listFilesRecursivelyHelper(fs, accumulator, rootStatus, fileFilter, false);
}
/** * Sets the {@link FsPermission}, owner, group for the path passed. And recursively to all directories and files under * it. */ private void setRecursivePermission(Path path, OwnerAndPermission ownerAndPermission) throws IOException { List<FileStatus> files = FileListUtils.listPathsRecursively(this.fs, path, FileListUtils.NO_OP_PATH_FILTER); // Set permissions bottom up. Permissions are set to files first and then directories Collections.reverse(files); for (FileStatus file : files) { safeSetPathPermission(file.getPath(), addExecutePermissionsIfRequired(file, ownerAndPermission)); } }
/**
 * Lists every file under {@code path}, or the directory itself where a directory has no entries.
 * The supplied {@link PathFilter} acts as a file filter: it is only applied to file {@link Path}s.
 *
 * @param fs file system used to resolve and list {@code path}
 * @param path root of the listing
 * @param fileFilter filter applied to file {@link Path}s
 * @return the accumulated list of {@link FileStatus} entries
 * @throws IOException if resolving {@code path} or listing beneath it fails
 */
public static List<FileStatus> listMostNestedPathRecursively(FileSystem fs, Path path, PathFilter fileFilter)
    throws IOException {
  List<FileStatus> accumulator = Lists.newArrayList();
  FileStatus rootStatus = fs.getFileStatus(path);
  return listMostNestedPathRecursivelyHelper(fs, accumulator, rootStatus, fileFilter);
}
/**
 * Lists all paths (files and directories) under {@code path}. When the
 * {@link org.apache.hadoop.fs.FileSystem} cannot list the contents of a directory encountered
 * during the walk, an error is logged and that directory's contents are skipped.
 *
 * @param fs file system used to resolve and list {@code path}
 * @param path root of the listing
 * @param fileFilter filter deciding which visited paths are included in the result
 * @return the accumulated list of {@link FileStatus} entries
 * @throws IOException if {@code path} itself cannot be resolved
 */
public static List<FileStatus> listPathsRecursively(FileSystem fs, Path path, PathFilter fileFilter)
    throws IOException {
  List<FileStatus> accumulator = Lists.newArrayList();
  FileStatus rootStatus = fs.getFileStatus(path);
  return listPathsRecursivelyHelper(fs, accumulator, rootStatus, fileFilter);
}
for (FileStatus fileStatus : FileListUtils.listPathsRecursively(this.fs, hdfsNewVersionPath, FileListUtils.NO_OP_PATH_FILTER)) { this.fs.setPermission(fileStatus.getPath(), deploymentConfig.getStorePermissions());
/**
 * Recursive worker for {@code listMostNestedPathRecursively}.
 *
 * <ul>
 *   <li>A non-directory is added to {@code files} when {@code fileFilter} accepts its path.</li>
 *   <li>A directory whose listing is empty (per {@code ArrayUtils.isEmpty}) is added itself,
 *       without consulting the filter.</li>
 *   <li>Any other directory is descended into; it is not added itself.</li>
 * </ul>
 *
 * @param fs file system used for listing
 * @param files accumulator that is appended to and returned
 * @param fileStatus entry currently being visited
 * @param fileFilter filter applied to file paths only
 * @return {@code files}, for call chaining
 * @throws IOException if a directory listing fails
 */
private static List<FileStatus> listMostNestedPathRecursivelyHelper(FileSystem fs, List<FileStatus> files,
    FileStatus fileStatus, PathFilter fileFilter) throws IOException {
  if (!fileStatus.isDirectory()) {
    if (fileFilter.accept(fileStatus.getPath())) {
      files.add(fileStatus);
    }
    return files;
  }

  FileStatus[] children = fs.listStatus(fileStatus.getPath());
  if (ArrayUtils.isEmpty(children)) {
    files.add(fileStatus);
    return files;
  }

  for (FileStatus child : children) {
    listMostNestedPathRecursivelyHelper(fs, files, child, fileFilter);
  }
  return files;
}
/**
 * Recursive worker for {@code listPathsRecursively}. Adds the visited entry to {@code files} when
 * {@code fileFilter} accepts its path, then descends into it if it is a directory. Descent does
 * not depend on the filter, so children of a rejected directory are still visited.
 *
 * <p>Listing failures are best-effort: if a directory cannot be listed, the error is logged
 * (with its cause) and the walk continues with the remaining entries.
 *
 * @param fs file system used for listing
 * @param files accumulator that is appended to and returned
 * @param fileStatus entry currently being visited
 * @param fileFilter filter deciding which visited paths are included
 * @return {@code files}, for call chaining
 */
private static List<FileStatus> listPathsRecursivelyHelper(FileSystem fs, List<FileStatus> files,
    FileStatus fileStatus, PathFilter fileFilter) {
  if (fileFilter.accept(fileStatus.getPath())) {
    files.add(fileStatus);
  }
  if (fileStatus.isDirectory()) {
    try {
      for (FileStatus status : fs.listStatus(fileStatus.getPath())) {
        listPathsRecursivelyHelper(fs, files, status, fileFilter);
      }
    } catch (IOException ioe) {
      // Pass the exception to the logger so the reason for the failed listing is not lost.
      LOG.error("Could not list contents of path " + fileStatus.getPath(), ioe);
    }
  }
  return files;
}
}
/**
 * Lists all files under each of the given paths and concatenates the per-path results in
 * iteration order.
 *
 * @param fs file system used to resolve and list each path
 * @param paths roots to list
 * @return a single list holding the results for every root
 * @throws IOException if listing any root fails
 */
public static List<FileStatus> listFilesRecursively(FileSystem fs, Iterable<Path> paths) throws IOException {
  List<FileStatus> combined = Lists.newArrayList();
  for (Path root : paths) {
    List<FileStatus> underRoot = listFilesRecursively(fs, root);
    combined.addAll(underRoot);
  }
  return combined;
}
/**
 * Runs {@link #listMostNestedPathRecursively(FileSystem, Path)} for each of the given paths and
 * concatenates the per-path results in iteration order.
 *
 * @param fs file system used to resolve and list each path
 * @param paths roots to list
 * @return a single list holding the results for every root
 * @throws IOException if listing any root fails
 */
public static List<FileStatus> listMostNestedPathRecursively(FileSystem fs, Iterable<Path> paths)
    throws IOException {
  List<FileStatus> combined = Lists.newArrayList();
  for (Path root : paths) {
    List<FileStatus> underRoot = listMostNestedPathRecursively(fs, root);
    combined.addAll(underRoot);
  }
  return combined;
}
/**
 * Lists all files under {@code path}. When {@code applyFilterToDirectories} is {@code false} the
 * supplied {@link PathFilter} is only applied to files; otherwise it is also used when listing
 * directory contents.
 *
 * @param fs file system used to resolve and list {@code path}
 * @param path root of the listing
 * @param fileFilter filter to apply
 * @param applyFilterToDirectories whether the filter is also used for directory listings
 * @return the accumulated list of {@link FileStatus} entries
 * @throws IOException if resolving {@code path} or listing beneath it fails
 */
public static List<FileStatus> listFilesRecursively(FileSystem fs, Path path, PathFilter fileFilter,
    boolean applyFilterToDirectories) throws IOException {
  List<FileStatus> accumulator = Lists.newArrayList();
  FileStatus rootStatus = fs.getFileStatus(path);
  return listFilesRecursivelyHelper(fs, accumulator, rootStatus, fileFilter, applyFilterToDirectories);
}
/**
 * Lists the files under {@code path} that pass {@code fileFilter}. A
 * {@link FileNotFoundException} raised by the listing is treated as "nothing there" and yields
 * an empty list.
 *
 * @param fs file system to list from
 * @param path root of the listing
 * @param fileFilter filter applied to file paths
 * @return the matching files, or an empty list if the listing reports the path as not found
 * @throws IOException for any listing failure other than {@link FileNotFoundException}
 */
@VisibleForTesting
protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter) throws IOException {
  List<FileStatus> found;
  try {
    found = FileListUtils.listFilesRecursively(fs, path, fileFilter);
  } catch (FileNotFoundException notFound) {
    found = Lists.newArrayList();
  }
  return found;
}
/**
 * Finds the largest modification time among the entries returned by
 * {@code FileListUtils.listMostNestedPathRecursively} for the given paths.
 *
 * @param paths roots to inspect
 * @return the latest modification time, or absent when no entry raised the running maximum
 *         above {@link Long#MIN_VALUE}
 * @throws IOException if the listing fails
 */
private Optional<Long> getLatestModTime(Iterable<Path> paths) throws IOException {
  // Long.MIN_VALUE doubles as the "nothing seen yet" marker.
  long newest = Long.MIN_VALUE;
  for (FileStatus entry : FileListUtils.listMostNestedPathRecursively(this.fs, paths)) {
    long modifiedAt = entry.getModificationTime();
    if (modifiedAt > newest) {
      newest = modifiedAt;
    }
  }
  if (newest == Long.MIN_VALUE) {
    return Optional.<Long> absent();
  }
  return Optional.of(newest);
}
/**
 * Recursive worker for the {@code listFilesRecursively} overloads.
 *
 * <p>For a directory, its children are listed (using {@code fileFilter} for the listing when
 * {@code applyFilterToDirectories} is {@code true}, {@code NO_OP_PATH_FILTER} otherwise) and each
 * child is visited recursively. For a non-directory, the entry is added to {@code files} when
 * {@code fileFilter} accepts its path. Directories themselves are never added.
 *
 * @param fs file system used for listing
 * @param files accumulator that is appended to and returned
 * @param fileStatus entry currently being visited
 * @param fileFilter filter applied to files, and to directory listings when requested
 * @param applyFilterToDirectories whether {@code fileFilter} is also used when listing directories
 * @return {@code files}, for call chaining
 * @throws FileNotFoundException if a listed path is reported as missing
 * @throws IOException if a directory listing fails
 */
private static List<FileStatus> listFilesRecursivelyHelper(FileSystem fs, List<FileStatus> files,
    FileStatus fileStatus, PathFilter fileFilter, boolean applyFilterToDirectories)
    throws FileNotFoundException, IOException {
  if (fileStatus.isDirectory()) {
    PathFilter listingFilter = applyFilterToDirectories ? fileFilter : NO_OP_PATH_FILTER;
    for (FileStatus status : fs.listStatus(fileStatus.getPath(), listingFilter)) {
      // Always recurse: the recursive call is what applies fileFilter to files, so adding a
      // child file directly here would bypass the filter when applyFilterToDirectories is false.
      // (The previous code branched on the parent's isDirectory(), which is always true at this
      // point; its else-branch was dead and would have added the parent directory.)
      listFilesRecursivelyHelper(fs, files, status, fileFilter, applyFilterToDirectories);
    }
  } else if (fileFilter.accept(fileStatus.getPath())) {
    files.add(fileStatus);
  }
  return files;
}
/**
 * Collects the {@link Path} of every file found under the given paths.
 *
 * @param paths roots to list
 * @return the set of file paths returned by the recursive listing
 * @throws IOException if the listing fails
 */
private Set<Path> getAllFilePathsRecursively(Set<Path> paths) throws IOException {
  Set<Path> collected = Sets.newHashSet();
  for (FileStatus entry : FileListUtils.listFilesRecursively(fs, paths)) {
    Path filePath = entry.getPath();
    collected.add(filePath);
  }
  return collected;
}
/**
 * Builds one {@link TimestampedDatasetVersion} per entry returned by
 * {@code FileListUtils.listMostNestedPathRecursively} for the dataset root, using the entry's
 * modification time as the version timestamp and its path as the version path.
 *
 * <p>Listing failures are deliberately non-fatal: the failure is logged (with its cause) and an
 * empty collection is returned.
 *
 * @param dataset dataset to inspect; cast to {@link FileSystemDataset}
 * @return the versions found, or an empty list if the listing failed
 */
@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) {
  FileSystemDataset fsDataset = (FileSystemDataset) dataset;
  try {
    List<TimestampedDatasetVersion> timestampedVersions = Lists.newArrayList();
    for (FileStatus fileStatus : FileListUtils.listMostNestedPathRecursively(this.fs, fsDataset.datasetRoot())) {
      timestampedVersions.add(new TimestampedDatasetVersion(new DateTime(fileStatus.getModificationTime()),
          fileStatus.getPath()));
    }
    return timestampedVersions;
  } catch (IOException e) {
    // Keep the best-effort behaviour, but hand the exception to the logger so the cause is visible.
    LOGGER.warn("Failed to get ModifiedTimeStamp for candidate dataset version at " + fsDataset.datasetRoot()
        + ". Ignoring.", e);
    return Lists.newArrayList();
  }
}
}
/**
 * Checks whether any file under {@code inputPath} has a modification time after
 * {@code jobStartTime}. The first such file is logged and ends the search.
 *
 * @param inputPath input folder to scan recursively
 * @param jobStartTime time to compare file modification times against
 * @return {@code true} if a file modified after {@code jobStartTime} is found, else {@code false}
 * @throws IOException if the listing fails
 */
private boolean findNewDataSinceCompactionStarted(Path inputPath, DateTime jobStartTime) throws IOException {
  for (FileStatus candidate : FileListUtils.listFilesRecursively(this.fs, inputPath)) {
    DateTime modifiedAt = new DateTime(candidate.getModificationTime());
    if (!modifiedAt.isAfter(jobStartTime)) {
      continue;
    }
    LOG.info(String.format("Found new file %s in input folder %s after compaction started. Will abort compaction.",
        candidate.getPath(), inputPath));
    return true;
  }
  return false;
}
/**
 * Collects the unrenamed directories found under the given paths: the parent directory of every
 * listed file whose parent's string form does not end with
 * {@link MRCompactor#COMPACTION_RENAME_SOURCE_DIR_SUFFIX}.
 * Because the result is derived from listed files, directories without any file are excluded.
 *
 * @param fs file system to list from
 * @param paths roots to scan recursively
 * @return the set of qualifying parent directories
 * @throws IOException if the listing fails
 */
public static Set<Path> getDeepestLevelUnrenamedDirsWithFileExistence(FileSystem fs, Set<Path> paths)
    throws IOException {
  Set<Path> unrenamedDirs = Sets.newHashSet();
  for (FileStatus file : FileListUtils.listFilesRecursively(fs, paths)) {
    Path parentDir = file.getPath().getParent();
    boolean hasRenameSuffix = parentDir.toString().endsWith(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX);
    if (hasRenameSuffix) {
      continue;
    }
    unrenamedDirs.add(parentDir);
  }
  return unrenamedDirs;
}
/**
 * Collects the renamed directories found under the given paths: the parent directory of every
 * listed file whose parent's string form ends with
 * {@link MRCompactor#COMPACTION_RENAME_SOURCE_DIR_SUFFIX}.
 * Because the result is derived from listed files, directories without any file are excluded.
 *
 * @param fs file system to list from
 * @param paths roots to scan recursively
 * @return the set of qualifying parent directories
 * @throws IOException if the listing fails
 */
public static Set<Path> getDeepestLevelRenamedDirsWithFileExistence(FileSystem fs, Set<Path> paths)
    throws IOException {
  Set<Path> matches = Sets.newHashSet();
  for (FileStatus file : FileListUtils.listFilesRecursively(fs, paths)) {
    Path parentDir = file.getPath().getParent();
    boolean hasRenameSuffix = parentDir.toString().endsWith(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX);
    if (!hasRenameSuffix) {
      continue;
    }
    matches.add(parentDir);
  }
  return matches;
}