/**
 * Recursively lists all files under {@code path}, applying no filtering
 * (delegates to the filtered overload with {@code NO_OP_PATH_FILTER}).
 *
 * @param fs the {@link FileSystem} to list from
 * @param path the root path at which the recursive listing starts
 * @return the {@link FileStatus} of every file found under {@code path}
 * @throws IOException if the underlying listing fails
 */
public static List<FileStatus> listFilesRecursively(FileSystem fs, Path path) throws IOException {
  return listFilesRecursively(fs, path, NO_OP_PATH_FILTER);
}
/**
 * Recursively lists the files under each of the given roots and returns the
 * combined results in a single list, in iteration order of {@code paths}.
 *
 * @param fs the {@link FileSystem} to list from
 * @param paths the root paths to walk
 * @return the {@link FileStatus} of every file found under any of the roots
 * @throws IOException if any individual listing fails
 */
public static List<FileStatus> listFilesRecursively(FileSystem fs, Iterable<Path> paths) throws IOException {
  List<FileStatus> combined = Lists.newArrayList();
  for (Path root : paths) {
    combined.addAll(listFilesRecursively(fs, root));
  }
  return combined;
}
/**
 * Lists the files under {@code path} that pass {@code fileFilter}.
 *
 * <p>A path that does not exist is treated as containing no files rather than
 * being an error, so callers need not pre-check existence.
 *
 * @param fs the {@link FileSystem} to list from
 * @param path the root path of the recursive listing
 * @param fileFilter filter applied to candidate files
 * @return matching files, or an empty (mutable) list if {@code path} is absent
 * @throws IOException on listing failures other than a missing path
 */
@VisibleForTesting
protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter) throws IOException {
  try {
    return FileListUtils.listFilesRecursively(fs, path, fileFilter);
  } catch (FileNotFoundException pathAbsent) {
    // Deliberate best-effort: a missing root simply yields no files.
    return Lists.newArrayList();
  }
}
/**
 * Collects the {@link Path} of every file found recursively under the given
 * set of roots.
 *
 * @param paths the root directories/files to walk
 * @return the de-duplicated set of file paths found under the roots
 * @throws IOException if the recursive listing fails
 */
private Set<Path> getAllFilePathsRecursively(Set<Path> paths) throws IOException {
  Set<Path> filePaths = Sets.newHashSet();
  for (FileStatus status : FileListUtils.listFilesRecursively(fs, paths)) {
    filePaths.add(status.getPath());
  }
  return filePaths;
}
/**
 * Checks whether any file under {@code inputPath} was modified after the
 * compaction job started.
 *
 * @param inputPath the input folder to scan recursively
 * @param jobStartTime the compaction job's start time
 * @return {@code true} as soon as one newer file is found, otherwise {@code false}
 * @throws IOException if the recursive listing fails
 */
private boolean findNewDataSinceCompactionStarted(Path inputPath, DateTime jobStartTime) throws IOException {
  for (FileStatus status : FileListUtils.listFilesRecursively(this.fs, inputPath)) {
    DateTime modifiedAt = new DateTime(status.getModificationTime());
    if (modifiedAt.isAfter(jobStartTime)) {
      // A late arrival invalidates the in-flight compaction; report it and stop scanning.
      LOG.info(String.format(
          "Found new file %s in input folder %s after compaction started. Will abort compaction.",
          status.getPath(), inputPath));
      return true;
    }
  }
  return false;
}
/**
 * Get all the unrenamed directories from the given paths.
 *
 * <p>They are the deepest-level containing directories whose name does NOT end with the
 * suffix {@link MRCompactor#COMPACTION_RENAME_SOURCE_DIR_SUFFIX}. Because directories are
 * discovered via their contained files, each returned directory holds at least one file;
 * empty directories are excluded from the result.
 *
 * @param fs the {@link FileSystem} to list from
 * @param paths the root paths to scan recursively
 * @return parent directories of found files whose names lack the rename suffix
 * @throws IOException if the recursive listing fails
 */
public static Set<Path> getDeepestLevelUnrenamedDirsWithFileExistence(FileSystem fs, Set<Path> paths)
    throws IOException {
  Set<Path> unrenamedDirs = Sets.newHashSet();
  for (FileStatus file : FileListUtils.listFilesRecursively(fs, paths)) {
    Path containingDir = file.getPath().getParent();
    if (!containingDir.toString().endsWith(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX)) {
      unrenamedDirs.add(containingDir);
    }
  }
  return unrenamedDirs;
}
/**
 * Get all the renamed directories from the given paths.
 *
 * <p>They are the deepest-level containing directories whose name ends with the suffix
 * {@link MRCompactor#COMPACTION_RENAME_SOURCE_DIR_SUFFIX}. Because directories are
 * discovered via their contained files, each returned directory holds at least one file;
 * empty directories are excluded from the result.
 *
 * @param fs the {@link FileSystem} to list from
 * @param paths the root paths to scan recursively
 * @return parent directories of found files whose names carry the rename suffix
 * @throws IOException if the recursive listing fails
 */
public static Set<Path> getDeepestLevelRenamedDirsWithFileExistence(FileSystem fs, Set<Path> paths)
    throws IOException {
  Set<Path> renamed = Sets.newHashSet();
  for (FileStatus file : FileListUtils.listFilesRecursively(fs, paths)) {
    Path containingDir = file.getPath().getParent();
    if (containingDir.toString().endsWith(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX)) {
      renamed.add(containingDir);
    }
  }
  return renamed;
}
/**
 * Recursively collects the paths of files under {@code dataDir} whose names end
 * with one of the given extensions.
 *
 * @param fs the {@link FileSystem} to list from
 * @param dataDir the directory to scan; a non-existent directory yields an empty list
 * @param extensions accepted file-name suffixes
 * @return paths of matching files (mutable list)
 * @throws IOException if the recursive listing fails
 */
public static List<Path> getApplicableFilePaths(FileSystem fs, Path dataDir, final Collection<String> extensions)
    throws IOException {
  if (!fs.exists(dataDir)) {
    return Lists.newArrayList();
  }
  PathFilter extensionFilter = new PathFilter() {
    @Override
    public boolean accept(Path path) {
      // Accept a file as soon as any configured extension matches its name.
      for (String extension : extensions) {
        if (path.getName().endsWith(extension)) {
          return true;
        }
      }
      return false;
    }
  };
  List<Path> applicablePaths = Lists.newArrayList();
  for (FileStatus file : FileListUtils.listFilesRecursively(fs, dataDir, extensionFilter)) {
    applicablePaths.add(file.getPath());
  }
  return applicablePaths;
}
/**
 * Recursively collects the paths of files under {@code dataDir} whose names end with one
 * of the applicable file extensions. A non-existent {@code dataDir} yields an empty list.
 *
 * NOTE(review): the trailing "});" below closes an enclosing anonymous class / call site
 * that is outside this view; it is preserved verbatim.
 */
@Override public List<Path> call() throws Exception {
  if (!MRCompactorJobRunner.this.fs.exists(dataDir)) {
    return Lists.newArrayList();
  }
  List<Path> paths = Lists.newArrayList();
  // Filter accepts a file as soon as any applicable extension matches its name.
  for (FileStatus fileStatus : FileListUtils.listFilesRecursively(MRCompactorJobRunner.this.fs, dataDir,
      new PathFilter() {
        @Override public boolean accept(Path path) {
          for (String validExtention : getApplicableFileExtensions()) {
            if (path.getName().endsWith(validExtention)) {
              return true;
            }
          }
          return false;
        }
      })) {
    paths.add(fileStatus.getPath());
  }
  return paths;
} });
/**
 * Returns the most recent modification time among the files in the dataset's
 * late-output path, interpreted in the configured compaction time zone.
 *
 * <p>Returns {@link Optional#absent()} when the late path contains no files or
 * when the listing fails for any reason (best-effort semantics preserved).
 *
 * @return the latest modification time of any late file, or absent
 */
public Optional<DateTime> getEarliestLateFileModificationTime() {
  DateTimeZone timeZone = DateTimeZone
      .forID(this.dataset.jobProps().getProp(MRCompactor.COMPACTION_TIMEZONE, MRCompactor.DEFAULT_COMPACTION_TIMEZONE));
  try {
    long maxTimestamp = Long.MIN_VALUE;
    for (FileStatus status : FileListUtils.listFilesRecursively(this.fs, this.dataset.outputLatePath())) {
      maxTimestamp = Math.max(maxTimestamp, status.getModificationTime());
    }
    // MIN_VALUE sentinel means the listing returned no files at all.
    return maxTimestamp == Long.MIN_VALUE
        ? Optional.<DateTime>absent()
        : Optional.of(new DateTime(maxTimestamp, timeZone));
  } catch (Exception e) {
    // Fix: pass the exception to the logger so the cause/stack trace is not lost.
    logger.error("Failed to get earliest late file modification time", e);
    return Optional.absent();
  }
}
/**
 * Check if inputFolder contains any files which have modification times which are more
 * recent than the last compaction time as stored within outputFolder; return any files
 * which do. An empty set is returned if all files are older than the last compaction
 * time, or if either folder does not exist.
 *
 * @param inputFolder folder whose files are checked for recency
 * @param outputFolder folder holding the persisted compaction timestamp
 * @return paths of files newer than the last compaction, possibly empty
 * @throws IOException if the timestamp read or the listing fails
 */
private Set<Path> getNewDataInFolder(Path inputFolder, Path outputFolder) throws IOException {
  Set<Path> newFiles = Sets.newHashSet();
  if (!this.fs.exists(inputFolder) || !this.fs.exists(outputFolder)) {
    return newFiles;
  }
  DateTime lastCompactionTime = new DateTime(MRCompactor.readCompactionTimestamp(this.fs, outputFolder));
  for (FileStatus fstat : FileListUtils.listFilesRecursively(this.fs, inputFolder)) {
    DateTime fileModificationTime = new DateTime(fstat.getModificationTime());
    if (!fileModificationTime.isAfter(lastCompactionTime)) {
      continue;
    }
    LOG.info ("[" + fileModificationTime.getMillis() + "] " + fstat.getPath() + " is after "
        + lastCompactionTime.getMillis());
    newFiles.add(fstat.getPath());
  }
  if (!newFiles.isEmpty()) {
    LOG.info(String.format("Found %d new files within folder %s which are more recent than the previous "
        + "compaction start time of %s.", newFiles.size(), inputFolder, lastCompactionTime));
  }
  return newFiles;
}
/**
 * Recursively lists the files under {@code rootPath} that pass this instance's
 * path filter, optionally also excluding hidden paths.
 *
 * @param skipHiddenPaths when true, a {@code HiddenFilter} is AND-ed with the
 *        configured filter so hidden files/directories are skipped
 * @return the matching files as a set; empty if {@code rootPath} does not exist
 * @throws IOException if the existence check or the listing fails
 *
 * NOTE(review): the trailing extra "}" below closes the enclosing class, which is
 * outside this view; it is preserved verbatim.
 */
public Set<FileStatus> getPaths(boolean skipHiddenPaths) throws IOException {
  if (!this.fs.exists(this.rootPath)) {
    return Sets.newHashSet();
  }
  PathFilter actualFilter =
      skipHiddenPaths ? new AndPathFilter(new HiddenFilter(), this.pathFilter) : this.pathFilter;
  List<FileStatus> files = FileListUtils.listFilesRecursively(this.fs, this.rootPath, actualFilter);
  return Sets.newHashSet(files);
}
}
/**
 * For regular compactions, compaction timestamp is the time the compaction job starts.
 *
 * If this is a recompaction from output paths, the compaction timestamp will remain the same as previously
 * persisted compaction time. This is because such a recompaction doesn't consume input data, so next time,
 * whether a file in the input folder is considered late file should still be based on the previous compaction
 * timestamp.
 *
 * @return the effective compaction timestamp in the configured time zone
 * @throws IOException if listing the input paths fails
 */
private DateTime getCompactionTimestamp() throws IOException {
  DateTimeZone timeZone = DateTimeZone.forID(
      this.dataset.jobProps().getProp(MRCompactor.COMPACTION_TIMEZONE, MRCompactor.DEFAULT_COMPACTION_TIMEZONE));

  // Regular compaction: simply use "now".
  if (!this.recompactFromDestPaths) {
    return new DateTime(timeZone);
  }

  // Recompaction: reuse the newest modification time found across the input paths.
  Set<Path> inputPaths = getInputPaths();
  long newestModTime = Long.MIN_VALUE;
  for (FileStatus status : FileListUtils.listFilesRecursively(this.fs, inputPaths)) {
    newestModTime = Math.max(newestModTime, status.getModificationTime());
  }
  // No files found: fall back to the current time.
  return newestModTime == Long.MIN_VALUE ? new DateTime(timeZone) : new DateTime(newestModTime, timeZone);
}
@Override public synchronized Collection<FileStatus> getFiles() throws IOException{ if(filesInitialized){ return this.allFileStatus; } this.filesInitialized = true; FileSystem fs = FileSystem.get(rc.getFsURI(), new Configuration()); if(!fs.exists(this.rc.getPath())){ return Collections.emptyList(); } Collection<Path> validPaths = ReplicationDataValidPathPicker.getValidPaths(this); //ReplicationDataValidPathPicker.getValidPaths(fs, this.rc.getPath(), this.rdc); for(Path p: validPaths){ this.allFileStatus.addAll(FileListUtils.listFilesRecursively(fs, p)); } return this.allFileStatus; }
for (FileStatus fromFile : FileListUtils.listFilesRecursively(fileSystem, from)) {
/**
 * Computes (and then caches) the watermark for this config as the maximum file
 * modification time across all files under the valid replication paths.
 *
 * <p>On an {@link IOException} the error is logged and the current cached value
 * (possibly absent) is returned, leaving the instance uninitialized so a later
 * call can retry.
 *
 * @return the cached or freshly computed watermark
 */
@Override
public synchronized Optional<ComparableWatermark> getWatermark() {
  if (this.initialized) {
    return this.cachedWatermark;
  }
  try {
    long curTs = -1;
    FileSystem fs = FileSystem.get(rc.getFsURI(), new Configuration());
    Collection<Path> validPaths = ReplicationDataValidPathPicker.getValidPaths(this);
    for (Path p : validPaths) {
      this.allFileStatus.addAll(FileListUtils.listFilesRecursively(fs, p));
    }
    for (FileStatus f : this.allFileStatus) {
      if (f.getModificationTime() > curTs) {
        curTs = f.getModificationTime();
      }
    }
    // NOTE: if no files exist, the watermark is LongWatermark(-1), matching prior behavior.
    ComparableWatermark result = new LongWatermark(curTs);
    this.cachedWatermark = Optional.of(result);
    // Fix: Optional.of(...) is always present, so the redundant isPresent() guard is gone.
    this.initialized = true;
    return this.cachedWatermark;
  } catch (IOException e) {
    // Fix: pass the exception to the logger so the stack trace is not lost.
    log.error("Error while retrieve the watermark for " + this, e);
    return this.cachedWatermark;
  }
}
Set<Path> total = Sets.newHashSet(); for (FileStatus fileStatus : FileListUtils.listFilesRecursively(fs, path)) { if (appendDelta) {
List<FileStatus> srcLogFiles = FileListUtils.listFilesRecursively(this.srcFs, this.srcLogDir, new PathFilter() { @Override public boolean accept(Path path) {