/**
 * Lists the entries directly under {@code path} and returns the ones that
 * {@code isFiltered} does not reject.
 *
 * <p>For every accepted entry its modification time is stored in
 * {@code modificationTimes}, keyed by the entry's file name. If the listing is
 * {@code null}, a warning is logged and an empty list is returned.
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the string form of the path of every accepted entry
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> newFiles = new ArrayList<String>();
    final FileStatus[] listing = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (listing == null) {
        LOG.warn("Path does not exist: {}", path);
        return newFiles;
    }

    for (int i = 0; i < listing.length; i++) {
        final FileStatus entry = listing[i];
        final Path entryPath = entry.getPath();
        final String entryName = entryPath.getName();
        final long entryModTime = entry.getModificationTime();

        if (isFiltered(entryName, entryModTime)) {
            continue;
        }

        newFiles.add(entryPath.toString());
        // Remember the modification time under the bare file name.
        modificationTimes.put(entryName, entryModTime);
    }
    return newFiles;
}
/**
 * Builds the input splits that are forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of the
 * file each split belongs to.
 *
 * <p>The result is a {@link TreeMap}, so iterating it yields the groups in
 * ascending modification-time order. Splits whose path is not a key of
 * {@code eligibleFiles} are dropped.
 *
 * @param eligibleFiles The files to process, keyed by path.
 * @return the splits of the eligible files, keyed by file modification time
 * @throws IOException if creating the input splits fails
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();

    // Nothing eligible: skip split creation entirely.
    if (eligibleFiles.isEmpty()) {
        return grouped;
    }

    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
        final FileStatus owner = eligibleFiles.get(candidate.getPath());
        if (owner == null) {
            // The split belongs to a file that is not eligible.
            continue;
        }

        final Long modTime = owner.getModificationTime();

        List<TimestampedFileInputSplit> bucket = grouped.get(modTime);
        if (bucket == null) {
            bucket = new ArrayList<>();
            grouped.put(modTime, bucket);
        }

        final TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
            modTime,
            candidate.getSplitNumber(),
            candidate.getPath(),
            candidate.getStart(),
            candidate.getLength(),
            candidate.getHostnames());
        bucket.add(timestamped);
    }

    return grouped;
}
latestModTime = Math.max(latestModTime, file.getModificationTime()); if (!s.isDir()) { files.add(s); latestModTime = Math.max(s.getModificationTime(), latestModTime);
latestModTime = Math.max(latestModTime, file.getModificationTime()); if (!s.isDir()) { files.add(s); latestModTime = Math.max(s.getModificationTime(), latestModTime);
if (!status.isDir()) { Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status);
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
// Create three temp files inside tempDir (one per SIZE1..SIZE3 argument) and capture
// each file's modification time right after that file has been created.
// NOTE(review): SIZE1..SIZE3 are presumably the file sizes - confirm against TestFileUtils.
String tempDir = tempDirFile.getAbsolutePath(); String f1 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE1); long modTime1 = fs.getFileStatus(new Path(f1)).getModificationTime(); String f2 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE2); long modTime2 = fs.getFileStatus(new Path(f2)).getModificationTime(); String f3 = TestFileUtils.createTempFileInDirectory(tempDir, SIZE3); long modTime3 = fs.getFileStatus(new Path(f3)).getModificationTime();
// Create tempFile1 and capture the modification times of tempFile1 and tempFile2.
// NOTE(review): size1 and tempFile2 are declared outside this excerpt, and fakeSize is
// not used by the statements visible here - confirm against the surrounding test.
final long fakeSize = 10065; String tempFile1 = TestFileUtils.createTempFile(size1); final long lastModTime1 = fs.getFileStatus(new Path(tempFile1)).getModificationTime(); final long lastModTime2 = fs.getFileStatus(new Path(tempFile2)).getModificationTime();
/**
 * Returns the modification time by delegating to {@code fileStatus}.
 *
 * @return the value of {@code fileStatus.getModificationTime()}
 */
@Override
public long getModificationTime() {
    final long delegatedModTime = fileStatus.getModificationTime();
    return delegatedModTime;
}
/**
 * Lists the entries directly under {@code path} and returns the ones that
 * {@code isFiltered} does not reject.
 *
 * <p>For every accepted entry its modification time is stored in
 * {@code modificationTimes}, keyed by the entry's file name. If the listing is
 * {@code null}, a warning is logged and an empty list is returned.
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the string form of the path of every accepted entry
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> newFiles = new ArrayList<String>();
    final FileStatus[] listing = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (listing == null) {
        LOG.warn("Path does not exist: {}", path);
        return newFiles;
    }

    for (int i = 0; i < listing.length; i++) {
        final FileStatus entry = listing[i];
        final Path entryPath = entry.getPath();
        final String entryName = entryPath.getName();
        final long entryModTime = entry.getModificationTime();

        if (isFiltered(entryName, entryModTime)) {
            continue;
        }

        newFiles.add(entryPath.toString());
        // Remember the modification time under the bare file name.
        modificationTimes.put(entryName, entryModTime);
    }
    return newFiles;
}
/**
 * Lists the entries directly under {@code path} and returns the ones that
 * {@code isFiltered} does not reject.
 *
 * <p>For every accepted entry its modification time is stored in
 * {@code modificationTimes}, keyed by the entry's file name. If the listing is
 * {@code null}, a warning is logged and an empty list is returned.
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the string form of the path of every accepted entry
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> newFiles = new ArrayList<String>();
    final FileStatus[] listing = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (listing == null) {
        LOG.warn("Path does not exist: {}", path);
        return newFiles;
    }

    for (int i = 0; i < listing.length; i++) {
        final FileStatus entry = listing[i];
        final Path entryPath = entry.getPath();
        final String entryName = entryPath.getName();
        final long entryModTime = entry.getModificationTime();

        if (isFiltered(entryName, entryModTime)) {
            continue;
        }

        newFiles.add(entryPath.toString());
        // Remember the modification time under the bare file name.
        modificationTimes.put(entryName, entryModTime);
    }
    return newFiles;
}
/**
 * Lists the entries directly under {@code path} and returns the ones that
 * {@code isFiltered} does not reject.
 *
 * <p>For every accepted entry its modification time is stored in
 * {@code modificationTimes}, keyed by the entry's file name. If the listing is
 * {@code null}, a warning is logged and an empty list is returned.
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the string form of the path of every accepted entry
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> newFiles = new ArrayList<String>();
    final FileStatus[] listing = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (listing == null) {
        LOG.warn("Path does not exist: {}", path);
        return newFiles;
    }

    for (int i = 0; i < listing.length; i++) {
        final FileStatus entry = listing[i];
        final Path entryPath = entry.getPath();
        final String entryName = entryPath.getName();
        final long entryModTime = entry.getModificationTime();

        if (isFiltered(entryName, entryModTime)) {
            continue;
        }

        newFiles.add(entryPath.toString());
        // Remember the modification time under the bare file name.
        modificationTimes.put(entryName, entryModTime);
    }
    return newFiles;
}
/**
 * Builds the input splits that are forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of the
 * file each split belongs to.
 *
 * <p>The result is a {@link TreeMap}, so iterating it yields the groups in
 * ascending modification-time order. Splits whose path is not a key of
 * {@code eligibleFiles} are dropped.
 *
 * @param eligibleFiles The files to process, keyed by path.
 * @return the splits of the eligible files, keyed by file modification time
 * @throws IOException if creating the input splits fails
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();

    // Nothing eligible: skip split creation entirely.
    if (eligibleFiles.isEmpty()) {
        return grouped;
    }

    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
        final FileStatus owner = eligibleFiles.get(candidate.getPath());
        if (owner == null) {
            // The split belongs to a file that is not eligible.
            continue;
        }

        final Long modTime = owner.getModificationTime();

        List<TimestampedFileInputSplit> bucket = grouped.get(modTime);
        if (bucket == null) {
            bucket = new ArrayList<>();
            grouped.put(modTime, bucket);
        }

        final TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
            modTime,
            candidate.getSplitNumber(),
            candidate.getPath(),
            candidate.getStart(),
            candidate.getLength(),
            candidate.getHostnames());
        bucket.add(timestamped);
    }

    return grouped;
}
/**
 * Builds the input splits that are forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of the
 * file each split belongs to.
 *
 * <p>The result is a {@link TreeMap}, so iterating it yields the groups in
 * ascending modification-time order. Splits whose path is not a key of
 * {@code eligibleFiles} are dropped.
 *
 * @param eligibleFiles The files to process, keyed by path.
 * @return the splits of the eligible files, keyed by file modification time
 * @throws IOException if creating the input splits fails
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();

    // Nothing eligible: skip split creation entirely.
    if (eligibleFiles.isEmpty()) {
        return grouped;
    }

    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
        final FileStatus owner = eligibleFiles.get(candidate.getPath());
        if (owner == null) {
            // The split belongs to a file that is not eligible.
            continue;
        }

        final Long modTime = owner.getModificationTime();

        List<TimestampedFileInputSplit> bucket = grouped.get(modTime);
        if (bucket == null) {
            bucket = new ArrayList<>();
            grouped.put(modTime, bucket);
        }

        final TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
            modTime,
            candidate.getSplitNumber(),
            candidate.getPath(),
            candidate.getStart(),
            candidate.getLength(),
            candidate.getHostnames());
        bucket.add(timestamped);
    }

    return grouped;
}
/**
 * Builds the input splits that are forwarded to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of the
 * file each split belongs to.
 *
 * <p>The result is a {@link TreeMap}, so iterating it yields the groups in
 * ascending modification-time order. Splits whose path is not a key of
 * {@code eligibleFiles} are dropped.
 *
 * @param eligibleFiles The files to process, keyed by path.
 * @return the splits of the eligible files, keyed by file modification time
 * @throws IOException if creating the input splits fails
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    final Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();

    // Nothing eligible: skip split creation entirely.
    if (eligibleFiles.isEmpty()) {
        return grouped;
    }

    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
        final FileStatus owner = eligibleFiles.get(candidate.getPath());
        if (owner == null) {
            // The split belongs to a file that is not eligible.
            continue;
        }

        final Long modTime = owner.getModificationTime();

        List<TimestampedFileInputSplit> bucket = grouped.get(modTime);
        if (bucket == null) {
            bucket = new ArrayList<>();
            grouped.put(modTime, bucket);
        }

        final TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
            modTime,
            candidate.getSplitNumber(),
            candidate.getPath(),
            candidate.getStart(),
            candidate.getLength(),
            candidate.getHostnames());
        bucket.add(timestamped);
    }

    return grouped;
}
if (!status.isDir()) { Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status);
if (!status.isDir()) { Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status);
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
/** * Check and update a previously unfinished result partition, if it has finished, update * its file information. * * @param resultPartitionID Result partition id. * @param fileInfo Previous file information of this result partition. * @return If this result partition is ready to be consumed, return true, otherwise return false. */ private void updateUnfinishedResultPartition( ResultPartitionID resultPartitionID, YarnResultPartitionFileInfo fileInfo) throws IOException { String finishedFilePath = ExternalBlockShuffleUtils.generateFinishedPath( fileInfo.getRootDirAndPartitionDir().f1); try { // Use finishedFile to get the partition ready time. FileStatus fileStatus = fileSystem.getFileStatus(new Path(finishedFilePath)); if (fileStatus != null) { fileInfo.setReadyToBeConsumed(fileStatus.getModificationTime()); } } catch (FileNotFoundException e) { // The result partition is still unfinished. throw new PartitionNotFoundException(resultPartitionID); } // Other IOExceptions will be thrown out. }