if (stats.getAverageRecordWidth() != FileBaseStatistics.AVG_RECORD_BYTES_UNKNOWN || stats.getTotalInputSize() == FileBaseStatistics.SIZE_UNKNOWN) { return stats; } else { final int calcSamples = (int) (stats.getTotalInputSize() / 1024); numSamples = Math.min(DEFAULT_MAX_NUM_SAMPLES, Math.max(DEFAULT_MIN_NUM_SAMPLES, calcSamples)); long stepSize = stats.getTotalInputSize() / numSamples; return new FileBaseStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), totalNumBytes / (float) samplesTaken);
if (stats.getAverageRecordWidth() != FileBaseStatistics.AVG_RECORD_BYTES_UNKNOWN || stats.getTotalInputSize() == FileBaseStatistics.SIZE_UNKNOWN) { return stats; } else { final int calcSamples = (int) (stats.getTotalInputSize() / 1024); numSamples = Math.min(DEFAULT_MAX_NUM_SAMPLES, Math.max(DEFAULT_MIN_NUM_SAMPLES, calcSamples)); long stepSize = stats.getTotalInputSize() / numSamples; return new FileBaseStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), totalNumBytes / (float) samplesTaken);
if (stats.getAverageRecordWidth() != FileBaseStatistics.AVG_RECORD_BYTES_UNKNOWN || stats.getTotalInputSize() == FileBaseStatistics.SIZE_UNKNOWN) { return stats; } else { final int calcSamples = (int) (stats.getTotalInputSize() / 1024); numSamples = Math.min(DEFAULT_MAX_NUM_SAMPLES, Math.max(DEFAULT_MIN_NUM_SAMPLES, calcSamples)); long stepSize = stats.getTotalInputSize() / numSamples; return new FileBaseStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), totalNumBytes / (float) samplesTaken);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new org.apache.flink.api.common.io.FileInputFormat.FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
// Average record width = total input size / record count; a width of 0 is reported when there are
// no records, which avoids dividing by a zero count. The count is passed along with the result.
final float avgWidth = totalCount == 0 ? 0 : ((float) stats.getTotalInputSize() / totalCount); return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount);
// Average record width = total input size / record count; a width of 0 is reported when there are
// no records, which avoids dividing by a zero count. The count is passed along with the result.
final float avgWidth = totalCount == 0 ? 0 : ((float) stats.getTotalInputSize() / totalCount); return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount);
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException { long totalLength = 0; long latestModTime = 0; for (Path path : filePaths) { final FileSystem fs = FileSystem.get(path.toUri()); final FileBaseStatistics stats = getFileStats(cachedStats, path, fs, files); if (stats.getTotalInputSize() == BaseStatistics.SIZE_UNKNOWN) { totalLength = BaseStatistics.SIZE_UNKNOWN; } else if (totalLength != BaseStatistics.SIZE_UNKNOWN) { totalLength += stats.getTotalInputSize(); } latestModTime = Math.max(latestModTime, stats.getLastModificationTime()); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path[] filePaths, ArrayList<FileStatus> files) throws IOException { long totalLength = 0; long latestModTime = 0; for (Path path : filePaths) { final FileSystem fs = FileSystem.get(path.toUri()); final FileBaseStatistics stats = getFileStats(cachedStats, path, fs, files); if (stats.getTotalInputSize() == BaseStatistics.SIZE_UNKNOWN) { totalLength = BaseStatistics.SIZE_UNKNOWN; } else if (totalLength != BaseStatistics.SIZE_UNKNOWN) { totalLength += stats.getTotalInputSize(); } latestModTime = Math.max(latestModTime, stats.getLastModificationTime()); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
// Cache-validation checks on the gathered statistics (the format is re-configured before each query):
//  1. the gathered statistics are expected to report TOTAL as the input size;
//  2. cached statistics carrying the same modification time as the gathered ones, but FAKE_SIZE,
//     are expected to be reused, so FAKE_SIZE is reported;
//  3. cached statistics dated one unit before the earliest of modTime1..modTime3 are expected to
//     be discarded, so the size is re-gathered and equals TOTAL again.
Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, stats.getTotalInputSize()); format.configure(new Configuration()); FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); BaseStatistics latest = format.getStatistics(fakeStats); Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize()); format.configure(new Configuration()); FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(Math.min(Math.min(modTime1, modTime2), modTime3) - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); BaseStatistics reGathered = format.getStatistics(outDatedFakeStats); Assert.assertEquals("The file size from the statistics is wrong.", TOTAL, reGathered.getTotalInputSize());
if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
// Cache-validation checks on the gathered statistics (the format is re-configured before each query):
//  1. the gathered statistics are expected to report SIZE as the input size;
//  2. cached statistics carrying the same modification time as the gathered ones, but FAKE_SIZE,
//     are expected to be reused, so FAKE_SIZE is reported;
//  3. cached statistics dated one unit before the gathered modification time are expected to be
//     discarded, so the size is re-gathered and equals SIZE again.
Assert.assertEquals("The file size from the statistics is wrong.", SIZE, stats.getTotalInputSize()); format.configure(new Configuration()); FileBaseStatistics fakeStats = new FileBaseStatistics(stats.getLastModificationTime(), FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); BaseStatistics latest = format.getStatistics(fakeStats); Assert.assertEquals("The file size from the statistics is wrong.", FAKE_SIZE, latest.getTotalInputSize()); format.configure(new Configuration()); FileBaseStatistics outDatedFakeStats = new FileBaseStatistics(stats.getLastModificationTime() - 1, FAKE_SIZE, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); BaseStatistics reGathered = format.getStatistics(outDatedFakeStats); Assert.assertEquals("The file size from the statistics is wrong.", SIZE, reGathered.getTotalInputSize());