/**
 * Streams one data file per file group held in {@code fileGroupMap}.
 *
 * <p>For each group, the first element of {@code getAllDataFiles()} for which
 * {@code isDataFileDueToPendingCompaction} returns {@code false} is selected. Groups that have
 * no such data file contribute nothing to the result.
 *
 * <p>NOTE(review): "latest" relies on {@code getAllDataFiles()} yielding the newest file first;
 * that ordering is not visible from here - confirm against the file group implementation.
 *
 * @return a stream with at most one data file per file group
 */
@Override
public Stream<HoodieDataFile> getLatestDataFiles() {
  return fileGroupMap.values().stream()
      .map(group -> group.getAllDataFiles()
          .filter(candidate -> !isDataFileDueToPendingCompaction(candidate))
          .findFirst())
      // Unwrap the per-group Optional: an empty Optional becomes an empty stream.
      .flatMap(firstMatch -> firstMatch.map(Stream::of).orElseGet(Stream::empty));
}
/**
 * Streams one data file per file group held in {@code fileGroupMap}, restricted to the given
 * commit times.
 *
 * <p>For each group, the first element of {@code getAllDataFiles()} is selected whose commit
 * time is contained in {@code commitsToReturn} and for which
 * {@code isDataFileDueToPendingCompaction} returns {@code false}. Groups without such a data
 * file contribute nothing to the result.
 *
 * <p>NOTE(review): "latest" relies on {@code getAllDataFiles()} yielding the newest file first;
 * that ordering is not visible from here - confirm against the file group implementation.
 *
 * @param commitsToReturn commit times a data file must match to be eligible
 * @return a stream with at most one data file per file group
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
  return fileGroupMap.values().stream()
      .map(group -> group.getAllDataFiles()
          // Commit-time membership is checked first, then the pending-compaction exclusion.
          .filter(candidate -> commitsToReturn.contains(candidate.getCommitTime()))
          .filter(candidate -> !isDataFileDueToPendingCompaction(candidate))
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Streams one data file per file group returned by {@code getAllFileGroups(partitionPath)}.
 *
 * <p>For each group, the first element of {@code getAllDataFiles()} for which
 * {@code isDataFileDueToPendingCompaction} returns {@code false} is selected. Groups that have
 * no such data file contribute nothing to the result.
 *
 * <p>NOTE(review): "latest" relies on {@code getAllDataFiles()} yielding the newest file first;
 * that ordering is not visible from here - confirm against the file group implementation.
 *
 * @param partitionPath partition whose file groups are inspected
 * @return a stream with at most one data file per file group of the partition
 */
@Override
public Stream<HoodieDataFile> getLatestDataFiles(final String partitionPath) {
  final Stream<Optional<HoodieDataFile>> firstPerGroup = getAllFileGroups(partitionPath)
      .map(group -> group.getAllDataFiles()
          .filter(candidate -> !isDataFileDueToPendingCompaction(candidate))
          .findFirst());
  // Keep only the groups that actually produced a data file.
  return firstPerGroup
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Streams, for each file group of the given partition, a data file written at exactly
 * {@code instantTime}.
 *
 * <p>For each group returned by {@code getAllFileGroups(partitionPath)}, the first element of
 * {@code getAllDataFiles()} is selected whose commit time satisfies
 * {@code HoodieTimeline.compareTimestamps(commitTime, instantTime, HoodieTimeline.EQUAL)} and
 * for which {@code isDataFileDueToPendingCompaction} returns {@code false}. Groups without such
 * a data file contribute nothing to the result.
 *
 * @param partitionPath partition whose file groups are inspected
 * @param instantTime   commit time a data file must match
 * @return a stream with at most one data file per file group of the partition
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime) {
  return getAllFileGroups(partitionPath)
      .map(group -> group.getAllDataFiles()
          // The timestamp comparison runs first; the compaction check only runs on a match.
          .filter(candidate ->
              HoodieTimeline.compareTimestamps(
                  candidate.getCommitTime(), instantTime, HoodieTimeline.EQUAL)
                  && !isDataFileDueToPendingCompaction(candidate))
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Streams, for each file group of the given partition, a data file committed at or before
 * {@code maxCommitTime}.
 *
 * <p>For each group returned by {@code getAllFileGroups(partitionPath)}, the first element of
 * {@code getAllDataFiles()} is selected whose commit time satisfies
 * {@code HoodieTimeline.compareTimestamps(commitTime, maxCommitTime,
 * HoodieTimeline.LESSER_OR_EQUAL)} and for which {@code isDataFileDueToPendingCompaction}
 * returns {@code false}. Groups without such a data file contribute nothing to the result.
 *
 * <p>NOTE(review): "latest" relies on {@code getAllDataFiles()} yielding the newest file first;
 * that ordering is not visible from here - confirm against the file group implementation.
 *
 * @param partitionPath partition whose file groups are inspected
 * @param maxCommitTime upper bound (inclusive) on the commit time of eligible data files
 * @return a stream with at most one data file per file group of the partition
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
  return getAllFileGroups(partitionPath)
      .map(group -> group.getAllDataFiles()
          .filter(candidate -> {
            // The timestamp bound is evaluated before the pending-compaction exclusion.
            final boolean withinBound = HoodieTimeline.compareTimestamps(
                candidate.getCommitTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL);
            return withinBound && !isDataFileDueToPendingCompaction(candidate);
          })
          .findFirst())
      // Unwrap the per-group Optional: an empty Optional becomes an empty stream.
      .flatMap(firstMatch -> firstMatch.map(Stream::of).orElseGet(Stream::empty));
}
String fileId = fileGroup.getId(); Set<String> filenames = Sets.newHashSet(); fileGroup.getAllDataFiles().forEach(dataFile -> { assertEquals("All same fileId should be grouped", fileId, dataFile.getFileId()); filenames.add(dataFile.getFileName());
for (HoodieFileGroup fileGroup : fileGroups) { Set<String> commitTimes = new HashSet<>(); fileGroup.getAllDataFiles().forEach(value -> { logger.debug("Data File - " + value); commitTimes.add(value.getCommitTime());
fileGroup.getAllDataFiles().filter(df -> { return compactionFileIdToLatestFileSlice.get(fileGroup.getId()) .getBaseInstantTime().equals(df.getCommitTime()); List<HoodieDataFile> dataFiles = fileGroup.getAllDataFiles().collect(Collectors.toList());