// ... (preceding statement truncated: reduces a stream of log versions to maxVersion)
    .reduce((x, y) -> x > y ? x : y).orElse(0);
List<HoodieLogFile> logFilesToBeMoved = merged.getLogFiles()
    .filter(lf -> lf.getLogVersion() > maxVersion).collect(Collectors.toList());
return logFilesToBeMoved.stream().map(lf -> {
  Preconditions.checkArgument(lf.getLogVersion() - maxVersion > 0, "Expect new log version to be sane");
  // Re-anchor the log file on the compaction instant; its version becomes (oldVersion - maxVersion)
  HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
      FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
          compactionInstant, lf.getLogVersion() - maxVersion)));
  return Pair.of(lf, newLogFile);
}).collect(Collectors.toList());
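// To make the version arithmetic above concrete, here is a small illustrative
// trace. The file id "f1", the instants "I" and "C", and the
// ".<fileId>_<instant>.log.<version>" naming are assumptions for illustration:
//
//   Suppose maxVersion = 3 and the compaction request instant is "C". Ingestion
//   kept writing on base instant "I" after the request, producing versions 4 and 5.
//   The rename re-anchors them on "C" at version (oldVersion - maxVersion):
//     .f1_I.log.4 -> .f1_C.log.1
//     .f1_I.log.5 -> .f1_C.log.2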
// ... (truncated: selects the file-slice matching this operation's fileId)
    .filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
List<HoodieLogFile> logFilesToRepair = merged.getLogFiles()
    .filter(lf -> lf.getBaseCommitTime().equals(compactionInstant))
    .sorted(HoodieLogFile.getBaseInstantAndLogVersionComparator().reversed())
    .collect(Collectors.toList());
// Highest log version already used in the target file-slice (the original text
// assigned this int-valued expression to a FileSlice, which cannot type-check)
int maxUsedVersion = fileSliceForCompaction.getLogFiles().findFirst().map(HoodieLogFile::getLogVersion)
    .orElse(HoodieLogFile.LOGFILE_BASE_VERSION - 1);
String logExtn = fileSliceForCompaction.getLogFiles().findFirst().map(lf -> "." + lf.getFileExtension())
    .orElse(HoodieLogFile.DELTA_EXTENSION);
String parentPath = fileSliceForCompaction.getDataFile()
    .map(df -> new Path(df.getPath()).getParent().toString())
    .orElse(fileSliceForCompaction.getLogFiles().findFirst().map(lf -> lf.getPath().getParent().toString()).get());
for (HoodieLogFile toRepair : logFilesToRepair) {
  int version = maxUsedVersion + 1;
  HoodieLogFile newLf = new HoodieLogFile(new Path(parentPath,
      FSUtils.makeLogFileName(operation.getFileId(), logExtn, operation.getBaseInstantTime(), version)));
  result.add(Pair.of(toRepair, newLf));
  maxUsedVersion = version; // advance so successive repaired files do not collide on version
}
public static Comparator<HoodieLogFile> getBaseInstantAndLogVersionComparator() {
  return (o1, o2) -> {
    String baseInstantTime1 = o1.getBaseCommitTime();
    String baseInstantTime2 = o2.getBaseCommitTime();
    if (baseInstantTime1.equals(baseInstantTime2)) {
      // reverse the order by log-version when base-commit is the same
      return Integer.compare(o2.getLogVersion(), o1.getLogVersion());
    }
    // reverse the order by base-commits
    return baseInstantTime2.compareTo(baseInstantTime1);
  };
}
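// A minimal usage sketch of the comparator. The paths are made up for
// illustration, assuming the ".<fileId>_<baseInstant>.log.<version>" naming
// that HoodieLogFile parses:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.Path;

List<HoodieLogFile> logFiles = new ArrayList<>(Arrays.asList(
    new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.1")),
    new HoodieLogFile(new Path("/tbl/p1/.f1_002.log.1")),
    new HoodieLogFile(new Path("/tbl/p1/.f1_002.log.2"))));
logFiles.sort(HoodieLogFile.getBaseInstantAndLogVersionComparator());
// Resulting order: .f1_002.log.2, .f1_002.log.1, .f1_001.log.1
// (latest base instant first; highest version first within the same instant)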
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
  String fileId = getFileId();
  String baseCommitTime = getBaseCommitTime();
  String extension = "." + FSUtils.getFileExtensionFromLog(path);
  int newVersion = FSUtils.computeNextLogVersion(fs, path.getParent(), fileId, extension, baseCommitTime);
  return new HoodieLogFile(new Path(path.getParent(),
      FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
}
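// A hedged sketch of rolling over (the path below is illustrative): rollOver
// scans the parent directory for the highest existing version of this
// fileId/base-commit pair and returns a HoodieLogFile one version higher,
// keeping the same fileId and base commit.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// (inside a method that declares throws IOException)
FileSystem fs = FileSystem.get(new Configuration());
HoodieLogFile current = new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.1"));
HoodieLogFile next = current.rollOver(fs);
// next keeps fileId "f1" and base commit "001"; next.getLogVersion() is the
// highest version currently on disk plus one.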
@Override
public String toString() {
  return "HoodieArchivedLogFile {" + super.getPath() + '}';
}
}
// ... (truncated: logPath is constructed with)
    FSUtils.makeLogFileName(logFileId, fileExtension, commitTime, logVersion));
log.info("HoodieLogFile on path " + logPath);
HoodieLogFile logFile = new HoodieLogFile(logPath);
// ... (disconnected excerpts from the log-record scanner; surrounding control flow is truncated)
logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile))).collect(Collectors.toList()),
    readerSchema, readBlocksLazily, reverseReader, bufferSize);
Set<HoodieLogFile> scannedLogFiles = new HashSet<>();
// ...
log.info("Reading a data block from file " + logFile.getPath());
if (isNewInstantBlock(r) && !readBlocksLazily) {
// ...
  break;
case DELETE_BLOCK:
  log.info("Reading a delete block from file " + logFile.getPath());
  if (isNewInstantBlock(r) && !readBlocksLazily) {
// ...
  log.info("Reading a command block from file " + logFile.getPath());
// ...
      "Rolling back the last corrupted log block read in " + logFile.getPath());
  currentInstantLogBlocks.pop();
  numBlocksRolledBack++;
// ...
  log.info("Rolling back the last log block read in " + logFile.getPath());
  currentInstantLogBlocks.pop();
  numBlocksRolledBack++;
// ...
      + " invalid or extra rollback command block in " + logFile.getPath());
  break;
} else {
  log.warn("Unable to apply rollback command block in " + logFile.getPath());
// ...
log.info("Found a corrupt block in " + logFile.getPath());
List<List<IndexedRecord>> allRecords = new ArrayList<>();
// keep rolling the writer over, collecting each log file, until version 4 is reached
while (writer.getLogFile().getLogVersion() != 4) {
  logFiles.add(writer.getLogFile());
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  // ...
}
// ... (scanner construction truncated; it receives the collected log file paths)
logFiles.stream().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()),
    schema, "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
FileStatus[] fileStatuses = metaClient.getFs().listStatus(new Path(dp));
Preconditions.checkArgument(fileStatuses.length == 1, "Expect only 1 file-status");
return new HoodieLogFile(fileStatuses[0]);
} catch (FileNotFoundException fe) {
  throw new CompactionValidationException(fe.getMessage());
}
// ...
Set<HoodieLogFile> diff = logFilesInFileSlice.stream()
    .filter(lf -> !logFilesInCompactionOp.contains(lf)).collect(Collectors.toSet());
Preconditions.checkArgument(diff.stream().allMatch(lf -> lf.getBaseCommitTime().equals(compactionInstant)),
    "There are some log-files which are neither specified in the compaction plan "
        + "nor present after the compaction request instant. Some of these: " + diff);
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
    String compactionInstant, HoodieTableFileSystemView fsView) {
  // Ensure new names of log-files are on expected lines
  Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
  Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
  renameFiles.forEach(lfPair -> {
    Assert.assertFalse("Old Log File Names must not collide", uniqOldLogFiles.contains(lfPair.getLeft()));
    Assert.assertFalse("New Log File Names must not collide", uniqNewLogFiles.contains(lfPair.getRight()));
    uniqOldLogFiles.add(lfPair.getLeft());
    uniqNewLogFiles.add(lfPair.getRight());
  });
  renameFiles.forEach(lfPair -> {
    HoodieLogFile oldLogFile = lfPair.getLeft();
    HoodieLogFile newLogFile = lfPair.getRight();
    Assert.assertEquals("Base Commit time is expected", ingestionInstant, newLogFile.getBaseCommitTime());
    Assert.assertEquals("Base Commit time is expected", compactionInstant, oldLogFile.getBaseCommitTime());
    Assert.assertEquals("File Id is expected", oldLogFile.getFileId(), newLogFile.getFileId());
    HoodieLogFile lastLogFileBeforeCompaction =
        fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
            .filter(fs -> fs.getFileId().equals(oldLogFile.getFileId()))
            .map(fs -> fs.getLogFiles().findFirst().get()).findFirst().get();
    Assert.assertEquals("Log Version expected",
        lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion());
    Assert.assertTrue("Log version must not collide",
        newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion());
  });
}
return jsc.parallelize(renameActions, parallelism).map(lfPair -> {
  try {
    log.info("RENAME " + lfPair.getLeft().getPath() + " => " + lfPair.getRight().getPath());
    renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
    return new RenameOpResult(lfPair, true, Optional.absent());
  } catch (IOException e) { // assumed: renameLogFile declares IOException; catch header was truncated
    log.error("Error renaming log file", e);
    log.error("\n\n\n***NOTE Compaction is in inconsistent state. Try running \"compaction repair "
        + lfPair.getLeft().getBaseCommitTime() + "\" to recover from failure ***\n\n\n");
    return new RenameOpResult(lfPair, false, Optional.of(e));
  }
});
this.writer = createLogWriter(fileSlice, baseInstantTime);
this.currentLogFile = writer.getLogFile();
((HoodieDeltaWriteStat) writeStatus.getStat()).setLogVersion(currentLogFile.getLogVersion());
((HoodieDeltaWriteStat) writeStatus.getStat()).setLogOffset(writer.getCurrentSize());
} catch (Exception e) {
  // ... (truncated: logs/rethrows; the original message fragment ends with)
  //     + partitionPath, e);
}
Path path = new Path(partitionPath, writer.getLogFile().getFileName());
writeStatus.getStat().setPath(path.toString());
doInit = false;
/**
 * Get the latest log file written from the list of log files passed in.
 */
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
  return logFiles.sorted(Comparator.comparing(HoodieLogFile::getLogVersion, Comparator.reverseOrder()))
      .findFirst();
}
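// Minimal usage sketch (illustrative paths; assumes a static import of the
// helper above). Note the helper compares log versions only, so it assumes all
// log files in the stream belong to the same file-slice:
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.hadoop.fs.Path;

Optional<HoodieLogFile> latest = getLatestLogFile(Stream.of(
    new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.1")),
    new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.3")),
    new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.2"))));
// latest.get().getLogVersion() == 3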
/**
 * Add a new log file into the group.
 */
public void addLogFile(HoodieLogFile logFile) {
  if (!fileSlices.containsKey(logFile.getBaseCommitTime())) {
    fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id));
  }
  fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile);
}
row[idx++] = fs.getLogFiles().filter(lf -> lf.getFileSize().isPresent())
    .mapToLong(lf -> lf.getFileSize().get()).sum();
long logFilesScheduledForCompactionTotalSize = fs.getLogFiles().filter(lf -> lf.getFileSize().isPresent())
    .filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
    .mapToLong(lf -> lf.getFileSize().get()).sum();
row[idx++] = logFilesScheduledForCompactionTotalSize;
long logFilesUnscheduledTotalSize = fs.getLogFiles().filter(lf -> lf.getFileSize().isPresent())
    .filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
    .mapToLong(lf -> lf.getFileSize().get()).sum();
row[idx++] = logFilesUnscheduledTotalSize;
// ... (truncated: logUnscheduledToBaseRatio is computed in elided context)
row[idx++] = logUnscheduledToBaseRatio;
row[idx++] = fs.getLogFiles().filter(lf -> lf.getFileSize().isPresent())
    .filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
    .collect(Collectors.toList()).toString();
row[idx++] = fs.getLogFiles().filter(lf -> lf.getFileSize().isPresent())
    .filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
    .collect(Collectors.toList()).toString();
public FileSlice(String baseInstantTime, String fileId) {
  this.fileId = fileId;
  this.baseInstantTime = baseInstantTime;
  this.dataFile = null;
  this.logFiles = new TreeSet<>(HoodieLogFile.getBaseInstantAndLogVersionComparator());
}
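// Hedged sketch (illustrative paths): because logFiles is a TreeSet ordered by
// the comparator above, getLogFiles() streams the newest log file first.
FileSlice slice = new FileSlice("001", "f1");
slice.addLogFile(new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.1")));
slice.addLogFile(new HoodieLogFile(new Path("/tbl/p1/.f1_001.log.2")));
// slice.getLogFiles().findFirst() now yields version 2, which is why callers in
// this section pick the latest log file of a slice via findFirst().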
assertEquals(4, logFilesList.size());
for (HoodieLogFile logFile : logFilesList) {
  filenames.add(logFile.getFileName());
}
// ...
    .flatMap(logFileList -> logFileList).collect(Collectors.toList());
assertEquals(1, logFilesList.size());
assertEquals(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0),
    logFilesList.get(0).getFileName());
private void createNewFile() throws IOException {
  this.output = fs.create(this.logFile.getPath(), false, bufferSize, replication,
      WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
}