public static Comparator<HoodieLogFile> getBaseInstantAndLogVersionComparator() { return (o1, o2) -> { String baseInstantTime1 = o1.getBaseCommitTime(); String baseInstantTime2 = o2.getBaseCommitTime(); if (baseInstantTime1.equals(baseInstantTime2)) { // reverse the order by log-version when base-commit is same return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion()); } // reverse the order by base-commits return baseInstantTime2.compareTo(baseInstantTime1); }; }
/**
 * Get the latest log file written from the list of log files passed in.
 *
 * @param logFiles stream of log files to inspect (the stream is consumed by this call)
 * @return the log file with the highest log version, or empty if the stream is empty
 */
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
  // max() finds the highest-version file in O(n); the previous
  // sorted(reverseOrder).findFirst() idiom sorted the whole stream (O(n log n))
  // just to pick its first element.
  return logFiles.max(Comparator.comparing(HoodieLogFile::getLogVersion));
}
/**
 * Get the latest log version for the fileId in the partition path.
 *
 * @param fs               filesystem to list log files on
 * @param partitionPath    partition directory containing the log files
 * @param fileId           file group id whose log files are inspected
 * @param logFileExtension extension identifying log files (e.g. ".log")
 * @param baseCommitTime   base instant the log files belong to
 * @return highest log version found, or empty when no log file exists
 * @throws IOException on filesystem listing failures
 */
public static Optional<Integer> getLatestLogVersion(FileSystem fs, Path partitionPath,
    final String fileId, final String logFileExtension, final String baseCommitTime)
    throws IOException {
  // Optional.map replaces the isPresent()/get()/empty() dance with the idiomatic chain.
  return getLatestLogFile(
      getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime))
      .map(HoodieLogFile::getLogVersion);
}
// NOTE(review): fragment — the enclosing method begins above this chunk; `merged`,
// `maxVersion` and `compactionInstant` are declared there. TODO confirm in full file.
// Fold to the maximum log version seen (defaults to 0 when there are no log files).
    .reduce((x, y) -> x > y ? x : y).orElse(0);
// Only log files written past the compaction request (version > maxVersion) need renaming.
List<HoodieLogFile> logFilesToBeMoved = merged.getLogFiles()
    .filter(lf -> lf.getLogVersion() > maxVersion).collect(Collectors.toList());
return logFilesToBeMoved.stream().map(lf -> {
  // The renumbered version must stay positive; guards against version-arithmetic bugs.
  Preconditions.checkArgument(lf.getLogVersion() - maxVersion > 0,
      "Expect new log version to be sane");
  // Rebase the log file onto the compaction instant, shifting its version down by maxVersion
  // so the renamed files restart from version 1 relative to that instant.
  HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
      FSUtils.makeLogFileName(lf.getFileId(),
          "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
          compactionInstant, lf.getLogVersion() - maxVersion)));
  return Pair.of(lf, newLogFile);
}).collect(Collectors.toList());
/**
 * Creates a log-format writer for the current fileId/partition, continuing from the
 * highest log version already present on the given file slice.
 *
 * @param fileSlice      latest file slice for the file group, if one exists
 * @param baseCommitTime base instant time the log file is associated with
 * @return a writer positioned at the appropriate log version
 * @throws IOException          on filesystem failures while creating the writer
 * @throws InterruptedException if writer creation is interrupted
 */
private Writer createLogWriter(Optional<FileSlice> fileSlice, String baseCommitTime)
    throws IOException, InterruptedException {
  // Guard against an absent slice: the previous unconditional fileSlice.get() threw
  // NoSuchElementException when empty; fall back to the base log version instead.
  int latestLogVersion = fileSlice
      .map(slice -> slice.getLogFiles().map(HoodieLogFile::getLogVersion)
          .max(Comparator.naturalOrder()).orElse(HoodieLogFile.LOGFILE_BASE_VERSION))
      .orElse(HoodieLogFile.LOGFILE_BASE_VERSION);
  return HoodieLogFormat.newWriterBuilder()
      .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
      .withFileId(fileId).overBaseCommit(baseCommitTime)
      .withLogVersion(latestLogVersion)
      .withSizeThreshold(config.getLogFileMaxSize()).withFs(fs)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
}
/**
 * Creates a log-format writer for the current fileId/partition, continuing from the
 * highest log version already present on the given file slice.
 *
 * @param fileSlice      latest file slice for the file group, if one exists
 * @param baseCommitTime base instant time the log file is associated with
 * @return a writer positioned at the appropriate log version
 * @throws IOException          on filesystem failures while creating the writer
 * @throws InterruptedException if writer creation is interrupted
 */
private Writer createLogWriter(Optional<FileSlice> fileSlice, String baseCommitTime)
    throws IOException, InterruptedException {
  // Guard against an absent slice: the previous unconditional fileSlice.get() threw
  // NoSuchElementException when empty; fall back to the base log version instead.
  int latestLogVersion = fileSlice
      .map(slice -> slice.getLogFiles().map(HoodieLogFile::getLogVersion)
          .max(Comparator.naturalOrder()).orElse(HoodieLogFile.LOGFILE_BASE_VERSION))
      .orElse(HoodieLogFile.LOGFILE_BASE_VERSION);
  return HoodieLogFormat.newWriterBuilder()
      .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
      .withFileId(fileId).overBaseCommit(baseCommitTime)
      .withLogVersion(latestLogVersion)
      .withSizeThreshold(config.getLogFileMaxSize()).withFs(fs)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
}
/**
 * Validates the (old -> new) log-file rename pairs produced while un-scheduling a
 * compaction: names must be unique on both sides, base commits must move from the
 * compaction instant (old) back to the ingestion instant (new), and the renumbered
 * log versions must not collide with versions already visible in the merged view.
 */
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles,
    String ingestionInstant, String compactionInstant, HoodieTableFileSystemView fsView) {
  // Ensure new names of log-files are on expected lines
  Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
  Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
  renameFiles.stream().forEach(lfPair -> {
    Assert.assertFalse("Old Log File Names do not collide",
        uniqOldLogFiles.contains(lfPair.getKey()));
    Assert.assertFalse("New Log File Names do not collide",
        uniqNewLogFiles.contains(lfPair.getValue()));
    uniqOldLogFiles.add(lfPair.getKey());
    uniqNewLogFiles.add(lfPair.getValue());
  });
  renameFiles.stream().forEach(lfPair -> {
    HoodieLogFile oldLogFile = lfPair.getLeft();
    HoodieLogFile newLogFile = lfPair.getValue();
    // After un-scheduling, renamed (new) files carry the ingestion instant while the
    // original (old) files still carry the compaction instant.
    Assert.assertEquals("Base Commit time is expected", ingestionInstant,
        newLogFile.getBaseCommitTime());
    Assert.assertEquals("Base Commit time is expected", compactionInstant,
        oldLogFile.getBaseCommitTime());
    Assert.assertEquals("File Id is expected", oldLogFile.getFileId(), newLogFile.getFileId());
    // Latest log file for this file-id in the merged view before compaction
    // (findFirst on the slice's log-file stream — presumably reverse-version ordered;
    // TODO confirm against HoodieTableFileSystemView's ordering contract).
    HoodieLogFile lastLogFileBeforeCompaction =
        fsView.getLatestMergedFileSlicesBeforeOrOn(
            HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
            .filter(fs -> fs.getFileId().equals(oldLogFile.getFileId()))
            .map(fs -> fs.getLogFiles().findFirst().get()).findFirst().get();
    // New version = last pre-compaction version + old version offset, and it must
    // strictly exceed the pre-compaction version so file names never collide.
    Assert.assertEquals("Log Version expected",
        lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(),
        newLogFile.getLogVersion());
    Assert.assertTrue("Log version does not collide",
        newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion());
  });
}
// NOTE(review): fragment of a larger try/catch — the opening `try {` sits above this chunk.
this.writer = createLogWriter(fileSlice, baseInstantTime);
this.currentLogFile = writer.getLogFile();
// Record which log file/version this delta write lands in, and the offset at which
// the writer will begin appending (the log file's current size).
((HoodieDeltaWriteStat) writeStatus.getStat()).setLogVersion(currentLogFile.getLogVersion());
((HoodieDeltaWriteStat) writeStatus.getStat()).setLogOffset(writer.getCurrentSize());
} catch (Exception e) {
// NOTE(review): fragment — the while body continues past this chunk.
List<List<IndexedRecord>> allRecords = new ArrayList<>();
// Keep appending until the writer rolls over to log version 4, collecting each
// intermediate log file along the way (i.e. exercises three rollovers).
while (writer.getLogFile().getLogVersion() != 4) {
  logFiles.add(writer.getLogFile());
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
// NOTE(review): fragment of a larger try/catch — the opening `try {` sits above this chunk.
this.writer = createLogWriter(fileSlice, baseInstantTime);
this.currentLogFile = writer.getLogFile();
// Record which log file/version this delta write lands in, and the offset at which
// the writer will begin appending (the log file's current size).
((HoodieDeltaWriteStat) writeStatus.getStat()).setLogVersion(currentLogFile.getLogVersion());
((HoodieDeltaWriteStat) writeStatus.getStat()).setLogOffset(writer.getCurrentSize());
} catch (Exception e) {
/**
 * Verifies that a freshly-built, never-appended log writer reports zero size, a
 * hidden ("." prefixed) file name, and the initial log version of 1.
 */
@Test
public void testEmptyLog() throws IOException, InterruptedException {
  Writer emptyLogWriter = HoodieLogFormat.newWriterBuilder()
      .onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .withFileId("test-fileid1")
      .overBaseCommit("100")
      .withFs(fs)
      .build();
  assertEquals("Just created this log, size should be 0", 0, emptyLogWriter.getCurrentSize());
  assertTrue("Check all log files should start with a .",
      emptyLogWriter.getLogFile().getFileName().startsWith("."));
  assertEquals("Version should be 1 for new log created", 1,
      emptyLogWriter.getLogFile().getLogVersion());
}
@Test public void testRollover() throws IOException, InterruptedException, URISyntaxException { Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") .overBaseCommit("100").withFs(fs).build(); List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100); Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header); // Write out a block writer = writer.appendBlock(dataBlock); // Get the size of the block long size = writer.getCurrentSize(); writer.close(); // Create a writer with the size threshold as the size we just wrote - so this has to roll writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100") .withFs(fs).withSizeThreshold(size - 1).build(); records = SchemaTestUtil.generateTestRecords(0, 100); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); dataBlock = new HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); assertEquals("This should be a new log file and hence size should be 0", 0, writer.getCurrentSize()); assertEquals("Version should be rolled to 2", 2, writer.getLogFile().getLogVersion()); writer.close(); }
// NOTE(review): fragment — a writer builder chain between close() and `.withFs(fs)` is
// elided above this chunk; the snippet verifies that reopening rolls the log version.
int logFileVersion = writer.getLogFile().getLogVersion();
Path logFilePath = writer.getLogFile().getPath();
writer.close();
    .withFs(fs).build();
// The reopened writer must not reuse the previous log version.
Assert.assertFalse(writer.getLogFile().getLogVersion() == logFileVersion);