/**
 * Builds a timeline restricted to the instants whose timestamp compares as GREATER than
 * {@code startTs} and LESSER_OR_EQUAL to {@code endTs}, i.e. the range (startTs, endTs].
 *
 * @param startTs exclusive lower bound of the range
 * @param endTs inclusive upper bound of the range
 * @return a new timeline over the matching instants, constructed with this timeline's {@code details}
 */
@Override
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
  return new HoodieDefaultTimeline(
      instants.stream()
          // The second filter only runs for instants that passed the first one, exactly like a short-circuit &&.
          .filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), startTs, GREATER))
          .filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), endTs, LESSER_OR_EQUAL)),
      details);
}
/**
 * Builds a timeline of at most {@code numCommits} instants whose timestamp compares as GREATER than
 * {@code commitTime}, taken in the iteration order of {@code instants}.
 *
 * @param commitTime exclusive lower bound for the instant timestamps
 * @param numCommits maximum number of instants to keep
 * @return a new timeline over the selected instants, constructed with this timeline's {@code details}
 */
@Override
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
  return new HoodieDefaultTimeline(
      instants.stream()
          .filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), commitTime, GREATER))
          .limit(numCommits),
      details);
}
/**
 * Tells whether the given instant time compares as LESSER than the first instant of this timeline.
 *
 * @param instant instant time to test
 * @return {@code false} when the timeline has no first instant; otherwise the result of the comparison
 */
@Override
public boolean isBeforeTimelineStarts(String instant) {
  Optional<HoodieInstant> earliest = firstInstant();
  if (!earliest.isPresent()) {
    // Nothing to compare against.
    return false;
  }
  return HoodieTimeline.compareTimestamps(instant, earliest.get().getTimestamp(), LESSER);
}
/**
 * Returns the first file slice produced by {@link #getAllFileSlices()} whose base instant time is
 * less than or equal to {@code maxCommitTime}.
 *
 * <p>NOTE(review): this yields the "latest" slice only if {@code getAllFileSlices()} emits newest
 * slices first - confirm against that method.
 *
 * @param maxCommitTime inclusive upper bound on the slice's base instant time
 * @return the first matching file slice, or an empty Optional when none matches
 */
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
  return getAllFileSlices()
      .filter(fileSlice -> HoodieTimeline.compareTimestamps(
          fileSlice.getBaseInstantTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL))
      .findFirst();
}
/**
 * Returns the first file slice produced by {@link #getAllFileSlices()} whose base instant time is
 * strictly less than {@code maxInstantTime}.
 *
 * <p>NOTE(review): this yields the "latest" slice only if {@code getAllFileSlices()} emits newest
 * slices first - confirm against that method.
 *
 * @param maxInstantTime exclusive upper bound on the slice's base instant time
 * @return the first matching file slice, or an empty Optional when none matches
 */
public Optional<FileSlice> getLatestFileSliceBefore(String maxInstantTime) {
  return getAllFileSlices()
      .filter(fileSlice -> HoodieTimeline.compareTimestamps(
          fileSlice.getBaseInstantTime(), maxInstantTime, HoodieTimeline.LESSER))
      .findFirst();
}
/**
 * For every file group of the partition, picks the first data file whose commit time equals
 * {@code instantTime} and for which {@code isDataFileDueToPendingCompaction} returns false.
 * File groups without such a data file contribute nothing to the result.
 *
 * @param partitionPath partition whose file groups are examined
 * @param instantTime commit time the data file must match exactly
 * @return stream with at most one data file per file group
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime) {
  return getAllFileGroups(partitionPath)
      .map(group -> group.getAllDataFiles()
          // Same two checks as before, evaluated in the same order with short-circuiting.
          .filter(candidate ->
              HoodieTimeline.compareTimestamps(candidate.getCommitTime(), instantTime, HoodieTimeline.EQUAL)
                  && !isDataFileDueToPendingCompaction(candidate))
          .findFirst())
      .filter(match -> match.isPresent())
      .map(match -> match.get());
}
/**
 * For every file group of the partition, picks the first data file whose commit time is less than
 * or equal to {@code maxCommitTime} and for which {@code isDataFileDueToPendingCompaction} returns
 * false. File groups without such a data file contribute nothing to the result.
 *
 * @param partitionPath partition whose file groups are examined
 * @param maxCommitTime inclusive upper bound on the data file's commit time
 * @return stream with at most one data file per file group
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
  return getAllFileGroups(partitionPath)
      .map(group -> group.getAllDataFiles()
          // Same two checks as before, evaluated in the same order with short-circuiting.
          .filter(candidate ->
              HoodieTimeline.compareTimestamps(
                  candidate.getCommitTime(), maxCommitTime, HoodieTimeline.LESSER_OR_EQUAL)
                  && !isDataFileDueToPendingCompaction(candidate))
          .findFirst())
      .filter(match -> match.isPresent())
      .map(match -> match.get());
}
/** * Gets the latest version < commitTime. This version file could still be used by queries. */ private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList, HoodieInstant commitTime) { for (FileSlice file : fileSliceList) { String fileCommitTime = file.getBaseInstantTime(); if (HoodieTimeline .compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) { // fileList is sorted on the reverse, so the first commit we find <= commitTime is the // one we want return fileCommitTime; } } // There is no version of this file which is <= commitTime return null; }
/** * Gets the latest version < commitTime. This version file could still be used by queries. */ private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList, HoodieInstant commitTime) { for (FileSlice file : fileSliceList) { String fileCommitTime = file.getBaseInstantTime(); if (HoodieTimeline .compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) { // fileList is sorted on the reverse, so the first commit we find <= commitTime is the // one we want return fileCommitTime; } } // There is no version of this file which is <= commitTime return null; }
/** * Determine if file slice needed to be preserved for pending compaction * @param fileSlice File Slice * @return true if file slice needs to be preserved, false otherwise. */ private boolean isFileSliceNeededForPendingCompaction(FileSlice fileSlice) { CompactionOperation op = fileIdToPendingCompactionOperations.get(fileSlice.getFileId()); if (null != op) { // If file slice's instant time is newer or same as that of operation, do not clean return HoodieTimeline.compareTimestamps(fileSlice.getBaseInstantTime(), op.getBaseInstantTime(), HoodieTimeline.GREATER_OR_EQUAL); } return false; } }
/** * Determine if file slice needed to be preserved for pending compaction * @param fileSlice File Slice * @return true if file slice needs to be preserved, false otherwise. */ private boolean isFileSliceNeededForPendingCompaction(FileSlice fileSlice) { CompactionOperation op = fileIdToPendingCompactionOperations.get(fileSlice.getFileId()); if (null != op) { // If file slice's instant time is newer or same as that of operation, do not clean return HoodieTimeline.compareTimestamps(fileSlice.getBaseInstantTime(), op.getBaseInstantTime(), HoodieTimeline.GREATER_OR_EQUAL); } return false; } }
/**
 * Treats a file slice as committed when its base instant time is accepted by
 * {@code timeline.containsOrBeforeTimelineStarts} and is not later than the timestamp of
 * {@code lastInstant}.
 *
 * <p>NOTE(review): {@code lastInstant.get()} is called unconditionally, so this presumably relies
 * on the caller guaranteeing a last instant exists - confirm.
 *
 * @param slice file slice to check
 * @return {@code true} when both conditions hold
 */
private boolean isFileSliceCommitted(FileSlice slice) {
  String latestInstantTime = lastInstant.get().getTimestamp();
  String sliceInstantTime = slice.getBaseInstantTime();
  if (!timeline.containsOrBeforeTimelineStarts(sliceInstantTime)) {
    return false;
  }
  return HoodieTimeline.compareTimestamps(sliceInstantTime, latestInstantTime, HoodieTimeline.LESSER_OR_EQUAL);
}
/**
 * Removes, from the auxiliary meta folder, every instant file whose timestamp is older than or
 * equal to the given threshold instant.
 *
 * <p>Instant files that no longer exist are skipped and do not affect the result. A success
 * message is logged only when the file system reports the delete as successful; a failed delete
 * is logged as a warning and makes the method return {@code false}.
 *
 * @param thresholdInstant instant whose timestamp is the inclusive upper bound for deletion
 * @return {@code true} if every existing eligible file was deleted successfully
 * @throws IOException in case of error while scanning or deleting
 */
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
  List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(
      metaClient.getFs(),
      new Path(metaClient.getMetaAuxiliaryPath()),
      HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);

  List<HoodieInstant> instantsToBeDeleted = instants.stream()
      .filter(instant1 -> HoodieTimeline.compareTimestamps(
          instant1.getTimestamp(), thresholdInstant.getTimestamp(), LESSER_OR_EQUAL))
      .collect(Collectors.toList());

  boolean success = true;
  for (HoodieInstant deleteInstant : instantsToBeDeleted) {
    log.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath());
    Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());
    if (metaClient.getFs().exists(metaFile)) {
      boolean deleted = metaClient.getFs().delete(metaFile, false);
      success &= deleted;
      // Report the real outcome instead of unconditionally claiming the file was deleted.
      if (deleted) {
        log.info("Deleted instant file in auxiliary metapath : " + metaFile);
      } else {
        log.warn("Failed to delete instant file in auxiliary metapath : " + metaFile);
      }
    }
  }
  return success;
}
/**
 * Removes, from the auxiliary meta folder, every instant file whose timestamp is older than or
 * equal to the given threshold instant.
 *
 * <p>Instant files that no longer exist are skipped and do not affect the result. A success
 * message is logged only when the file system reports the delete as successful; a failed delete
 * is logged as a warning and makes the method return {@code false}.
 *
 * @param thresholdInstant instant whose timestamp is the inclusive upper bound for deletion
 * @return {@code true} if every existing eligible file was deleted successfully
 * @throws IOException in case of error while scanning or deleting
 */
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
  List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(
      metaClient.getFs(),
      new Path(metaClient.getMetaAuxiliaryPath()),
      HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);

  List<HoodieInstant> instantsToBeDeleted = instants.stream()
      .filter(instant1 -> HoodieTimeline.compareTimestamps(
          instant1.getTimestamp(), thresholdInstant.getTimestamp(), LESSER_OR_EQUAL))
      .collect(Collectors.toList());

  boolean success = true;
  for (HoodieInstant deleteInstant : instantsToBeDeleted) {
    log.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath());
    Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());
    if (metaClient.getFs().exists(metaFile)) {
      boolean deleted = metaClient.getFs().delete(metaFile, false);
      success &= deleted;
      // Report the real outcome instead of unconditionally claiming the file was deleted.
      if (deleted) {
        log.info("Deleted instant file in auxiliary metapath : " + metaFile);
      } else {
        log.warn("Failed to delete instant file in auxiliary metapath : " + metaFile);
      }
    }
  }
  return success;
}
private boolean checkIfValidCommit(HoodieTableMetaClient metaClient, String commitTs) { HoodieTimeline commitTimeline = metaClient.getActiveTimeline().filterCompletedInstants(); // Check if the last commit ts for this row is 1) present in the timeline or // 2) is less than the first commit ts in the timeline return !commitTimeline.empty() && (commitTimeline .containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs)) || HoodieTimeline .compareTimestamps(commitTimeline.firstInstant().get().getTimestamp(), commitTs, HoodieTimeline.GREATER)); }
private boolean checkIfValidCommit(HoodieTableMetaClient metaClient, String commitTs) { HoodieTimeline commitTimeline = metaClient.getActiveTimeline().filterCompletedInstants(); // Check if the last commit ts for this row is 1) present in the timeline or // 2) is less than the first commit ts in the timeline return !commitTimeline.empty() && (commitTimeline .containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs)) || HoodieTimeline .compareTimestamps(commitTimeline.firstInstant().get().getTimestamp(), commitTs, HoodieTimeline.GREATER)); }
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset") public String compareCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path) throws Exception { HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.conf, path); HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); HoodieTableMetaClient source = HoodieCLI.tableMetadata; HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); String targetLatestCommit = targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp(); String sourceLatestCommit = sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp(); if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) { // source is behind the target List<String> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE) .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; } else { List<String> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE) .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); return "Source " + source.getTableConfig().getTableName() + " is ahead by " + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; } }
HoodieTimeline.compareTimestamps(earliestInflight.getTimestamp(), instantTime, HoodieTimeline.GREATER), "Earliest write inflight instant time must be later " + "than compaction time. Earliest :" + earliestInflight + ", Compaction scheduled at " + instantTime); HoodieTimeline.compareTimestamps(instant.getTimestamp(), instantTime, HoodieTimeline.GREATER_OR_EQUAL)).collect(Collectors.toList()); Preconditions.checkArgument(conflictingInstants.isEmpty(),
public void startCommitWithTime(String instantTime) { if (rollbackInFlight) { // Only rollback inflight commit/delta-commits. Do not touch compaction commits rollbackInflightCommits(); } logger.info("Generate a new instant time " + instantTime); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath()); // if there are pending compactions, their instantTime must not be greater than that of this instant time metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().ifPresent(latestPending -> { Preconditions.checkArgument( HoodieTimeline.compareTimestamps(latestPending.getTimestamp(), instantTime, HoodieTimeline.LESSER), "Latest pending compaction instant time must be earlier " + "than this instant time. Latest Compaction :" + latestPending + ", Ingesting at " + instantTime); }); HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc); HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); String commitActionType = table.getMetaClient().getCommitActionType(); activeTimeline.createInflight(new HoodieInstant(true, commitActionType, instantTime)); }
public void startCommitWithTime(String instantTime) { if (rollbackInFlight) { // Only rollback inflight commit/delta-commits. Do not touch compaction commits rollbackInflightCommits(); } logger.info("Generate a new instant time " + instantTime); HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath()); // if there are pending compactions, their instantTime must not be greater than that of this instant time metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().ifPresent(latestPending -> { Preconditions.checkArgument( HoodieTimeline.compareTimestamps(latestPending.getTimestamp(), instantTime, HoodieTimeline.LESSER), "Latest pending compaction instant time must be earlier " + "than this instant time. Latest Compaction :" + latestPending + ", Ingesting at " + instantTime); }); HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc); HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); String commitActionType = table.getMetaClient().getCommitActionType(); activeTimeline.createInflight(new HoodieInstant(true, commitActionType, instantTime)); }