/**
 * Get the compacted commit timeline visible for this table
 */
public HoodieTimeline getCommitTimeline() {
  switch (this.getTableType()) {
    case COPY_ON_WRITE:
    case MERGE_ON_READ:
      // We need to include the parquet files written out in delta commits in tagging
      return getActiveTimeline().getCommitTimeline();
    default:
      throw new HoodieException("Unsupported table type :" + this.getTableType());
  }
}
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup) throws IOException {
  long totalNew = 0;
  HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();
  for (String commit : commitsToCatchup) {
    HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(
        timeline.getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit)).get(),
        HoodieCommitMetadata.class);
    totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten();
  }
  return totalNew;
}
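A brief usage sketch (not from the source) of how a caller might assemble the commitsToCatchup argument from the completed commit timeline; target and checkpointTs are assumed to be in scope, and the variable names are illustrative only.

// Illustrative caller: collect completed commit timestamps after a known checkpoint
// and count the net-new records they wrote.
List<String> commitsToCatchup = target.getActiveTimeline().getCommitTimeline()
    .filterCompletedInstants()
    .findInstantsAfter(checkpointTs, Integer.MAX_VALUE)
    .getInstants()
    .map(HoodieInstant::getTimestamp)
    .collect(Collectors.toList());
long newRecords = countNewRecords(target, commitsToCatchup);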
private static Map<String, String> readMetadataInfo(@NonNull final HoodieConfiguration hoodieConf) {
  try {
    final FileSystem fs = FSUtils.getFs(hoodieConf.getConf());
    HoodieUtil.initHoodieDataset(fs, hoodieConf);
    final HoodieTableMetaClient hoodieTableMetaClient =
        new HoodieTableMetaClient(new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(),
            hoodieConf.getBasePath(), true);
    final HoodieActiveTimeline hoodieActiveTimeline = hoodieTableMetaClient.getActiveTimeline();
    final java.util.Optional<HoodieInstant> lastInstant = hoodieActiveTimeline.getCommitTimeline()
        .filterCompletedInstants().lastInstant();
    if (lastInstant.isPresent()) {
      log.info("using hoodie instant for reading checkpoint info :{}", lastInstant.get().getTimestamp());
      final HoodieCommitMetadata commitMetadata =
          HoodieCommitMetadata.fromBytes(hoodieActiveTimeline.getInstantDetails(lastInstant.get()).get());
      final String serCommitInfo = commitMetadata.getMetadata(HOODIE_METADATA_KEY);
      if (!Strings.isNullOrEmpty(serCommitInfo)) {
        return MapUtil.deserializeMap(serCommitInfo);
      }
    }
    return new HashMap<>();
  } catch (IOException e) {
    log.error("failed to read metadata info", e);
    throw new JobRuntimeException("failed to read metadata information", e);
  }
}
    final boolean dryRun) throws IOException {
  String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get()
      .getTimestamp();
  List<String> partitionPaths = FSUtils.getAllFoldersThreeLevelsDown(HoodieCLI.fs,
/**
 * Get the commit timeline visible for this table
 */
public HoodieTimeline getCommitsTimeline() {
  switch (this.getTableType()) {
    case COPY_ON_WRITE:
      return getActiveTimeline().getCommitTimeline();
    case MERGE_ON_READ:
      // We need to include the parquet files written out in delta commits
      // Include commit action to be able to start doing a MOR over a COW dataset - no
      // migration required
      return getActiveTimeline().getCommitsTimeline();
    default:
      throw new HoodieException("Unsupported table type :" + this.getTableType());
  }
}
/**
 * Get the commit + pending-compaction timeline visible for this table.
 * A RT filesystem view is constructed with this timeline so that file-slice after pending compaction-requested
 * instant-time is also considered valid. A RT file-system view for reading must then merge the file-slices before
 * and after pending compaction instant so that all delta-commits are read.
 */
public HoodieTimeline getCommitsAndCompactionTimeline() {
  switch (this.getTableType()) {
    case COPY_ON_WRITE:
      return getActiveTimeline().getCommitTimeline();
    case MERGE_ON_READ:
      return getActiveTimeline().getCommitsAndCompactionTimeline();
    default:
      throw new HoodieException("Unsupported table type :" + this.getTableType());
  }
}
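For reference, a minimal sketch (not taken from the codebase) of how these two timelines might feed read-optimized and realtime file-system views; it assumes the methods above are exposed on HoodieTableMetaClient as in the surrounding snippets, and hadoopConf and basePath are placeholders for values already in scope.

// Illustrative only: build the two views from the appropriate timelines.
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath, true);

// Read-optimized view: only completed commits/delta-commits are visible.
HoodieTableFileSystemView roView =
    new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline().filterCompletedInstants());

// Realtime view: include pending compaction instants so file-slices created after a
// requested compaction are still considered valid and merged on read.
HoodieTableFileSystemView rtView =
    new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());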
@Override
public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) {
  logger.info("Checking if compaction needs to be run on " + config.getBasePath());
  Optional<HoodieInstant> lastCompaction = getActiveTimeline().getCommitTimeline()
      .filterCompletedInstants().lastInstant();
  String deltaCommitsSinceTs = "0";
  if (lastCompaction.isPresent()) {
    deltaCommitsSinceTs = lastCompaction.get().getTimestamp();
  }
  int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline()
      .findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants();
  if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) {
    logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction
        + " delta commits were found since last compaction " + deltaCommitsSinceTs + ". Waiting for "
        + config.getInlineCompactDeltaCommitMax());
    return new HoodieCompactionPlan();
  }
  logger.info("Compacting merge on read table " + config.getBasePath());
  HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor();
  try {
    return compactor.generateCompactionPlan(jsc, this, config, instantTime,
        new HashSet<>(((HoodieTableFileSystemView) getRTFileSystemView())
            .getFileIdToPendingCompaction().keySet()));
  } catch (IOException e) {
    throw new HoodieCompactionException("Could not schedule compaction " + config.getBasePath(), e);
  }
}
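The scheduling decision above boils down to counting delta commits written after the last completed compaction commit. A standalone sketch of that check, assuming a HoodieTableMetaClient with the timeline methods shown in the other snippets; shouldScheduleCompaction and maxDeltaCommits are illustrative names, not from the source.

// Illustrative helper: compact once at least maxDeltaCommits delta commits have accrued
// since the last completed compaction commit.
static boolean shouldScheduleCompaction(HoodieTableMetaClient metaClient, int maxDeltaCommits) {
  HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
  // Timestamp of the last completed compaction commit, or "0" if none has run yet.
  String lastCompactionTs = activeTimeline.getCommitTimeline().filterCompletedInstants()
      .lastInstant().map(HoodieInstant::getTimestamp).orElse("0");
  // Number of delta commits written since that compaction.
  int deltaCommitsSince = activeTimeline.getDeltaCommitTimeline()
      .findInstantsAfter(lastCompactionTs, Integer.MAX_VALUE).countInstants();
  return deltaCommitsSince >= maxDeltaCommits;
}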
  timeline = metaClient.getActiveTimeline().getCommitTimeline();
} else if (excludeCompaction) {
  timeline = metaClient.getActiveTimeline().getCommitsTimeline();
HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
@CliCommand(value = "savepoint create", help = "Savepoint a commit") public String savepoint(@CliOption(key = {"commit"}, help = "Commit to savepoint") final String commitTime, @CliOption(key = {"user"}, help = "User who is creating the savepoint") final String user, @CliOption(key = {"comments"}, help = "Comments for creating the savepoint") final String comments) throws Exception { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); if (!timeline.containsInstant(commitInstant)) { return "Commit " + commitTime + " not found in Commits " + timeline; } HoodieWriteClient client = createHoodieClient(null, HoodieCLI.tableMetadata.getBasePath()); if (client.savepoint(commitTime, user, comments)) { // Refresh the current refreshMetaClient(); return String.format("The commit \"%s\" has been savepointed.", commitTime); } return String.format("Failed: Could not savepoint commit \"%s\".", commitTime); }
@Test
public void checkCommitTimeline() throws IOException {
  HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
  HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline();
  assertTrue("Should be empty commit timeline", activeCommitTimeline.empty());

  HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1");
  activeTimeline.createInflight(instant);
  activeTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes()));

  // Commit timeline should not auto-reload every time the active timeline is fetched; it should be cached
  activeTimeline = metaClient.getActiveTimeline();
  activeCommitTimeline = activeTimeline.getCommitTimeline();
  assertTrue("Should be empty commit timeline", activeCommitTimeline.empty());

  HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant);
  activeTimeline = activeTimeline.reload();
  activeCommitTimeline = activeTimeline.getCommitTimeline();
  assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty());
  assertEquals("Commit should be 1", completedInstant, activeCommitTimeline.getInstants().findFirst().get());
  assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(),
      activeCommitTimeline.getInstantDetails(completedInstant).get());
}
@Before
public void init() throws IOException {
  metaClient = HoodieTestUtils.init(tmpFolder.getRoot().getAbsolutePath());
  basePath = metaClient.getBasePath();
  fsView = new HoodieTableFileSystemView(metaClient,
      metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants());
  roView = (TableFileSystemView.ReadOptimizedView) fsView;
  rtView = (TableFileSystemView.RealtimeView) fsView;
}
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit") public String rollbackToSavepoint( @CliOption(key = {"savepoint"}, help = "Savepoint to rollback") final String commitTime, @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") final String sparkPropertiesPath) throws Exception { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); if (!timeline.containsInstant(commitInstant)) { return "Commit " + commitTime + " not found in Commits " + timeline; } SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); // Refresh the current refreshMetaClient(); if (exitCode != 0) { return "Savepoint " + commitTime + " failed to roll back"; } return "Savepoint " + commitTime + " rolled back"; }
@Test
public void testTimelineOperations() throws Exception {
  timeline = new MockHoodieTimeline(Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"),
      Stream.of("21", "23"));
  HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"),
      timeline.getCommitTimeline().filterCompletedInstants().findInstantsInRange("04", "11").getInstants()
          .map(HoodieInstant::getTimestamp));
  HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"),
      timeline.getCommitTimeline().filterCompletedInstants().findInstantsAfter("07", 2).getInstants()
          .map(HoodieInstant::getTimestamp));
  assertFalse(timeline.empty());
  assertFalse(timeline.getCommitTimeline().filterInflightsExcludingCompaction().empty());
  assertEquals("", 12, timeline.countInstants());
  HoodieTimeline activeCommitTimeline = timeline.getCommitTimeline().filterCompletedInstants();
  assertEquals("", 10, activeCommitTimeline.countInstants());
  assertEquals("", "01", activeCommitTimeline.firstInstant().get().getTimestamp());
  assertEquals("", "11", activeCommitTimeline.nthInstant(5).get().getTimestamp());
  assertEquals("", "19", activeCommitTimeline.lastInstant().get().getTimestamp());
  assertEquals("", "09", activeCommitTimeline.nthFromLastInstant(5).get().getTimestamp());
  assertTrue("", activeCommitTimeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "09")));
  assertFalse("", activeCommitTimeline.isBeforeTimelineStarts("02"));
  assertTrue("", activeCommitTimeline.isBeforeTimelineStarts("00"));
}
HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete, instant5), timeline.getCommitTimeline().getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete), timeline.getCommitTimeline().filterCompletedInstants().getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant5), timeline.getCommitTimeline().filterInflightsExcludingCompaction().getInstants());
new HoodieTableMetaClient(
    new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(), basePath.toString(), true)
    .getActiveTimeline().getCommitTimeline().filterCompletedInstants().empty());
new HoodieTableMetaClient(
    new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(), basePath.toString(), true)
    .getActiveTimeline().getCommitTimeline().filterCompletedInstants().empty());
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime);
assertEquals("Should contain 100 records", readRecords.count(), records.size());

fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf());
metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath);
timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
Dataset<Row> localReadRecords =
    HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime);
assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline();
assertEquals("Expecting a single commit.", 1,
    timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants());
Optional<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
assertTrue(commit.isPresent());
assertEquals("commit should be 001", "001", commit.get().getTimestamp());