/**
 * Create a file system view, as of the given timeline
 */
public HoodieTableFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline) {
  this.metaClient = metaClient;
  this.visibleActiveTimeline = visibleActiveTimeline;
  this.fileGroupMap = new HashMap<>();
  this.partitionToFileGroupsMap = new HashMap<>();
  // Build fileId to Pending Compaction Instants
  List<HoodieInstant> pendingCompactionInstants =
      metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
  this.fileIdToPendingCompaction = ImmutableMap.copyOf(
      CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream()
          .map(entry -> Pair.of(entry.getKey(),
              Pair.of(entry.getValue().getKey(),
                  CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue()))))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
}
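// A hedged usage sketch (not from the source): constructing the view above over the completed
// commits of a table. `hadoopConf` and `basePath` are assumed to be supplied by the caller, and
// choosing getCommitsTimeline().filterCompletedInstants() as the visible timeline is an assumption.
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(hadoopConf, basePath);
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
    metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());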
/**
 * Get all pending compaction plans along with their instants
 *
 * @param metaClient Hoodie Meta Client
 */
public static List<Pair<HoodieInstant, HoodieCompactionPlan>> getAllPendingCompactionPlans(
    HoodieTableMetaClient metaClient) {
  List<HoodieInstant> pendingCompactionInstants =
      metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
  return pendingCompactionInstants.stream().map(instant -> {
    try {
      return Pair.of(instant, getCompactionPlan(metaClient, instant.getTimestamp()));
    } catch (IOException e) {
      throw new HoodieException(e);
    }
  }).collect(Collectors.toList());
}
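// A hedged usage sketch (not from the source): iterating the pairs returned by the utility above.
// Assumes getAllPendingCompactionPlans lives in CompactionUtils alongside
// getAllPendingCompactionOperations, and that `metaClient` points at an existing table.
List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingPlans =
    CompactionUtils.getAllPendingCompactionPlans(metaClient);
for (Pair<HoodieInstant, HoodieCompactionPlan> pending : pendingPlans) {
  HoodieInstant instant = pending.getKey();
  // The right-hand side carries the deserialized HoodieCompactionPlan scheduled at that instant.
  System.out.println("Pending compaction scheduled at " + instant.getTimestamp());
}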
table.getActiveTimeline().filterPendingCompactionTimeline().firstInstant();
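// A hedged sketch (not from the source): firstInstant() returns an Optional<HoodieInstant>, so
// callers typically guard before using the result; the logging below is illustrative only.
Optional<HoodieInstant> earliestPendingCompaction =
    table.getActiveTimeline().filterPendingCompactionTimeline().firstInstant();
earliestPendingCompaction.ifPresent(instant ->
    System.out.println("Earliest pending compaction: " + instant.getTimestamp()));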
    new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
Set<String> pendingCompactions = table.getActiveTimeline().filterPendingCompactionTimeline().getInstants()
    .map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
HoodieTimeline inflightCommitTimeline = table.getInflightCommitTimeline();
HoodieInstant compactionInflightInstant =
    new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionTime);
boolean isCompactionInstantInRequestedState = table.getActiveTimeline().filterPendingCompactionTimeline()
    .containsInstant(compactionRequestedInstant);
boolean isCompactionInstantInInflightState = table.getActiveTimeline().filterPendingCompactionTimeline()
    .containsInstant(compactionInflightInstant);
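// A hedged sketch (not from the source): compactionRequestedInstant is referenced but not shown
// above; it is presumably constructed symmetrically to the inflight instant, in the REQUESTED state.
HoodieInstant compactionRequestedInstant =
    new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, compactionTime);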
public void startCommitWithTime(String instantTime) {
  if (rollbackInFlight) {
    // Only rollback inflight commit/delta-commits. Do not touch compaction commits
    rollbackInflightCommits();
  }
  logger.info("Generate a new instant time " + instantTime);
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath());
  // if there are pending compactions, their instantTime must not be greater than that of this instant time
  metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().ifPresent(latestPending -> {
    Preconditions.checkArgument(
        HoodieTimeline.compareTimestamps(latestPending.getTimestamp(), instantTime, HoodieTimeline.LESSER),
        "Latest pending compaction instant time must be earlier than this instant time. Latest Compaction :"
            + latestPending + ", Ingesting at " + instantTime);
  });
  HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc);
  HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
  String commitActionType = table.getMetaClient().getCommitActionType();
  activeTimeline.createInflight(new HoodieInstant(true, commitActionType, instantTime));
}
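// A hedged caller sketch (not from the source): startCommitWithTime is driven with an explicit
// instant time. HoodieActiveTimeline.createNewCommitTime() is assumed to be the usual way to mint
// one in this codebase; treat that helper, and `client`, as assumptions.
String newInstantTime = HoodieActiveTimeline.createNewCommitTime();
client.startCommitWithTime(newInstantTime);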
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
    throws IOException {
  client.scheduleCompactionAtInstant(compactionInstantTime, Optional.empty());
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
  assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
}
private void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteClient client,
    HoodieWriteConfig cfg) throws IOException {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
      metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
  metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
  HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
      .filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
  assertTrue("Instant must be marked inflight", instant.isInflight());
}
/**
 * Ensures compaction instant is in expected state and performs Compaction for the workload stored in instant-time
 *
 * @param compactionInstantTime Compaction Instant Time
 * @return RDD of write statuses produced by the compaction
 * @throws IOException
 */
private JavaRDD<WriteStatus> compact(String compactionInstantTime, boolean autoCommit) throws IOException {
  // Create a Hoodie table which encapsulates the commits and files visible
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc);
  HoodieTimeline pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
  HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
  if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
    // inflight compaction - needs to be rolled back first, deleting new parquet files, before we run compaction
    rollbackInflightCompaction(inflightInstant, table);
    // refresh table
    metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
    table = HoodieTable.getHoodieTable(metaClient, config, jsc);
    pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
  }
  HoodieInstant instant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  if (pendingCompactionTimeline.containsInstant(instant)) {
    return runCompaction(instant, metaClient.getActiveTimeline(), autoCommit);
  } else {
    throw new IllegalStateException(
        "No Compaction request available at " + compactionInstantTime + " to run compaction");
  }
}
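// A hedged caller sketch (not from the source): the private compact(...) above is presumably
// reached through a public entry point on the write client; the wrapper name `compact` used below
// is an assumption about that API, not something confirmed by the snippet.
JavaRDD<WriteStatus> compactionStatuses = writeClient.compact(compactionInstantTime);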
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant pendingCompactionInstant =
    metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertTrue("Pending Compaction instant has expected instant time",
    pendingCompactionInstant.getTimestamp().equals(compactionInstantTime));