public Path makeTempPath(String partitionPath, int taskPartitionId, String fileName, int stageId,
    long taskAttemptId) {
  Path path = new Path(config.getBasePath(), HoodieTableMetaClient.TEMPFOLDER_NAME);
  return new Path(path.toString(),
      FSUtils.makeTempDataFileName(partitionPath, commitTime, taskPartitionId, fileName, stageId, taskAttemptId));
}
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) {
  Path path = FSUtils.getPartitionPath(config.getBasePath(), partitionPath);
  try {
    fs.mkdirs(path); // create a new partition as needed.
  } catch (IOException e) {
    throw new HoodieIOException("Failed to make dir " + path, e);
  }
  return new Path(path.toString(), FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName));
}
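// A minimal usage sketch, assuming a HoodieTable instance `table`; the
// partition path, ids, and file name below are hypothetical. makeTempPath
// resolves under the table-level .temp folder, while makeNewPath creates the
// partition directory on demand and resolves directly under it.
Path tempPath = table.makeTempPath("2019/01/01", 0, "f1-0", 1, 0L);
Path dataPath = table.makeNewPath("2019/01/01", 0, "f1-0");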
@VisibleForTesting
HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight,
    HoodieIndex index) {
  this.fs = FSUtils.getFs(clientConfig.getBasePath(), jsc.hadoopConfiguration());
  this.jsc = jsc;
  this.config = clientConfig;
  this.index = index;
  this.metrics = new HoodieMetrics(config, config.getTableName());
  this.rollbackInFlight = rollbackInFlight;
}
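// A hedged sketch of wiring this test-visible constructor together, reusing
// only the config builder calls and index factory that appear elsewhere in
// this section; `jsc` and `basePath` are assumed to come from the test harness.
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
    .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
    .forTable("test-trip-table").build();
HoodieWriteClient client =
    new HoodieWriteClient(jsc, config, true, HoodieIndex.createIndex(config, jsc));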
@Override
public JavaRDD<WriteStatus> compact(JavaSparkContext jsc, String compactionInstantTime,
    HoodieCompactionPlan compactionPlan) {
  HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor();
  try {
    return compactor.compact(jsc, compactionPlan, this, config, compactionInstantTime);
  } catch (IOException e) {
    throw new HoodieCompactionException("Could not compact " + config.getBasePath(), e);
  }
}
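// A hedged sketch of driving compact(...), composed only from calls shown in
// this section: schedule a compaction instant on the client, read the plan
// back from the timeline's auxiliary details, then hand it to the table.
// `client`, `metaClient`, `table`, and `compactionInstantTime` are assumed to exist.
client.scheduleCompactionAtInstant(compactionInstantTime, Optional.empty());
HoodieCompactionPlan plan = AvroUtils.deserializeCompactionPlan(metaClient.getActiveTimeline()
    .getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
JavaRDD<WriteStatus> statuses = table.compact(jsc, compactionInstantTime, plan);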
protected HoodieTable(HoodieWriteConfig config, JavaSparkContext jsc) {
  this.config = config;
  this.metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  this.index = HoodieIndex.createIndex(config, jsc);
}
/**
 * Commit changes performed at the given commitTime marker.
 */
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
    Optional<Map<String, String>> extraMetadata) {
  HoodieTableMetaClient metaClient =
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
}
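// A minimal usage sketch for the commit(...) entry point above, assuming a
// `client` and a `writeStatuses` RDD from an earlier write; the metadata key
// and value are illustrative placeholders.
Map<String, String> extraMetadata = new HashMap<>();
extraMetadata.put("checkpoint", "batch-42"); // hypothetical metadata entry
boolean committed = client.commit(commitTime, writeStatuses, Optional.of(extraMetadata));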
private HoodieTable getTableAndInitCtx() {
  // Create a Hoodie table which encapsulates the commits and files visible
  HoodieTable table = HoodieTable.getHoodieTable(
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
  if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
    writeContext = metrics.getCommitCtx();
  } else {
    writeContext = metrics.getDeltaCommitCtx();
  }
  return table;
}
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
    HoodieTable<T> table, String commitTime) {
  // Update the index back
  JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
  // Trigger the insert and collect statuses
  statuses = statuses.persist(config.getWriteStatusStorageLevel());
  commitOnAutoCommit(commitTime, statuses,
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true).getCommitActionType());
  return statuses;
}
private List<HoodieDataFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
  FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(table.getMetaClient().getFs(), cfg.getBasePath());
  HoodieTableFileSystemView view =
      new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
  return view.getLatestDataFiles().collect(Collectors.toList());
}
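// A hedged sketch of how a test might consume the helper above;
// `expectedFileCount` is a hypothetical placeholder, not a project constant.
List<HoodieDataFile> dataFilesToRead = getCurrentLatestDataFiles(table, cfg);
assertEquals("Must expose the expected number of latest data files",
    expectedFileCount /* hypothetical */, dataFilesToRead.size());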
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
    throws IOException {
  client.scheduleCompactionAtInstant(compactionInstantTime, Optional.empty());
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
  assertEquals("Last compaction instant must be the one set", compactionInstantTime, instant.getTimestamp());
}
private void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteClient client,
    HoodieWriteConfig cfg) throws IOException {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
      metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
  metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
  HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
      .filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
  assertTrue("Instant must be marked inflight", instant.isInflight());
}
@Test
public void testArchiveEmptyDataset() throws IOException {
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
      .forTable("test-trip-table").build();
  HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg,
      new HoodieTableMetaClient(dfs.getConf(), cfg.getBasePath(), true));
  boolean result = archiveLog.archiveIfRequired(jsc);
  assertTrue(result);
}