/**
 * Schedules a new compaction instant.
 *
 * @param extraMetadata optional extra metadata to store alongside the compaction plan
 * @return the compaction instant time if a compaction was scheduled, empty otherwise
 */
public Optional<String> scheduleCompaction(Optional<Map<String, String>> extraMetadata) throws IOException {
  String instantTime = HoodieActiveTimeline.createNewCommitTime();
  logger.info("Generated a new instant time " + instantTime);
  boolean notEmpty = scheduleCompactionAtInstant(instantTime, extraMetadata);
  return notEmpty ? Optional.of(instantTime) : Optional.empty();
}
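As a usage sketch (not from the source): assuming an already-constructed HoodieWriteClient named writeClient, a caller might schedule a compaction and branch on the returned instant:

// Hypothetical caller; writeClient is an assumed, pre-built HoodieWriteClient.
Optional<String> compactionInstant = writeClient.scheduleCompaction(Optional.empty());
if (compactionInstant.isPresent()) {
  logger.info("Compaction scheduled at instant " + compactionInstant.get());
} else {
  logger.info("No compaction was scheduled (nothing to compact)");
}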
/**
 * Cleans up any stale/old files/data lying around (either on file storage or index storage), based
 * on the configurations and CleaningPolicy used. Typically, files that can no longer be used by a
 * running query are eligible for cleaning.
 */
public void clean() throws HoodieIOException {
  String startCleanTime = HoodieActiveTimeline.createNewCommitTime();
  clean(startCleanTime);
}
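For illustration, a hedged sketch of wiring a cleaning policy into the write config before invoking clean(); the builder calls mirror Hudi's config builders, and basePath/jsc are assumed to exist:

// Assumed setup: retain the latest 10 commits' files when cleaning (values illustrative).
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
        .retainCommits(10)
        .build())
    .build();
HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config);
writeClient.clean(); // the clean instant time is generated internally via createNewCommitTime()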
/**
 * Provides a new commit time for a write operation (insert/update).
 */
public String startCommit() {
  String commitTime = HoodieActiveTimeline.createNewCommitTime();
  startCommitWithTime(commitTime);
  return commitTime;
}
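A minimal sketch of the commit lifecycle this method starts, assuming an existing HoodieWriteClient and an input JavaRDD<HoodieRecord> (both placeholders):

// startCommit() reserves a new instant; the same commit time threads through write and commit.
String commitTime = writeClient.startCommit();
JavaRDD<WriteStatus> writeStatuses = writeClient.upsert(inputRecords, commitTime);
writeClient.commit(commitTime, writeStatuses);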
public static List<IndexedRecord> generateHoodieTestRecords(int from, int limit)
    throws IOException, URISyntaxException {
  List<IndexedRecord> records = generateTestRecords(from, limit);
  String commitTime = HoodieActiveTimeline.createNewCommitTime();
  Schema hoodieFieldsSchema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  return records.stream()
      .map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, hoodieFieldsSchema))
      .map(p -> {
        // Stamp each record with the Hoodie metadata fields: record key, partition path, commit time.
        p.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, UUID.randomUUID().toString());
        p.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00");
        p.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
        return p;
      })
      .collect(Collectors.toList());
}
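A small usage sketch of this helper, assuming the surrounding test scaffolding (variable names here are illustrative):

// Generate 100 test records and read back the commit time stamped into the metadata fields.
List<IndexedRecord> testRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
GenericRecord first = (GenericRecord) testRecords.get(0);
String stampedCommitTime = first.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();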
@CliCommand(value = "compaction schedule", help = "Schedule Compaction") public String scheduleCompact( @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G", help = "Spark executor memory") final String sparkMemory) throws Exception { boolean initialized = HoodieCLI.initConf(); HoodieCLI.initFS(initialized); // First get a compaction instant time and pass it to spark launcher for scheduling compaction String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime(); if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) { String sparkPropertiesPath = Utils.getDefaultPropertiesFile( scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties())); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE.toString(), HoodieCLI.tableMetadata.getBasePath(), HoodieCLI.tableMetadata.getTableConfig().getTableName(), compactionInstantTime, sparkMemory); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Failed to run compaction for " + compactionInstantTime; } return "Compaction successfully completed for " + compactionInstantTime; } else { throw new Exception("Compactions can only be run for table type : MERGE_ON_READ"); } }
// First round: stage a record and its key for update under a fresh commit time.
recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get());
String newCommitTime = HoodieActiveTimeline.createNewCommitTime();
List<String> keysToBeUpdated = new ArrayList<>();
keysToBeUpdated.add(key);

// Second round: repeat the same pattern under another fresh commit time.
recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get());
newCommitTime = HoodieActiveTimeline.createNewCommitTime();
keysToBeUpdated = new ArrayList<>();
keysToBeUpdated.add(key);
SchemaTestUtil.generateHoodieTestRecords(0, 100), HoodieActiveTimeline.createNewCommitTime());
@Test(expected = HoodieNotSupportedException.class)
public void testCompactionOnCopyOnWriteFail() throws Exception {
  HoodieTestUtils.initTableType(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
  String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime();
  table.compact(jsc, compactionInstantTime, table.scheduleCompaction(jsc, compactionInstantTime));
}
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime();
JavaRDD<WriteStatus> result =
    table.compact(jsc, compactionInstantTime, table.scheduleCompaction(jsc, compactionInstantTime));
@Test
public void testCompactionEmpty() throws Exception {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieWriteConfig config = getConfig();
  HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
  HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config);

  String newCommitTime = writeClient.startCommit();
  List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
  JavaRDD<HoodieRecord> recordsRDD = jsc.parallelize(records, 1);
  writeClient.insert(recordsRDD, newCommitTime).collect();

  String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime();
  JavaRDD<WriteStatus> result =
      table.compact(jsc, compactionInstantTime, table.scheduleCompaction(jsc, compactionInstantTime));
  assertTrue("If there is nothing to compact, result will be empty", result.isEmpty());
}