private Optional<byte[]> readDataFromPath(Path detailPath) {
  try (FSDataInputStream is = metaClient.getFs().open(detailPath)) {
    return Optional.of(IOUtils.toByteArray(is));
  } catch (IOException e) {
    throw new HoodieIOException("Could not read commit details from " + detailPath, e);
  }
}
private void createFileInPath(Path fullPath, Optional<byte[]> content) {
  try {
    // If the path does not exist, create it first
    if (!metaClient.getFs().exists(fullPath)) {
      if (metaClient.getFs().createNewFile(fullPath)) {
        log.info("Created a new file in meta path: " + fullPath);
      } else {
        throw new HoodieIOException("Failed to create file " + fullPath);
      }
    }
    if (content.isPresent()) {
      // Use try-with-resources so the stream is closed even if the write fails
      try (FSDataOutputStream fsout = metaClient.getFs().create(fullPath, true)) {
        fsout.write(content.get());
      }
    }
  } catch (IOException e) {
    throw new HoodieIOException("Failed to create file " + fullPath, e);
  }
}
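/**
 * A minimal round-trip sketch of the read/write pair above: persist bytes at a path in the
 * meta folder and read them back. This helper and the file name "example.commit" are
 * purely illustrative, not part of the codebase.
 */
private void roundTripExample() {
  Path detailPath = new Path(metaClient.getMetaPath(), "example.commit"); // hypothetical file name
  createFileInPath(detailPath, Optional.of("commit metadata".getBytes()));
  Optional<byte[]> readBack = readDataFromPath(detailPath);
  if (readBack.isPresent()) {
    log.info("Read back " + readBack.get().length + " bytes from " + detailPath);
  }
}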
/**
 * Common method used for cleaning out parquet files under a partition path during rollback
 * of a set of commits.
 */
protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> results,
    String partitionPath, PathFilter filter) throws IOException {
  logger.info("Cleaning path " + partitionPath);
  FileSystem fs = getMetaClient().getFs();
  FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
  for (FileStatus file : toBeDeleted) {
    boolean success = fs.delete(file.getPath(), false);
    results.put(file, success);
    logger.info("Delete file " + file.getPath() + "\t" + success);
  }
  return results;
}
private static PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat> deleteFilesFunc(
    HoodieTable table) {
  return (PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat>) iter -> {
    Map<String, PartitionCleanStat> partitionCleanStatMap = new HashMap<>();
    FileSystem fs = table.getMetaClient().getFs();
    while (iter.hasNext()) {
      Tuple2<String, String> partitionDelFileTuple = iter.next();
      String partitionPath = partitionDelFileTuple._1();
      String deletePathStr = partitionDelFileTuple._2();
      Boolean deletedFileResult = deleteFileAndGetResult(fs, deletePathStr);
      if (!partitionCleanStatMap.containsKey(partitionPath)) {
        partitionCleanStatMap.put(partitionPath, new PartitionCleanStat(partitionPath));
      }
      PartitionCleanStat partitionCleanStat = partitionCleanStatMap.get(partitionPath);
      partitionCleanStat.addDeleteFilePatterns(deletePathStr);
      partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult);
    }
    return partitionCleanStatMap.entrySet().stream()
        .map(e -> new Tuple2<>(e.getKey(), e.getValue()))
        .collect(Collectors.toList()).iterator();
  };
}
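/**
 * Hedged usage sketch: how deleteFilesFunc could be applied over a Spark RDD of
 * (partitionPath, deletePath) pairs. The parallelize/reduceByKey pipeline, this helper,
 * and the PartitionCleanStat.merge combiner are assumptions for illustration.
 */
private static List<Tuple2<String, PartitionCleanStat>> deleteFilesExample(JavaSparkContext jsc,
    HoodieTable table, List<Tuple2<String, String>> partitionFilePairs) {
  return jsc.parallelize(partitionFilePairs)
      .mapPartitionsToPair(deleteFilesFunc(table))
      .reduceByKey((stat1, stat2) -> stat1.merge(stat2)) // assumes a merge() combiner exists
      .collect();
}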
private void deleteInstantFile(HoodieInstant instant) {
  log.info("Deleting instant " + instant);
  Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), instant.getFileName());
  try {
    boolean result = metaClient.getFs().delete(inFlightCommitFilePath, false);
    if (result) {
      log.info("Removed in-flight " + instant);
    } else {
      throw new HoodieIOException("Could not delete in-flight instant " + instant);
    }
  } catch (IOException e) {
    throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
  }
}
protected HoodieActiveTimeline(HoodieTableMetaClient metaClient, Set<String> includedExtensions) {
  // Filter all the files in the metapath, include only the extensions passed in,
  // and convert them into HoodieInstants
  try {
    this.instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
        new Path(metaClient.getMetaPath()), includedExtensions);
    log.info("Loaded instants " + instants);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to scan metadata", e);
  }
  this.metaClient = metaClient;
  // multiple casts will make this lambda serializable -
  // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
  this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
}
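/**
 * A minimal sketch of the serializable-lambda idiom used above (this method and its names
 * are illustrative, not part of the codebase). Casting a lambda to an intersection of its
 * functional interface and Serializable makes the compiler emit a lambda that also
 * implements java.io.Serializable, per JLS 15.16.
 */
private static Function<String, Integer> serializableLength() {
  return (Function<String, Integer> & Serializable) String::length;
}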
/**
 * Given a partition path, obtain all filegroups within that. All methods that work at the
 * partition level go through this.
 */
@Override
public Stream<HoodieFileGroup> getAllFileGroups(String partitionPathStr) {
  // return any previously fetched groups
  if (partitionToFileGroupsMap.containsKey(partitionPathStr)) {
    return partitionToFileGroupsMap.get(partitionPathStr).stream();
  }
  try {
    // Create the path if it does not exist already
    Path partitionPath = FSUtils.getPartitionPath(metaClient.getBasePath(), partitionPathStr);
    FSUtils.createPathIfNotExists(metaClient.getFs(), partitionPath);
    FileStatus[] statuses = metaClient.getFs().listStatus(partitionPath);
    List<HoodieFileGroup> fileGroups = addFilesToView(statuses);
    return fileGroups.stream();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to list data files in partition " + partitionPathStr, e);
  }
}
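/**
 * Hedged usage sketch for getAllFileGroups: stream the file groups under one partition of
 * the view. This helper and the partition value are illustrative assumptions.
 */
private void printFileGroups(HoodieTableFileSystemView fsView) {
  fsView.getAllFileGroups("2016/03/15")
      .forEach(fileGroup -> log.info("Found file group: " + fileGroup));
}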
/**
 * Rename log files. This is done for un-scheduling a pending compaction operation.
 * NOTE: Can only be used safely when no writer (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table Meta-Client
 * @param oldLogFile Old Log File
 * @param newLogFile New Log File
 */
protected static void renameLogFile(HoodieTableMetaClient metaClient, HoodieLogFile oldLogFile,
    HoodieLogFile newLogFile) throws IOException {
  FileStatus[] statuses = metaClient.getFs().listStatus(oldLogFile.getPath());
  Preconditions.checkArgument(statuses.length == 1, "Only one status must be present");
  Preconditions.checkArgument(statuses[0].isFile(), "Source File must exist");
  Preconditions.checkArgument(oldLogFile.getPath().getParent().equals(newLogFile.getPath().getParent()),
      "Log file must only be moved within the parent directory");
  // Surface a failed rename instead of silently ignoring the returned flag
  boolean success = metaClient.getFs().rename(oldLogFile.getPath(), newLogFile.getPath());
  if (!success) {
    throw new HoodieIOException("Could not rename " + oldLogFile.getPath() + " to " + newLogFile.getPath());
  }
}
private void revertCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
  Preconditions.checkArgument(completed.getTimestamp().equals(inflight.getTimestamp()));
  Path inFlightCommitFilePath = new Path(metaClient.getMetaPath(), inflight.getFileName());
  try {
    if (!metaClient.getFs().exists(inFlightCommitFilePath)) {
      Path commitFilePath = new Path(metaClient.getMetaPath(), completed.getFileName());
      boolean success = metaClient.getFs().rename(commitFilePath, inFlightCommitFilePath);
      if (!success) {
        throw new HoodieIOException("Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
      }
    }
  } catch (IOException e) {
    throw new HoodieIOException("Could not complete revert " + completed, e);
  }
}
/**
 * Deletes any new tmp files written into the partition during the current commit.
 */
public static void cleanupTmpFilesFromCurrentCommit(HoodieWriteConfig config, String commitTime,
    String partitionPath, int taskPartitionId, HoodieTable hoodieTable) {
  FileSystem fs = hoodieTable.getMetaClient().getFs();
  try {
    FileStatus[] prevFailedFiles = fs.globStatus(new Path(String.format("%s/%s/%s",
        config.getBasePath(), partitionPath, FSUtils.maskWithoutFileId(commitTime, taskPartitionId))));
    if (prevFailedFiles != null) {
      logger.info("Deleting " + prevFailedFiles.length + " files generated by previous failed attempts.");
      for (FileStatus status : prevFailedFiles) {
        fs.delete(status.getPath(), false);
      }
    }
  } catch (IOException e) {
    throw new HoodieIOException("Failed to cleanup Temp files from commit " + commitTime, e);
  }
}
public HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad)
    throws DatasetNotFoundException {
  log.info("Loading HoodieTableMetaClient from " + basePath);
  this.basePath = basePath;
  this.hadoopConf = new SerializableConfiguration(conf);
  Path basePathDir = new Path(this.basePath);
  this.metaPath = basePath + File.separator + METAFOLDER_NAME;
  Path metaPathDir = new Path(this.metaPath);
  this.fs = getFs();
  DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
  this.tableConfig = new HoodieTableConfig(fs, metaPath);
  this.tableType = tableConfig.getTableType();
  log.info("Finished Loading Table of type " + tableType + " from " + basePath);
  if (loadActiveTimelineOnLoad) {
    log.info("Loading Active commit timeline for " + basePath);
    getActiveTimeline();
  }
}
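/**
 * Hedged usage sketch for the constructor above: build a meta client against a table's
 * base path and eagerly load its active timeline. This helper and the base path value
 * are illustrative assumptions.
 */
static HoodieTableMetaClient loadMetaClientExample(Configuration hadoopConf)
    throws DatasetNotFoundException {
  return new HoodieTableMetaClient(hadoopConf, "/tmp/hoodie/sample-table", true);
}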
/**
 * Describes table properties.
 */
@CliCommand(value = "desc", help = "Describe Hoodie Table properties")
public String descTable() {
  TableHeader header = new TableHeader()
      .addTableHeaderField("Property")
      .addTableHeaderField("Value");
  List<Comparable[]> rows = new ArrayList<>();
  rows.add(new Comparable[]{"basePath", HoodieCLI.tableMetadata.getBasePath()});
  rows.add(new Comparable[]{"metaPath", HoodieCLI.tableMetadata.getMetaPath()});
  rows.add(new Comparable[]{"fileSystem", HoodieCLI.tableMetadata.getFs().getScheme()});
  HoodieCLI.tableMetadata.getTableConfig().getProps().entrySet().forEach(e ->
      rows.add(new Comparable[]{e.getKey(), e.getValue()}));
  return HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
}
/**
 * END - COMPACTION RELATED META-DATA MANAGEMENT
 **/

private void transitionState(HoodieInstant fromInstant, HoodieInstant toInstant, Optional<byte[]> data) {
  Preconditions.checkArgument(fromInstant.getTimestamp().equals(toInstant.getTimestamp()));
  Path commitFilePath = new Path(metaClient.getMetaPath(), toInstant.getFileName());
  try {
    // write the commit metadata into the inflight file, then rename it to the completed file
    Path inflightCommitFile = new Path(metaClient.getMetaPath(), fromInstant.getFileName());
    createFileInMetaPath(fromInstant.getFileName(), data);
    boolean success = metaClient.getFs().rename(inflightCommitFile, commitFilePath);
    if (!success) {
      throw new HoodieIOException("Could not rename " + inflightCommitFile + " to " + commitFilePath);
    }
  } catch (IOException e) {
    throw new HoodieIOException("Could not complete " + fromInstant, e);
  }
}
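/**
 * Hedged usage sketch for transitionState: completing an inflight commit by writing its
 * metadata and renaming the inflight file to the completed file. This helper, the
 * HoodieInstant constructor shape, and HoodieTimeline.COMMIT_ACTION are assumed from
 * this codebase's API.
 */
private void completeCommitExample(String commitTime, Optional<byte[]> commitMetadata) {
  HoodieInstant inflight = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime);
  HoodieInstant completed = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
  transitionState(inflight, completed, commitMetadata);
}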
private HoodieLogFormat.Writer openWriter() {
  try {
    if (this.writer == null) {
      return HoodieLogFormat.newWriterBuilder().onParentPath(archiveFilePath.getParent())
          .withFileId(archiveFilePath.getName())
          .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
          .withFs(metaClient.getFs())
          .overBaseCommit("").build();
    } else {
      return this.writer;
    }
  } catch (InterruptedException | IOException e) {
    throw new HoodieException("Unable to initialize HoodieLogFormat writer", e);
  }
}
public HoodieIOHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable) {
  this.commitTime = commitTime;
  this.config = config;
  this.fs = hoodieTable.getMetaClient().getFs();
  this.hoodieTable = hoodieTable;
  this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline();
  this.schema = createHoodieWriteSchema(config);
  this.timer = new HoodieTimer().startTimer();
}
/**
 * Load all involved files as <Partition, filename> pair RDD from all partitions in the table.
 */
@Override
@VisibleForTesting
List<Tuple2<String, BloomIndexFileInfo>> loadInvolvedFiles(List<String> partitions,
    final JavaSparkContext jsc, final HoodieTable hoodieTable) {
  HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
  try {
    // Global index: the passed-in partitions are deliberately ignored; files are loaded
    // from every partition in the table
    List<String> allPartitionPaths = FSUtils.getAllPartitionPaths(metaClient.getFs(),
        metaClient.getBasePath(), config.shouldAssumeDatePartitioning());
    return super.loadInvolvedFiles(allPartitionPaths, jsc, hoodieTable);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to load all partitions", e);
  }
}
public HoodieIOHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable) {
  this.commitTime = commitTime;
  this.config = config;
  this.fs = hoodieTable.getMetaClient().getFs();
  this.hoodieTable = hoodieTable;
  this.hoodieTimeline = hoodieTable.getCompletedCommitsTimeline();
  this.schema = createHoodieWriteSchema(config);
  this.timer = new HoodieTimer().startTimer();
  this.writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
}
private List<HoodieDataFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg)
    throws IOException {
  FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(table.getMetaClient().getFs(),
      cfg.getBasePath());
  HoodieTableFileSystemView view = new HoodieTableFileSystemView(table.getMetaClient(),
      table.getCompletedCommitsTimeline(), allFiles);
  return view.getLatestDataFiles().collect(Collectors.toList());
}