/** * Performs cleaning of partition paths according to cleaning policy and returns the number of * files cleaned. Handles skews in partitions to clean by making files to clean as the unit of * task distribution. * * @throws IllegalArgumentException if unknown cleaning policy is provided */ @Override public List<HoodieCleanStat> clean(JavaSparkContext jsc) { try { FileSystem fs = getMetaClient().getFs(); List<String> partitionsToClean = FSUtils .getAllPartitionPaths(fs, getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning()); logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config .getCleanerPolicy()); if (partitionsToClean.isEmpty()) { logger.info("Nothing to clean here mom. It is already clean"); return Collections.emptyList(); } return cleanPartitionPaths(partitionsToClean, jsc); } catch (IOException e) { throw new HoodieIOException("Failed to clean up after commit", e); } }
/** * Performs cleaning of partition paths according to cleaning policy and returns the number of * files cleaned. Handles skews in partitions to clean by making files to clean as the unit of * task distribution. * * @throws IllegalArgumentException if unknown cleaning policy is provided */ @Override public List<HoodieCleanStat> clean(JavaSparkContext jsc) { try { FileSystem fs = getMetaClient().getFs(); List<String> partitionsToClean = FSUtils .getAllPartitionPaths(fs, getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning()); logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config .getCleanerPolicy()); if (partitionsToClean.isEmpty()) { logger.info("Nothing to clean here mom. It is already clean"); return Collections.emptyList(); } return cleanPartitionPaths(partitionsToClean, jsc); } catch (IOException e) { throw new HoodieIOException("Failed to clean up after commit", e); } }