@Override
public void onRunFinish(boolean succeeded, BatchSinkContext context) {
  // Post-run hook: after a successful run, drop time partitions older than the configured
  // retention window ("cleanPartitionsOlderThan"). A null config value disables cleanup.
  if (succeeded && tpfsSinkConfig.cleanPartitionsOlderThan != null) {
    // Cutoff is measured backwards from the run's logical start time, not wall-clock now.
    long cutoffTime =
      context.getLogicalStartTime() - TimeParser.parseDuration(tpfsSinkConfig.cleanPartitionsOlderThan);
    TimePartitionedFileSet tpfs = context.getDataset(tpfsSinkConfig.name);
    // Log the cleanup once, before the loop — the message is loop-invariant, and the original
    // emitted one identical line per dropped partition (and nothing when none matched).
    LOG.info("Cleaning up partitions older than {}", tpfsSinkConfig.cleanPartitionsOlderThan);
    for (TimePartitionDetail timePartitionDetail : tpfs.getPartitionsByTime(0, cutoffTime)) {
      LOG.debug("Dropping partition with time {}", timePartitionDetail.getTime());
      tpfs.dropPartition(timePartitionDetail.getTime());
    }
  }
}
}
@Override public void prepareRun(BatchSinkContext context) throws DatasetManagementException, InstantiationException { // if macros were provided, the dataset still needs to be created config.validate(); OutputFormatProvider outputFormatProvider = context.newPluginInstance(FORMAT_PLUGIN_ID); DatasetProperties datasetProperties = createProperties(outputFormatProvider); if (!context.datasetExists(config.getName())) { context.createDataset(config.getName(), PartitionedFileSet.class.getName(), datasetProperties); } PartitionedFileSet files = context.getDataset(config.getName()); snapshotFileSet = new SnapshotFileSet(files); // need to use all the dataset properties as arguments in case the dataset already exists, // created by the previous version of this plugin before output format plugins were used. // in that scenario, the output format attached to the dataset properties will be incorrect, // and must be overridden here. Map<String, String> arguments = new HashMap<>(datasetProperties.getProperties()); if (config.getFileProperties() != null) { arguments = GSON.fromJson(config.getFileProperties(), MAP_TYPE); } context.addOutput(Output.ofDataset(config.getName(), snapshotFileSet.getOutputArguments(context.getLogicalStartTime(), arguments))); }