/**
 * Stores the identity of a {@link FileSystemDataset} in a {@link State} so that it can be
 * recovered later.
 *
 * <p>Only the dataset URN is persisted; it is written under the
 * {@code SERIALIZE_COMPACTION_FILE_PATH_NAME} property.
 *
 * @param dataset the dataset to serialize
 * @param state the state object that receives the serialized dataset URN
 */
public void save (FileSystemDataset dataset, State state) {
  final String datasetUrn = dataset.datasetURN();
  state.setProp(SERIALIZE_COMPACTION_FILE_PATH_NAME, datasetUrn);
}
public void onCompactionJobComplete (FileSystemDataset dataset) throws IOException { boolean renamingRequired = this.state.getPropAsBoolean(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED, MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED); if (renamingRequired) { Collection<Path> paths = configurator.getMapReduceInputPaths(); for (Path path: paths) { Path newPath = new Path (path.getParent(), path.getName() + MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX); log.info("[{}] Renaming {} to {}", dataset.datasetURN(), path, newPath); fs.rename(path, newPath); } // submit events if directory is renamed if (eventSubmitter != null) { Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.RENAME_DIR_PATHS, Joiner.on(',').join(paths)); this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_MARK_DIR_EVENT, eventMetadataMap); } } }
/**
 * Extracts the dataset name and the time portion from a dataset URN and stores them in
 * {@code rst}.
 *
 * <p>The part of the URN that follows the source base directory is split at the first
 * occurrence of the source sub directory: everything before it (without a trailing slash)
 * becomes the dataset name, and everything after the next {@code '/'} (without a trailing
 * slash) becomes the time string, which is then converted with {@code getTime}.
 *
 * @param dataset the dataset whose URN is parsed
 * @param rst the parser result supplying the source base/sub directories; its
 *            {@code datasetName}, {@code timeString} and {@code time} fields are populated
 * @throws StringIndexOutOfBoundsException if the URN does not contain the source base
 *         directory, the source sub directory, or a path component after the sub directory
 */
private void parseTimeAndDatasetName (FileSystemDataset dataset, CompactionParserResult rst) {
  String commonBase = rst.getSrcBaseDir();
  String fullPath = dataset.datasetURN();

  // Without this check a missing base dir (-1) would silently shift the start offset by one.
  int basePos = fullPath.indexOf(commonBase);
  if (basePos == -1) {
    throw new StringIndexOutOfBoundsException(
        "Source base dir '" + commonBase + "' not found in dataset path '" + fullPath + "'");
  }
  int startPos = basePos + commonBase.length();
  String relative = StringUtils.removeStart(fullPath.substring(startPos), "/");

  int delimiterStart = StringUtils.indexOf(relative, rst.getSrcSubDir());
  if (delimiterStart == -1) {
    throw new StringIndexOutOfBoundsException(
        "Source sub dir '" + rst.getSrcSubDir() + "' not found in relative path '" + relative + "'");
  }

  // Without this check a missing separator (-1) would make the time string the whole relative path.
  int delimiterEnd = relative.indexOf("/", delimiterStart);
  if (delimiterEnd == -1) {
    throw new StringIndexOutOfBoundsException(
        "No time component after sub dir '" + rst.getSrcSubDir() + "' in relative path '" + relative + "'");
  }

  String datasetName = StringUtils.removeEnd(relative.substring(0, delimiterStart), "/");
  String timeString = StringUtils.removeEnd(relative.substring(delimiterEnd + 1), "/");
  rst.datasetName = datasetName;
  rst.timeString = timeString;
  rst.time = getTime(timeString);
}
public void onCompactionJobComplete(FileSystemDataset dataset) throws IOException { if (state.contains(ConfigurationKeys.HIVE_REGISTRATION_POLICY)) { HiveRegister hiveRegister = HiveRegister.get(state); HiveRegistrationPolicy hiveRegistrationPolicy = HiveRegistrationPolicyBase.getPolicy(state); CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset); List<String> paths = new ArrayList<>(); for (HiveSpec spec : hiveRegistrationPolicy.getHiveSpecs(new Path(result.getDstAbsoluteDir()))) { hiveRegister.register(spec); paths.add(spec.getPath().toUri().toASCIIString()); log.info("Hive registration is done for {}", result.getDstAbsoluteDir()); } // submit events for hive registration if (eventSubmitter != null) { Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.HIVE_REGISTRATION_PATHS, Joiner.on(',').join(paths)); this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_HIVE_REGISTRATION_EVENT, eventMetadataMap); } } }
double newRecords = helper.calculateRecordCount (Lists.newArrayList(new Path(dataset.datasetURN()))); double oldRecords = InputRecordCountHelper.readRecordCount (helper.getFs(), new Path(result.getDstAbsoluteDir())); log.info ("Dataset {} : previous records {}, current records {}", dataset.datasetURN(), oldRecords, newRecords); if (oldRecords == 0) { return true; log.info ("Dataset {} records exceeded the threshold {}", dataset.datasetURN(), threshold); return true;
// Build the event metadata - the dataset URN plus the new total record count rendered as a
// decimal string - and submit it as a COMPACTION_RECORD_COUNT_EVENT.
// NOTE(review): no null check on eventSubmitter is visible in this excerpt - confirm the
// enclosing code guards against a missing submitter.
Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.RECORD_COUNT_TOTAL, Long.toString(newTotalRecords)); this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_RECORD_COUNT_EVENT, eventMetadataMap);