/** Human-readable identifier: the dataset URN followed by '@' and this object's name. */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append(this.dataset.datasetURN()).append("@").append(this.name);
  return sb.toString();
}
}
/**
 * Returns an iterator over all datasets discovered by the configured {@code datasetFinder}.
 *
 * @throws IOException if dataset discovery fails
 */
@Override
public Iterator<T> getDatasetsIterator() throws IOException {
  Iterator<T> discovered = this.datasetFinder.findDatasets().iterator();
  return discovered;
}
}
/**
 * Deletes any empty paths in <code>possiblyEmptyDirectories</code> all the way upto the
 * {@link FileSystemDataset#datasetRoot()}.
 *
 * @param possiblyEmptyDirectories directories to test and remove if empty
 * @param fsDataset dataset whose root bounds how far up the tree deletion may go
 * @throws IOException if a filesystem operation fails
 */
public void cleanEmptyDirectories(final Set<Path> possiblyEmptyDirectories, final FileSystemDataset fsDataset)
    throws IOException {
  // No-op when deletion is disabled or when running in simulate (dry-run) mode.
  if (!this.deleteEmptyDirectories || this.simulate) {
    return;
  }
  for (Path candidate : possiblyEmptyDirectories) {
    PathUtils.deleteEmptyParentDirectories(fs, fsDataset.datasetRoot(), candidate);
  }
}
}
/**
 * Serializes a {@link FileSystemDataset} into the given {@link State} by storing its URN
 * under {@code SERIALIZE_COMPACTION_FILE_PATH_NAME}.
 *
 * @param dataset the dataset to serialize
 * @param state the state that records the dataset URN
 */
public void save (FileSystemDataset dataset, State state) {
  String urn = dataset.datasetURN();
  state.setProp(SERIALIZE_COMPACTION_FILE_PATH_NAME, urn);
}
/**
 * Exception indicating that verification of the given dataset failed.
 *
 * <p>The cause is now chained into the superclass (in addition to the legacy
 * {@code this.cause} field) so {@link #getCause()} and printed stack traces retain the
 * underlying failure; previously only the cause's {@code toString()} was embedded in the
 * message and the chain was lost.
 *
 * @param dataset the dataset that failed verification
 * @param cause the underlying failure
 */
public DatasetVerificationException (Dataset dataset, Throwable cause) {
  // Message text is unchanged; only the cause-chaining is added.
  super ("Dataset:" + dataset.datasetURN() + " Exception:" + cause, cause);
  this.dataset = dataset;
  this.cause = cause;
}
}
/**
 * Treats the dataset root as the single version: returns one {@link FileStatusDatasetVersion}
 * built from the root directory's {@link FileStatus}.
 *
 * @throws IOException if the root's file status cannot be fetched
 */
@Override
public Collection<FileStatusDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  FileSystemDataset fsDataset = (FileSystemDataset) dataset;
  FileStatus rootStatus = this.fs.getFileStatus(fsDataset.datasetRoot());
  List<FileStatusDatasetVersion> versions = Lists.newArrayList();
  versions.add(new FileStatusDatasetVersion(rootStatus));
  return versions;
}
}
/**
 * Aggregates the datasets discovered by every configured finder into a single list.
 *
 * @return all datasets from all delegate finders, in finder order
 * @throws IOException if any delegate finder fails
 */
@Override
public List<Dataset> findDatasets() throws IOException {
  List<Dataset> combined = Lists.newArrayList();
  for (DatasetsFinder<Dataset> finder : this.datasetFinders) {
    combined.addAll(finder.findDatasets());
  }
  return combined;
}
public void onCompactionJobComplete (FileSystemDataset dataset) throws IOException { boolean renamingRequired = this.state.getPropAsBoolean(MRCompactor.COMPACTION_RENAME_SOURCE_DIR_ENABLED, MRCompactor.DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED); if (renamingRequired) { Collection<Path> paths = configurator.getMapReduceInputPaths(); for (Path path: paths) { Path newPath = new Path (path.getParent(), path.getName() + MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX); log.info("[{}] Renaming {} to {}", dataset.datasetURN(), path, newPath); fs.rename(path, newPath); } // submit events if directory is renamed if (eventSubmitter != null) { Map<String, String> eventMetadataMap = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, dataset.datasetURN(), CompactionSlaEventHelper.RENAME_DIR_PATHS, Joiner.on(',').join(paths)); this.eventSubmitter.submit(CompactionSlaEventHelper.COMPACTION_MARK_DIR_EVENT, eventMetadataMap); } } }
/**
 * Marks this work unit as failed and logs which dataset's compaction job failed.
 */
@Override
public void run() {
  // Switched from string concatenation to SLF4J parameterized logging (idiomatic, defers
  // formatting); the rendered message is byte-identical to the original.
  log.error("Compaction job for {} is failed. Please take a look", dataset.datasetURN());
  this.workingState = WorkUnitState.WorkingState.FAILED;
}
/**
 * Single-version finder: the dataset root itself is the only version, wrapped as a
 * {@link FileStatusDatasetVersion} from the root's current {@link FileStatus}.
 *
 * @throws IOException if fetching the root's file status fails
 */
@Override
public Collection<FileStatusDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  Path root = ((FileSystemDataset) dataset).datasetRoot();
  FileStatusDatasetVersion rootVersion = new FileStatusDatasetVersion(this.fs.getFileStatus(root));
  return Lists.newArrayList(rootVersion);
}
}
/**
 * Derives the dataset name and time string from the dataset URN, relative to the configured
 * source base directory and source sub-directory, and stores them (plus the parsed time)
 * on {@code rst}.
 *
 * <p>NOTE(review): the logic appears to assume a layout of
 * {@code <srcBaseDir>/<datasetName>/<srcSubDir>/<timeString>} — confirm against the actual
 * path convention used by callers.
 *
 * @param dataset the dataset whose URN is parsed
 * @param rst the parser result to populate
 * @throws StringIndexOutOfBoundsException if the source sub-directory does not occur in the path
 */
private void parseTimeAndDatasetName (FileSystemDataset dataset, CompactionParserResult rst) {
  String commonBase = rst.getSrcBaseDir();
  String fullPath = dataset.datasetURN();
  int startPos = fullPath.indexOf(commonBase) + commonBase.length();
  String relative = StringUtils.removeStart(fullPath.substring(startPos), "/");

  int delimiterStart = StringUtils.indexOf(relative, rst.getSrcSubDir());
  if (delimiterStart == -1) {
    // Same exception type as before (existing catch sites unaffected), but now with a
    // message carrying enough context to diagnose the malformed path.
    throw new StringIndexOutOfBoundsException(
        "Cannot find src sub dir '" + rst.getSrcSubDir() + "' in relative path '" + relative + "'");
  }
  int delimiterEnd = relative.indexOf("/", delimiterStart);
  String datasetName = StringUtils.removeEnd(relative.substring(0, delimiterStart), "/");
  String timeString = StringUtils.removeEnd(relative.substring(delimiterEnd + 1), "/");
  rst.datasetName = datasetName;
  rst.timeString = timeString;
  rst.time = getTime (timeString);
}
/**
 * Submits the named event with this dataset's URN attached as metadata.
 *
 * @param eventName name of the event to submit
 */
private void submitEvent(String eventName) {
  String urn = this.dataset.datasetURN();
  Map<String, String> metadata = ImmutableMap.of(CompactionSlaEventHelper.DATASET_URN, urn);
  this.eventSubmitter.submit(eventName, metadata);
}
/**
 * Returns a single {@link TimestampedDatasetVersion} for the dataset root, stamped with the
 * root directory's modification time.
 *
 * @throws IOException if the root's file status cannot be fetched
 */
@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  Path root = ((FileSystemDataset) dataset).datasetRoot();
  FileStatus rootStatus = this.fs.getFileStatus(root);
  DateTime modifiedAt = new DateTime(rootStatus.getModificationTime());
  return Lists.newArrayList(new TimestampedDatasetVersion(modifiedAt, root));
}
}
/**
 * Cleanup-success callback: releases the finish latch, logs the cleaned dataset, and bumps
 * the success meter.
 */
@Override
public void onSuccess(Void arg0) {
  // Release the waiter first so overall completion is not delayed by logging/metrics.
  DatasetCleaner.this.finishCleanSignal.get().countDown();
  String urn = dataset.datasetURN();
  LOG.info("Successfully cleaned: " + urn);
  Instrumented.markMeter(DatasetCleaner.this.datasetsCleanSuccessMeter);
}
/**
 * Builds one {@link TimestampedDatasetVersion} per most-nested path under the dataset root,
 * each stamped with that path's modification time.
 *
 * <p>On I/O failure the dataset is skipped: a warning is logged and an empty list returned
 * (deliberate best-effort behavior, preserved from the original).
 */
@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) {
  FileSystemDataset fsDataset = (FileSystemDataset) dataset;
  try {
    List<TimestampedDatasetVersion> timestampedVersions = Lists.newArrayList();
    for (FileStatus fileStatus : FileListUtils.listMostNestedPathRecursively(this.fs, fsDataset.datasetRoot())) {
      timestampedVersions.add(new TimestampedDatasetVersion(new DateTime(fileStatus.getModificationTime()),
          fileStatus.getPath()));
    }
    return timestampedVersions;
  } catch (IOException e) {
    // Pass the exception to the logger so the stack trace is not lost (was message-only).
    LOGGER.warn("Failed to get ModifiedTimeStamp for candidate dataset version at "
        + fsDataset.datasetRoot() + ". Ignoring.", e);
    return Lists.newArrayList();
  }
}
}
/**
 * Returns the tier whose pattern first matches the requestor's dataset URN (iteration order
 * of {@code tiersMap}), or {@link Integer#MAX_VALUE} when no tier pattern matches.
 *
 * @param requestor the dataset requestor to classify
 * @return the matching tier number, or {@code Integer.MAX_VALUE} for "no tier"
 */
private int findTier(Requestor<SimpleDatasetRequest> requestor) {
  Dataset dataset = ((SimpleDatasetRequestor) requestor).getDataset();
  String urn = dataset.datasetURN();
  for (Map.Entry<Integer, Pattern> entry : tiersMap.entrySet()) {
    if (entry.getValue().matcher(urn).find()) {
      return entry.getKey();
    }
  }
  return Integer.MAX_VALUE;
}
}
/**
 * Refer to {@link MRCompactorAvroKeyDedupJobRunner#configureInputAndOutputPaths(Job)}
 */
protected void configureInputAndOutputPaths(Job job, FileSystemDataset dataset) throws IOException {
  // Register every granular input path with the MR job.
  this.mapReduceInputPaths = getGranularInputPaths(dataset.datasetRoot());
  for (Path inputPath : mapReduceInputPaths) {
    FileInputFormat.addInputPath(job, inputPath);
  }

  // Derive the output location from dataset name / destination sub-dir / time string.
  String mrOutputBase = this.state.getProp(MRCompactor.COMPACTION_JOB_DIR);
  CompactionPathParser.CompactionParserResult rst = new CompactionPathParser(this.state).parse(dataset);
  this.mrOutputPath = concatPaths (mrOutputBase, rst.getDatasetName(), rst.getDstSubDir(), rst.getTimeString());

  // Remove any stale output from a previous attempt before pointing the job at it.
  log.info ("Cleaning temporary MR output directory: " + mrOutputPath);
  this.fs.delete(mrOutputPath, true);
  FileOutputFormat.setOutputPath(job, mrOutputPath);
}
/**
 * Orders file sets by the user-provided comparator first, breaking ties by dataset URN and
 * then by file-set name, so the overall ordering is total and deterministic.
 */
@Override
public int compare(FileSet<CopyEntity> p1, FileSet<CopyEntity> p2) {
  int result = this.userProvidedComparator.compare(p1, p2);
  if (result != 0) {
    return result;
  }
  result = p1.getDataset().datasetURN().compareTo(p2.getDataset().datasetURN());
  if (result != 0) {
    return result;
  }
  return p1.getName().compareTo(p2.getName());
}
}
/**
 * Cleanup-failure callback: releases the finish latch, logs and records the throwable,
 * bumps the failure meter, and emits a {@code CleanFailed} retention event carrying the
 * full stack trace plus the dataset URN.
 */
@Override
public void onFailure(Throwable throwable) {
  DatasetCleaner.this.finishCleanSignal.get().countDown();
  String urn = dataset.datasetURN();
  LOG.warn("Exception caught when cleaning " + urn + ".", throwable);
  DatasetCleaner.this.throwables.add(throwable);
  Instrumented.markMeter(DatasetCleaner.this.datasetsCleanFailureMeter);
  Map<String, String> failureMetadata = ImmutableMap.of(
      RetentionEvents.CleanFailed.FAILURE_CONTEXT_METADATA_KEY, ExceptionUtils.getFullStackTrace(throwable),
      RetentionEvents.DATASET_URN_METADATA_KEY, urn);
  DatasetCleaner.this.eventSubmitter.submit(RetentionEvents.CleanFailed.EVENT_NAME, failureMetadata);
}
/**
 * Below three steps are performed for a compaction task:
 * Do verifications before a map-reduce job is launched.
 * Start a map-reduce job and wait until it is finished
 * Do post-actions after map-reduce job is finished
 */
@Override
public void run() {
  // Abort on the first verifier that rejects the dataset; only when all pass does the
  // map-reduce job (super.run()) start.
  for (CompactionVerifier verifier : this.suite.getMapReduceVerifiers()) {
    if (verifier.verify(dataset)) {
      continue;
    }
    log.error("Verification {} for {} is not passed.", verifier.getName(), dataset.datasetURN());
    this.onMRTaskComplete (false, new IOException("Compaction verification for MR is failed"));
    return;
  }
  super.run();
}