/**
 * Returns the subset of {@code topics} that survives the blacklist/whitelist filter.
 *
 * @param topics the candidate topic names
 * @param blacklist regex patterns a topic must NOT match
 * @param whitelist regex patterns a topic must match, unless the list is empty
 * @return a new list containing only the surviving topics, in input order
 */
public static List<String> filter(List<String> topics, List<Pattern> blacklist, List<Pattern> whitelist) {
  List<String> surviving = new ArrayList<>();
  for (String candidate : topics) {
    if (survived(candidate, blacklist, whitelist)) {
      surviving.add(candidate);
    }
  }
  return surviving;
}
/**
 * Reads the comma-separated property {@code propKey} from {@code state} and compiles
 * each entry into a regex {@link Pattern}, using an empty string as the default value.
 */
public static List<Pattern> getPatternList(State state, String propKey) {
  String emptyDefault = StringUtils.EMPTY;
  return getPatternList(state, propKey, emptyDefault);
}
/**
 * Looks up the recompaction threshold configured for {@code datasetName}.
 *
 * <p>Each map key is a separator-delimited group of dataset regexes; the value is the
 * threshold shared by all datasets matching that group. If several groups match, the
 * first matching entry in the map's iteration order wins — NOTE(review): callers should
 * supply an order-preserving map if precedence matters.</p>
 *
 * @param datasetName the dataset to look up
 * @param datasetRegexAndRecompactThreshold map from regex group to threshold
 * @return the threshold of the first matching group, or the global default when none match
 */
public static double getRatioThresholdByDatasetName(String datasetName,
    Map<String, Double> datasetRegexAndRecompactThreshold) {
  // The Splitter is immutable and loop-invariant; build it once instead of per entry.
  Splitter regexGroupSplitter = Splitter.on(DATASETS_WITH_SAME_RECOMPACT_THRESHOLDS_SEPARATOR)
      .trimResults().omitEmptyStrings();
  for (Map.Entry<String, Double> entry : datasetRegexAndRecompactThreshold.entrySet()) {
    List<Pattern> groupPatterns =
        DatasetFilterUtils.getPatternsFromStrings(regexGroupSplitter.splitToList(entry.getKey()));
    if (DatasetFilterUtils.stringInPatterns(datasetName, groupPatterns)) {
      return entry.getValue();
    }
  }
  return MRCompactor.DEFAULT_COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET;
}
/**
 * Sets the regex patterns used to filter out logs that should not be copied.
 *
 * @param regexList a comma-separated list of regex patterns
 * @return this {@link LogCopier.Builder} instance
 * @throws NullPointerException if {@code regexList} is null
 */
public Builder useExcludingRegexPatterns(String regexList) {
  Preconditions.checkNotNull(regexList);
  List<String> regexes = COMMA_SPLITTER.splitToList(regexList);
  this.excludingRegexPatterns = DatasetFilterUtils.getPatternsFromStrings(regexes);
  return this;
}
/**
 * Resolves the compaction priority for the named dataset: high-priority patterns are
 * checked first, then normal-priority ones, and anything unmatched is low priority.
 */
protected double getDatasetPriority(String datasetName) {
  if (DatasetFilterUtils.stringInPatterns(datasetName, this.highPriority)) {
    return HIGH_PRIORITY;
  }
  if (DatasetFilterUtils.stringInPatterns(datasetName, this.normalPriority)) {
    return NORMAL_PRIORITY;
  }
  return LOW_PRIORITY;
}
/**
 * Sets the regex patterns a log must match in order to be copied.
 *
 * @param regexList a comma-separated list of regex patterns
 * @return this {@link LogCopier.Builder} instance
 * @throws NullPointerException if {@code regexList} is null
 */
public Builder useIncludingRegexPatterns(String regexList) {
  Preconditions.checkNotNull(regexList);
  this.includingRegexPatterns =
      DatasetFilterUtils.getPatternsFromStrings(COMMA_SPLITTER.splitToList(regexList));
  return this;
}
/**
 * Decides whether {@code topic} passes the blacklist/whitelist filter.
 *
 * <p>A topic survives when (1) it matches no blacklist pattern, and (2) the whitelist
 * is empty or at least one whitelist pattern matches. Both lists hold regex patterns,
 * not glob patterns.</p>
 */
public static boolean survived(String topic, List<Pattern> blacklist, List<Pattern> whitelist) {
  if (stringInPatterns(topic, blacklist)) {
    return false;
  }
  if (whitelist.isEmpty()) {
    return true;
  }
  return stringInPatterns(topic, whitelist);
}
/**
 * Returns the subset of {@code topics} that survives the blacklist/whitelist filter.
 *
 * @param topics the candidate topic names
 * @param blacklist regex patterns a topic must NOT match
 * @param whitelist regex patterns a topic must match, unless the list is empty
 * @return a new set containing only the surviving topics
 */
public static Set<String> filter(Set<String> topics, List<Pattern> blacklist, List<Pattern> whitelist) {
  Set<String> surviving = new HashSet<>();
  for (String candidate : topics) {
    if (survived(candidate, blacklist, whitelist)) {
      surviving.add(candidate);
    }
  }
  return surviving;
}
/** Compiles the normal-priority topic regexes configured in the job state (empty by default). */
private List<Pattern> getNormalPriorityPatterns() {
  return DatasetFilterUtils.getPatternsFromStrings(
      this.state.getPropAsList(MRCompactor.COMPACTION_NORMAL_PRIORITY_TOPICS, StringUtils.EMPTY));
}
/**
 * Check if a log line should be copied.
 *
 * <p>
 * A line should be copied if and only if all of the following conditions satisfy:
 *
 * <ul>
 * <li>
 * It doesn't match any of the excluding regex patterns. If there's no excluding regex patterns,
 * this condition is considered satisfied.
 * </li>
 * <li>
 * It matches at least one of the including regex patterns. If there's no including regex patterns,
 * this condition is considered satisfied.
 * </li>
 * </ul>
 * </p>
 */
private boolean shouldCopyLine(String line) {
  // Exclusion wins: a line matching any excluding pattern is never copied.
  if (LogCopier.this.excludingRegexPatterns.isPresent()
      && DatasetFilterUtils.stringInPatterns(line, LogCopier.this.excludingRegexPatterns.get())) {
    return false;
  }
  // With no including patterns configured, every non-excluded line is copied.
  return !LogCopier.this.includingRegexPatterns.isPresent()
      || DatasetFilterUtils.stringInPatterns(line, LogCopier.this.includingRegexPatterns.get());
}
}
/**
 * Submits all still-unverified datasets for completeness verification in batches.
 *
 * <p>Datasets filtered out by the verification blacklist/whitelist are marked
 * {@code VERIFIED} immediately without being sent to the verifier. Surviving datasets
 * are accumulated and submitted once the batch reaches the configured size; a final
 * partial batch is submitted after the loop.</p>
 */
private void verifyDataCompleteness() {
  List<Pattern> blacklist =
      DatasetFilterUtils.getPatternList(this.state, COMPACTION_COMPLETENESS_VERIFICATION_BLACKLIST);
  List<Pattern> whitelist =
      DatasetFilterUtils.getPatternList(this.state, COMPACTION_COMPLETENESS_VERIFICATION_WHITELIST);
  int numDatasetsVerifiedTogether = getNumDatasetsVerifiedTogether();
  List<Dataset> batch = Lists.newArrayList();
  for (Dataset dataset : this.datasets) {
    if (dataset.state() != UNVERIFIED) {
      continue;
    }
    if (shouldVerifyCompletenessForDataset(dataset, blacklist, whitelist)) {
      batch.add(dataset);
      if (batch.size() >= numDatasetsVerifiedTogether) {
        submitVerificationBatch(batch);
        batch = Lists.newArrayList();
      }
    } else {
      // Excluded from verification: treat as verified so compaction can proceed.
      dataset.setState(VERIFIED);
    }
  }
  if (!batch.isEmpty()) {
    submitVerificationBatch(batch);
  }
}

/** Submits one batch of datasets to the completeness verifier and registers the result callback. */
private void submitVerificationBatch(List<Dataset> batch) {
  ListenableFuture<Results> future = this.verifier.get().verify(batch);
  addCallback(batch, future);
}
/**
 * Fetches topic metadata from the given broker and keeps only topics that survive the
 * blacklist/whitelist filter.
 *
 * @return the filtered metadata list, or {@code null} when the broker fetch itself
 *         returned {@code null} — presumably so callers can distinguish a failed fetch
 *         from "no topics survived"; TODO confirm against callers
 */
private List<TopicMetadata> fetchTopicMetadataFromBroker(String broker, List<Pattern> blacklist,
    List<Pattern> whitelist) {
  List<TopicMetadata> unfiltered = fetchTopicMetadataFromBroker(broker);
  if (unfiltered == null) {
    return null;
  }
  List<TopicMetadata> filtered = Lists.newArrayList();
  for (TopicMetadata metadata : unfiltered) {
    if (DatasetFilterUtils.survived(metadata.topic(), blacklist, whitelist)) {
      filtered.add(metadata);
    }
  }
  return filtered;
}
/**
 * Loads blacklist/whitelist topic names from the config store and appends their compiled
 * regex patterns to this finder's existing blacklist and whitelist.
 */
private void setTopicsFromConfigStore(State state) {
  Set<String> blacklistTopics = new HashSet<>();
  Set<String> whitelistTopics = new HashSet<>();
  // ConfigStoreUtils populates both sets in place from the job properties.
  ConfigStoreUtils.setTopicsFromConfigStore(state.getProperties(), blacklistTopics, whitelistTopics,
      MRCompactor.COMPACTION_BLACKLIST, MRCompactor.COMPACTION_WHITELIST);
  this.blacklist.addAll(DatasetFilterUtils.getPatternsFromStrings(new ArrayList<>(blacklistTopics)));
  this.whitelist.addAll(DatasetFilterUtils.getPatternsFromStrings(new ArrayList<>(whitelistTopics)));
}
/**
 * Creates a {@link DatasetsFinder} backed by the given job state and file system.
 *
 * <p>NOTE(review): initialization order matters — the blacklist/whitelist fields are
 * assigned from job properties first, then {@code setTopicsFromConfigStore} appends
 * additional patterns from the config store on top of them.</p>
 */
@VisibleForTesting
DatasetsFinder(State state, FileSystem fs) {
  this.state = state;
  this.conf = HadoopUtils.getConfFromState(state);
  this.fs = fs;
  // Resolve working directories from the job configuration.
  this.inputDir = getInputDir();
  this.destDir = getDestDir();
  this.tmpOutputDir = getTmpOutputDir();
  // Base filters come from job properties; config-store entries are appended below.
  this.blacklist = DatasetFilterUtils.getPatternList(state, MRCompactor.COMPACTION_BLACKLIST);
  this.whitelist = DatasetFilterUtils.getPatternList(state, MRCompactor.COMPACTION_WHITELIST);
  setTopicsFromConfigStore(state);
  this.highPriority = getHighPriorityPatterns();
  this.normalPriority = getNormalPriorityPatterns();
  this.recompactDatasets = getRecompactDatasets();
}
/**
 * A {@link Dataset} should be verified if it's not already compacted, and it satisfies
 * the blacklist and whitelist.
 */
private boolean shouldVerifyCompletenessForDataset(Dataset dataset, List<Pattern> blacklist,
    List<Pattern> whitelist) {
  boolean renamingRequired = this.state.getPropAsBoolean(COMPACTION_RENAME_SOURCE_DIR_ENABLED,
      DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);
  LOG.info("Should verify completeness with renaming source dir : " + renamingRequired);
  if (datasetAlreadyCompacted(this.fs, dataset, renamingRequired)) {
    return false;
  }
  return DatasetFilterUtils.survived(dataset.getName(), blacklist, whitelist);
}
/**
 * Reads the comma-separated property {@code propKey} from {@code state}, falling back to
 * {@code def} when absent, and compiles each entry into a regex {@link Pattern}.
 */
public static List<Pattern> getPatternList(State state, String propKey, String def) {
  return getPatternsFromStrings(state.getPropAsList(propKey, def));
}
if (datasetsFileStatus.isDirectory()) { String datasetName = getDatasetName(datasetsFileStatus.getPath().toString(), inputDir); if (DatasetFilterUtils.survived(datasetName, this.blacklist, this.whitelist)) { log.info("Found dataset: " + datasetName); Path inputPath = new Path(this.inputDir, new Path(datasetName, this.inputSubDir));
/** Compiles the high-priority topic regexes configured in the job state (empty by default). */
private List<Pattern> getHighPriorityPatterns() {
  List<String> topicRegexes =
      this.state.getPropAsList(MRCompactor.COMPACTION_HIGH_PRIORITY_TOPICS, StringUtils.EMPTY);
  return DatasetFilterUtils.getPatternsFromStrings(topicRegexes);
}