private Map<String, String> createSingleWordSynonymMap() { if (_caseSensitive) { // in the case-sensitive scenario we can simply reuse the normal // synonym map return _synonymMap; } final Map<String, String> synonymMap = new HashMap<>(); final Set<Entry<String, String>> entries = _synonymMap.entrySet(); for (final Entry<String, String> entry : entries) { final String synonym = entry.getKey(); final String masterTerm = entry.getValue(); if (StringUtils.isSingleWord(synonym)) { synonymMap.put(synonym.toLowerCase(), masterTerm); } } return synonymMap; }
/**
 * Builds the map used to look up master terms for multi-word synonyms.
 *
 * <p>Only synonyms that are not a single word are included. Keys are kept
 * verbatim when matching is case-sensitive and lower-cased otherwise. The
 * map is ordered by key length, longest first; keys of equal length fall
 * back to their natural string order.</p>
 *
 * @return a sorted map from multi-word synonym to master term
 */
private SortedMap<String, String> createMultiWordSynonymMap() {
    // Longest keys first; the natural-order tie-break keeps distinct keys
    // of equal length from being treated as the same key.
    final Comparator<String> longestFirst =
            Comparator.comparingInt(String::length).reversed().thenComparing(String::compareTo);
    final SortedMap<String, String> multiWordSynonyms = new TreeMap<>(longestFirst);

    for (final Entry<String, String> mapping : _synonymMap.entrySet()) {
        final String synonym = mapping.getKey();
        if (StringUtils.isSingleWord(synonym)) {
            continue;
        }
        final String key = _caseSensitive ? synonym : synonym.toLowerCase();
        multiWordSynonyms.put(key, mapping.getValue());
    }
    return multiWordSynonyms;
}
/**
 * Opens the dictionary connection and precompiles one regular expression
 * for each dictionary value that is not a single word.
 *
 * <p>Each pattern matches the value as a quoted literal enclosed in word
 * boundaries. When the dictionary is not case-sensitive the literal is
 * lower-cased first. Patterns are stored under the original, unmodified
 * value, in the order the connection supplies them.</p>
 */
@Initialize
public void init() {
    _dictionaryConnection = _dictionary.openConnection(_configuration);
    multiWordDictionaryPatterns = new LinkedHashMap<>();

    // NOTE(review): getLengthSortedValues() presumably yields the longest
    // values first - confirm against the connection implementation.
    for (final Iterator<String> values = _dictionaryConnection.getLengthSortedValues(); values.hasNext();) {
        final String value = values.next();
        if (StringUtils.isSingleWord(value)) {
            continue;
        }
        final String literal = _dictionary.isCaseSensitive() ? value : value.toLowerCase();
        multiWordDictionaryPatterns.put(value, Pattern.compile("\\b" + Pattern.quote(literal) + "\\b"));
    }
}
final List<String> tokens = StringUtils.splitOnWordBoundaries(value, true); for (final String token : tokens) { if (StringUtils.isSingleWord(token)) { if (_dictionaryConnection.containsValue(token)) { removedParts.add(token);
final List<String> tokens = StringUtils.splitOnWordBoundaries(sentence, true); for (final String token : tokens) { if (StringUtils.isSingleWord(token)) { final String masterTerm = getMasterTerm(token); if (masterTerm == null) {