/*
 * Matches a sentence against the compound rule data and returns the collected rule matches.
 *
 * What the visible code shows:
 * - tokens are taken, without whitespace, from getSentenceWithImmunization(sentence);
 * - candidate strings come from getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck);
 * - a candidate is reported only if getIncorrectCompounds() contains it;
 * - unless the candidate is in getNoDashSuggestion(), the original spelling with ' ' replaced
 *   by '-' is suggested and msg is set to withHyphenMessage;
 * - if isNotAllUppercase(origStringToCheck) holds and the candidate is not in
 *   getOnlyDashSuggestion(), the mergeCompound(...) result is suggested as well and msg is set
 *   to withoutHyphenMessage; the merged form uncapitalizes later words when any entry of
 *   getNoDashLowerCaseSuggestion() is contained in origStringsToCheck;
 * - the result is produced by toRuleMatchArray(ruleMatches).
 *
 * NOTE(review): only an excerpt of this method is visible here (loop headers, several
 * declarations and the closing braces are missing), so the code below is left untouched;
 * confirm the details above against the complete method.
 */
@Override public RuleMatch[] match(AnalyzedSentence sentence) { List<RuleMatch> ruleMatches = new ArrayList<>(); AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace(); addToQueue(token, prevTokens); continue; } else if (token.isImmunized()) { List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling Map<String, AnalyzedTokenReadings> stringToToken = getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck); String stringToCheck = stringsToCheck.get(k); String origStringToCheck = origStringsToCheck.get(k); if (getCompoundRuleData().getIncorrectCompounds().contains(stringToCheck)) { AnalyzedTokenReadings atr = stringToToken.get(stringToCheck); String msg = null; List<String> replacement = new ArrayList<>(); if (!getCompoundRuleData().getNoDashSuggestion().contains(stringToCheck)) { replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; if (isNotAllUppercase(origStringToCheck) && !getCompoundRuleData().getOnlyDashSuggestion().contains(stringToCheck)) { replacement.add(mergeCompound(origStringToCheck, getCompoundRuleData().getNoDashLowerCaseSuggestion().stream().anyMatch(s -> origStringsToCheck.contains(s)))); msg = withoutHyphenMessage; addToQueue(token, prevTokens); return toRuleMatchArray(ruleMatches);
private String normalize(String inStr) { String str = inStr.trim(); if (str.indexOf('-') != -1 && str.indexOf(' ') != -1) { if (isHyphenIgnored()) { // e.g. "E-Mail Adresse" -> "E Mail Adresse" so the error can be detected: str = str.replace('-', ' '); } else { str = str.replace(" - ", " "); } } return str; }
/**
 * Builds the growing phrases "t0 t1", "t0 t1 t2", ... from the queued tokens and records them.
 * For every phrase of at least two tokens the normalized form is appended to
 * {@code stringsToCheck} and the trimmed, unnormalized form to {@code origStringsToCheck}.
 * When {@code sentenceStartsWithUpperCase} is set and the first queued token has the
 * sentence-start POS tag, the normalized form is uncapitalized before it is stored.
 *
 * @param prevTokens the queued tokens, iterated in queue order
 * @param stringsToCheck output list receiving the normalized phrases
 * @param origStringsToCheck output list receiving the phrases in their original spelling
 * @return a map from each normalized phrase to the token that completed it; the first
 *         mapping for a phrase is kept
 */
private Map<String, AnalyzedTokenReadings> getStringToTokenMap(Queue<AnalyzedTokenReadings> prevTokens,
    List<String> stringsToCheck, List<String> origStringsToCheck) {
  Map<String, AnalyzedTokenReadings> phraseToLastToken = new HashMap<>();
  StringBuilder phrase = new StringBuilder();
  boolean firstIsSentenceStart = false;
  int position = 0;
  for (AnalyzedTokenReadings readings : prevTokens) {
    phrase.append(' ').append(readings.getToken());
    if (position == 0) {
      // a single token is never a compound candidate; only remember whether it opens the sentence
      firstIsSentenceStart = readings.hasPosTag(JLanguageTool.SENTENCE_START_TAGNAME);
    } else {
      String rawPhrase = phrase.toString();
      String key = normalize(rawPhrase);
      if (sentenceStartsWithUpperCase && firstIsSentenceStart) {
        key = StringUtils.uncapitalize(key);
      }
      stringsToCheck.add(key);
      origStringsToCheck.add(rawPhrase.trim());
      if (!phraseToLastToken.containsKey(key)) {
        phraseToLastToken.put(key, readings);
      }
    }
    position++;
  }
  return phraseToLastToken;
}
/**
 * Runs {@code rule} on the analyzed {@code text} and asserts the number of rule matches.
 * If expected suggestions are given, the single match must offer exactly those
 * suggestions, in the same order.
 *
 * @param expectedErrors the number of rule matches the text must produce
 * @param text the text to check
 * @param expSuggestions the expected suggestions of the single match, or {@code null}
 *        to skip the suggestion check
 * @throws IOException declared for the sentence analysis and rule matching calls
 * @throws RuntimeException if suggestions are given but {@code expectedErrors} is not 1
 */
public void check(int expectedErrors, String text, String[] expSuggestions) throws IOException {
  assertNotNull("Please initialize langTool!", lt);
  assertNotNull("Please initialize 'rule'!", rule);
  RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(text));
  String countMessage = "Expected " + expectedErrors + " error(s), but got: " + Arrays.toString(matches);
  assertEquals(countMessage, expectedErrors, matches.length);
  if (expSuggestions == null) {
    return;
  }
  if (expectedErrors != 1) {
    throw new RuntimeException("Sorry, test case can only check suggestion if there's one rule match");
  }
  RuleMatch onlyMatch = matches[0];
  String sizeMessage = String.format("Got these suggestions: %s, expected %s ",
      onlyMatch.getSuggestedReplacements(), Arrays.toString(expSuggestions));
  assertEquals(sizeMessage, expSuggestions.length, onlyMatch.getSuggestedReplacements().size());
  int position = 0;
  for (Object actual : onlyMatch.getSuggestedReplacements()) {
    String expected = expSuggestions[position];
    assertEquals(expected, (String) actual);
    position++;
  }
}
/**
 * Creates the rule with its German messages, assigns it to the compounding category
 * and registers one wrong/fixed example pair ("HNO Arzt" -> "HNO-Arzt").
 *
 * @param messages the resource bundle passed to the superclass and used to resolve the category
 * @throws IOException declared by this constructor; NOTE(review): presumably raised by the
 *         superclass while loading its data - confirm against the superclass constructor
 */
public CompoundRule(ResourceBundle messages) throws IOException {
  // NOTE(review): the three messages read "written with a hyphen", "written as one word" and
  // "written as one word or with a hyphen"; the last argument reads "writing words together".
  // Which superclass parameter each one fills is not visible here - confirm.
  super(messages,
      "Dieses Wort wird mit Bindestrich geschrieben.",
      "Dieses Wort wird zusammengeschrieben.",
      "Diese Wörter werden zusammengeschrieben oder mit Bindestrich getrennt.",
      "Zusammenschreibung von Wörtern");
  super.setCategory(Categories.COMPOUNDING.getCategory(messages));
  // example pair: the marked span "HNO Arzt" is corrected to "HNO-Arzt"
  addExamplePair(Example.wrong("Wenn es schlimmer wird, solltest Du zum <marker>HNO Arzt</marker> gehen."),
      Example.fixed("Wenn es schlimmer wird, solltest Du zum <marker>HNO-Arzt</marker> gehen."));
}
/*
 * Matches a sentence against the compound rule data and returns the collected rule matches.
 *
 * What the visible code shows:
 * - tokens are taken, without whitespace, from getSentenceWithImmunization(sentence);
 * - candidate strings come from getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck);
 * - a candidate is reported only if getIncorrectCompounds() contains it;
 * - unless the candidate is in getNoDashSuggestion(), the original spelling with ' ' replaced
 *   by '-' is suggested and msg is set to withHyphenMessage;
 * - if isNotAllUppercase(origStringToCheck) holds and the candidate is not in
 *   getOnlyDashSuggestion(), the mergeCompound(...) result is suggested as well and msg is set
 *   to withoutHyphenMessage; the merged form uncapitalizes later words when any entry of
 *   getNoDashLowerCaseSuggestion() is contained in origStringsToCheck;
 * - the result is produced by toRuleMatchArray(ruleMatches).
 *
 * NOTE(review): only an excerpt of this method is visible here (loop headers, several
 * declarations and the closing braces are missing), so the code below is left untouched;
 * confirm the details above against the complete method.
 */
@Override public RuleMatch[] match(AnalyzedSentence sentence) { List<RuleMatch> ruleMatches = new ArrayList<>(); AnalyzedTokenReadings[] tokens = getSentenceWithImmunization(sentence).getTokensWithoutWhitespace(); addToQueue(token, prevTokens); continue; } else if (token.isImmunized()) { List<String> origStringsToCheck = new ArrayList<>(); // original upper/lowercase spelling Map<String, AnalyzedTokenReadings> stringToToken = getStringToTokenMap(prevTokens, stringsToCheck, origStringsToCheck); String stringToCheck = stringsToCheck.get(k); String origStringToCheck = origStringsToCheck.get(k); if (getCompoundRuleData().getIncorrectCompounds().contains(stringToCheck)) { AnalyzedTokenReadings atr = stringToToken.get(stringToCheck); String msg = null; List<String> replacement = new ArrayList<>(); if (!getCompoundRuleData().getNoDashSuggestion().contains(stringToCheck)) { replacement.add(origStringToCheck.replace(' ', '-')); msg = withHyphenMessage; if (isNotAllUppercase(origStringToCheck) && !getCompoundRuleData().getOnlyDashSuggestion().contains(stringToCheck)) { replacement.add(mergeCompound(origStringToCheck, getCompoundRuleData().getNoDashLowerCaseSuggestion().stream().anyMatch(s -> origStringsToCheck.contains(s)))); msg = withoutHyphenMessage; addToQueue(token, prevTokens); return toRuleMatchArray(ruleMatches);
private boolean isNotAllUppercase(String str) { String[] parts = str.split(" "); for (String part : parts) { if (isHyphenIgnored() || !"-".equals(part)) { // do not treat '-' as an upper-case word if (StringTools.isAllUppercase(part)) { return false; } } } return true; }
/**
 * Builds the growing phrases "t0 t1", "t0 t1 t2", ... from the queued tokens and records them.
 * For every phrase of at least two tokens the normalized form is appended to
 * {@code stringsToCheck} and the trimmed, unnormalized form to {@code origStringsToCheck}.
 * When {@code sentenceStartsWithUpperCase} is set and the first queued token has the
 * sentence-start POS tag, the normalized form is uncapitalized before it is stored.
 *
 * @param prevTokens the queued tokens, iterated in queue order
 * @param stringsToCheck output list receiving the normalized phrases
 * @param origStringsToCheck output list receiving the phrases in their original spelling
 * @return a map from each normalized phrase to the token that completed it; the first
 *         mapping for a phrase is kept
 */
private Map<String, AnalyzedTokenReadings> getStringToTokenMap(Queue<AnalyzedTokenReadings> prevTokens,
    List<String> stringsToCheck, List<String> origStringsToCheck) {
  Map<String, AnalyzedTokenReadings> phraseToLastToken = new HashMap<>();
  StringBuilder phrase = new StringBuilder();
  boolean firstIsSentenceStart = false;
  int position = 0;
  for (AnalyzedTokenReadings readings : prevTokens) {
    phrase.append(' ').append(readings.getToken());
    if (position == 0) {
      // a single token is never a compound candidate; only remember whether it opens the sentence
      firstIsSentenceStart = readings.hasPosTag(JLanguageTool.SENTENCE_START_TAGNAME);
    } else {
      String rawPhrase = phrase.toString();
      String key = normalize(rawPhrase);
      if (sentenceStartsWithUpperCase && firstIsSentenceStart) {
        key = StringUtils.uncapitalize(key);
      }
      stringsToCheck.add(key);
      origStringsToCheck.add(rawPhrase.trim());
      if (!phraseToLastToken.containsKey(key)) {
        phraseToLastToken.put(key, readings);
      }
    }
    position++;
  }
  return phraseToLastToken;
}
/**
 * Concatenates the space-separated parts of {@code str} into one word without separators.
 *
 * @param str the phrase to merge, with parts separated by single spaces
 * @param uncapitalizeMidWords if {@code true}, every part except the first is uncapitalized
 *        before it is appended
 * @return the merged word
 */
private String mergeCompound(String str, boolean uncapitalizeMidWords) {
  String[] words = str.split(" ");
  StringBuilder merged = new StringBuilder();
  for (int i = 0; i < words.length; i++) {
    String word = words[i];
    // a free-standing "-" is dropped unless hyphens are ignored
    if (!isHyphenIgnored() && "-".equals(word)) {
      continue;
    }
    boolean keepAsIs = i == 0 || !uncapitalizeMidWords;
    merged.append(keepAsIs ? word : StringUtils.uncapitalize(word));
  }
  return merged.toString();
}
private String normalize(String inStr) { String str = inStr.trim(); if (str.indexOf('-') != -1 && str.indexOf(' ') != -1) { if (isHyphenIgnored()) { // e.g. "E-Mail Adresse" -> "E Mail Adresse" so the error can be detected: str = str.replace('-', ' '); } else { str = str.replace(" - ", " "); } } return str; }
private boolean isNotAllUppercase(String str) { String[] parts = str.split(" "); for (String part : parts) { if (isHyphenIgnored() || !"-".equals(part)) { // do not treat '-' as an upper-case word if (StringTools.isAllUppercase(part)) { return false; } } } return true; }
/**
 * Concatenates the space-separated parts of {@code str} into one word without separators.
 *
 * @param str the phrase to merge, with parts separated by single spaces
 * @param uncapitalizeMidWords if {@code true}, every part except the first is uncapitalized
 *        before it is appended
 * @return the merged word
 */
private String mergeCompound(String str, boolean uncapitalizeMidWords) {
  String[] words = str.split(" ");
  StringBuilder merged = new StringBuilder();
  for (int i = 0; i < words.length; i++) {
    String word = words[i];
    // a free-standing "-" is dropped unless hyphens are ignored
    if (!isHyphenIgnored() && "-".equals(word)) {
      continue;
    }
    boolean keepAsIs = i == 0 || !uncapitalizeMidWords;
    merged.append(keepAsIs ? word : StringUtils.uncapitalize(word));
  }
  return merged.toString();
}