/**
 * Builds a HunspellStemFilter that stems the tokens of the given TokenStream using the affix
 * rules of the supplied Dictionary.
 *
 * @param input TokenStream whose tokens will be stemmed
 * @param dictionary Dictionary containing the affix rules and words used to stem the tokens;
 *     it is handed to the {@link Stemmer} created here
 * @param dedup true if duplicate stems should be removed; only takes effect when
 *     {@code longestOnly} is false
 * @param longestOnly true if only the longest term should be output
 */
public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, boolean longestOnly) {
  super(input);
  this.stemmer = new Stemmer(dictionary);
  this.longestOnly = longestOnly;
  // Deduplicating is wasted effort when only the longest stem is kept, so switch it off then.
  this.dedup = !longestOnly && dedup;
}
// Stem the word as given, then additionally stem case-folded variants chosen by its detected case:
//  - UPPER_CASE: also stems titleBuffer and lowerBuffer;
//  - TITLE_CASE: also stems lowerBuffer;
//  - anything else: stemmed once, unchanged.
// The variant lookups pass true as doStem's third argument, the as-given lookup passes false.
// NOTE(review): titleBuffer/lowerBuffer are presumably filled by caseFoldTitle/caseFoldLower - confirm.
int caseType = caseOf(word, length); if (caseType == UPPER_CASE) { caseFoldTitle(word, length); caseFoldLower(titleBuffer, length); List<CharsRef> list = doStem(word, length, false); list.addAll(doStem(titleBuffer, length, true)); list.addAll(doStem(lowerBuffer, length, true)); return list; } else if (caseType == TITLE_CASE) { caseFoldLower(word, length); List<CharsRef> list = doStem(word, length, false); list.addAll(doStem(lowerBuffer, length, true)); return list; } else { return doStem(word, length, false);
if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) { continue; continue; stems.add(newStem(strippedWord, length, forms, i)); stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant)); } else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) { stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant)); stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant)); } else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) { stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
boolean allowed = dictionary.onlyincompound == -1 || !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false); } else { compatible = false; int stripLength = stripEnd - stripStart; if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength)) { continue; System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength); List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant); boolean allowed = dictionary.onlyincompound == -1 || !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix); } else { compatible = false; int stripLength = stripEnd - stripStart; if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength)) { continue; List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);
// Stem the current term's characters: uniqueStems() when dedup is set, plain stem() otherwise.
buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
/**
 * Finds the stem(s) of the given word by delegating to the char-array overload.
 *
 * @param word word to find the stems for
 * @return list of stems for the word
 */
public List<CharsRef> stem(String word) {
  final char[] chars = word.toCharArray();
  return stem(chars, chars.length);
}
if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) { continue; continue; stems.add(newStem(strippedWord, length, forms, i)); stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant)); } else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) { stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant)); stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant)); } else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) { stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
boolean allowed = dictionary.onlyincompound == -1 || !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false); } else { compatible = false; int stripLength = stripEnd - stripStart; if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength)) { continue; System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength); List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant); boolean allowed = dictionary.onlyincompound == -1 || !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound); compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix); } else { compatible = false; int stripLength = stripEnd - stripStart; if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength)) { continue; List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);
// Stem the current term's characters: uniqueStems() when dedup is set, plain stem() otherwise.
buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
/**
 * Finds the stem(s) of the given word with duplicates removed.
 *
 * <p>The first occurrence of each stem is kept and the original order is preserved. Duplicates
 * are detected with a CharArraySet configured from {@code dictionary.ignoreCase}. When fewer
 * than two stems are found, the list from {@code stem} is returned as is.
 *
 * @param word word to find the stems for
 * @param length number of characters of {@code word} to use, passed through to {@code stem}
 * @return list of distinct stems for the word
 */
public List<CharsRef> uniqueStems(char word[], int length) {
  final List<CharsRef> allStems = stem(word, length);
  if (allStems.size() <= 1) {
    // Zero or one stem: nothing to deduplicate.
    return allStems;
  }

  final List<CharsRef> unique = new ArrayList<>();
  final CharArraySet seen = new CharArraySet(8, dictionary.ignoreCase);
  for (CharsRef candidate : allStems) {
    if (seen.contains(candidate)) {
      continue;
    }
    unique.add(candidate);
    seen.add(candidate);
  }
  return unique;
}
// Stem the word as given, then additionally stem case-folded variants chosen by its detected case:
//  - UPPER_CASE: also stems titleBuffer and lowerBuffer;
//  - TITLE_CASE: also stems lowerBuffer;
//  - anything else: stemmed once, unchanged.
// The variant lookups pass true as doStem's third argument, the as-given lookup passes false.
// NOTE(review): titleBuffer/lowerBuffer are presumably filled by caseFoldTitle/caseFoldLower - confirm.
int caseType = caseOf(word, length); if (caseType == UPPER_CASE) { caseFoldTitle(word, length); caseFoldLower(titleBuffer, length); List<CharsRef> list = doStem(word, length, false); list.addAll(doStem(titleBuffer, length, true)); list.addAll(doStem(lowerBuffer, length, true)); return list; } else if (caseType == TITLE_CASE) { caseFoldLower(word, length); List<CharsRef> list = doStem(word, length, false); list.addAll(doStem(lowerBuffer, length, true)); return list; } else { return doStem(word, length, false);
/**
 * Finds the stem(s) of the given word by delegating to the char-array overload.
 *
 * @param word word to find the stems for
 * @return list of stems for the word
 */
public List<CharsRef> stem(String word) {
  final char[] chars = word.toCharArray();
  return stem(chars, chars.length);
}
/**
 * Builds a HunspellStemFilter that stems the tokens of the given TokenStream using the affix
 * rules of the supplied Dictionary.
 *
 * @param input TokenStream whose tokens will be stemmed
 * @param dictionary Dictionary containing the affix rules and words used to stem the tokens;
 *     it is handed to the {@link Stemmer} created here
 * @param dedup true if duplicate stems should be removed; only takes effect when
 *     {@code longestOnly} is false
 * @param longestOnly true if only the longest term should be output
 */
public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, boolean longestOnly) {
  super(input);
  this.stemmer = new Stemmer(dictionary);
  this.longestOnly = longestOnly;
  // Deduplicating is wasted effort when only the longest stem is kept, so switch it off then.
  this.dedup = !longestOnly && dedup;
}
/**
 * Finds the stem(s) of the given word with duplicates removed.
 *
 * <p>The first occurrence of each stem is kept and the original order is preserved. Duplicates
 * are detected with a CharArraySet configured from {@code dictionary.ignoreCase}. When fewer
 * than two stems are found, the list from {@code stem} is returned as is.
 *
 * @param word word to find the stems for
 * @param length number of characters of {@code word} to use, passed through to {@code stem}
 * @return list of distinct stems for the word
 */
public List<CharsRef> uniqueStems(char word[], int length) {
  final List<CharsRef> allStems = stem(word, length);
  if (allStems.size() <= 1) {
    // Zero or one stem: nothing to deduplicate.
    return allStems;
  }

  final List<CharsRef> unique = new ArrayList<>();
  final CharArraySet seen = new CharArraySet(8, dictionary.ignoreCase);
  for (CharsRef candidate : allStems) {
    if (seen.contains(candidate)) {
      continue;
    }
    unique.add(candidate);
    seen.add(candidate);
  }
  return unique;
}