// Close dicInputStream, build a HunspellStemmer over `dictionary`, then stem `word`
// (using its full length) and hand the resulting stems to printStemResults.
dicInputStream.close(); HunspellStemmer stemmer = new HunspellStemmer(dictionary); printStemResults(word, stemmer.stem(word.toCharArray(), word.length()));
if (hasCrossCheckedFlag(suffix.getFlag(), flags)) { int deAffixedLength = length - suffix.getAppend().length(); List<Stem> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), suffix, recursionDepth); for (Stem stem : stemList) { stem.addSuffix(suffix); if (hasCrossCheckedFlag(prefix.getFlag(), flags)) { int deAffixedStart = prefix.getAppend().length(); int deAffixedLength = length - deAffixedStart; .toString(); List<Stem> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), prefix, recursionDepth); for (Stem stem : stemList) { stem.addPrefix(prefix);
// Choose the stemming mode for the current term: uniqueStems(...) when dedup is true,
// stem(...) otherwise. Both receive termAtt.buffer() together with termAtt.length().
buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
/**
 * Stems a word supplied as a String.
 * <p>
 * Convenience overload: the word is converted to a character array and the work is delegated to
 * {@link #stem(char[], int)} over the word's full length.
 *
 * @param word Word to find the stems for
 * @return List of stems for the word
 */
public List<Stem> stem(String word) {
  final char[] chars = word.toCharArray();
  // chars.length is always equal to word.length()
  return stem(chars, chars.length);
}
/**
 * Builds a HunspellStemFilter over the given TokenStream. A {@link HunspellStemmer} is created
 * from the supplied dictionary and used to stem the tokens.
 *
 * @param input TokenStream whose tokens will be stemmed
 * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
 * @param dedup true if only unique terms should be output.
 */
public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, boolean dedup) {
  super(input);
  this.stemmer = new HunspellStemmer(dictionary);
  this.dedup = dedup;
}
/**
 * Writes a human-readable report of a word's stems to standard output.
 * <p>
 * The report starts with a {@code stem(originalWord)} header line. Each stem then gets one line:
 * the stem, a colon, every prefix as {@code append+} (followed by {@code strip-} when the prefix
 * has strip text), the stem again, and every suffix as {@code +append} (preceded by
 * {@code -strip} when the suffix has strip text).
 *
 * @param originalWord Word that has been stemmed
 * @param stems Stems of the word
 */
private static void printStemResults(String originalWord, List<Stem> stems) {
  StringBuilder report = new StringBuilder();
  report.append("stem(");
  report.append(originalWord);
  report.append(")");
  report.append("\n");

  for (Stem candidate : stems) {
    report.append("- ");
    report.append(candidate.getStem());
    report.append(": ");

    // Prefixes: what the affix appended, then (optionally) what it stripped.
    for (HunspellAffix pre : candidate.getPrefixes()) {
      report.append(pre.getAppend());
      report.append("+");
      if (hasText(pre.getStrip())) {
        report.append(pre.getStrip());
        report.append("-");
      }
    }

    report.append(candidate.getStem());

    // Suffixes: (optionally) what the affix stripped, then what it appended.
    for (HunspellAffix post : candidate.getSuffixes()) {
      if (hasText(post.getStrip())) {
        report.append("-");
        report.append(post.getStrip());
      }
      report.append("+");
      report.append(post.getAppend());
    }

    report.append("\n");
  }

  System.out.println(report);
}
/**
 * Finds the stem(s) of a word held in a character array.
 * <p>
 * If the dictionary lookup over the first {@code length} characters succeeds, the word itself is
 * reported as the first stem. The stems produced by the four-argument {@code stem} overload
 * (called with {@code null} flags and a recursion depth of 0) are appended after it.
 *
 * @param word Word to find the stems for
 * @param length Number of characters of {@code word} that make up the word
 * @return List of stems for the word
 */
public List<Stem> stem(char word[], int length) {
  List<Stem> results = new ArrayList<Stem>();

  final boolean knownWord = dictionary.lookupWord(word, 0, length) != null;
  if (knownWord) {
    results.add(new Stem(word, length));
  }

  List<Stem> affixStems = stem(word, length, null, 0);
  results.addAll(affixStems);
  return results;
}
// Recurse on the stripped word, passing the affix's append flags, and collect the results in `stems`.
// NOTE(review): ++recursionDepth also increments the local variable itself, not only the value
// passed down; confirm that no later code in this method relies on the un-incremented depth.
stems.addAll(stem(strippedWord, length, affix.getAppendFlags(), ++recursionDepth));
/**
 * Find the unique stem(s) of the provided word.
 * <p>
 * Works like {@link #stem(char[], int)}, but a stem is only reported once: every reported stem
 * is recorded in a {@link CharArraySet} (created with the dictionary's version and ignore-case
 * setting) and later stems already present in that set are dropped.
 *
 * @param word Word to find the stems for
 * @param length Number of characters of {@code word} that make up the word; the array may be
 *        longer than this (e.g. a reused term buffer)
 * @return List of unique stems for the word
 */
public List<Stem> uniqueStems(char word[], int length) {
  List<Stem> stems = new ArrayList<Stem>();
  CharArraySet terms = new CharArraySet(dictionary.getVersion(), 8, dictionary.isIgnoreCase());
  if (dictionary.lookupWord(word, 0, length) != null) {
    stems.add(new Stem(word, length));
    // Register only the first `length` characters, as an independent copy. Adding the raw array
    // would put the whole array (including any characters past `length`) into the set and make
    // the set hold on to the caller's array, so an identical stem found later would not be
    // recognised as a duplicate.
    terms.add(new String(word, 0, length));
  }
  List<Stem> otherStems = stem(word, length, null, 0);
  for (Stem s : otherStems) {
    if (!terms.contains(s.stem)) {
      stems.add(s);
      terms.add(s.stem);
    }
  }
  return stems;
}