org.apache.lucene.analysis.hunspell.Stemmer java code examples

/**
 * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
 * Dictionary
 *
 * @param input TokenStream whose tokens will be stemmed
 * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
 * @param longestOnly true if only the longest term should be output.
 */
public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup,  boolean longestOnly) {
 super(input);
 this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
 this.stemmer = new Stemmer(dictionary);
 this.longestOnly = longestOnly;
}

int caseType = caseOf(word, length);
if (caseType == UPPER_CASE) {
 caseFoldTitle(word, length);
 caseFoldLower(titleBuffer, length);
 List<CharsRef> list = doStem(word, length, false);
 list.addAll(doStem(titleBuffer, length, true));
 list.addAll(doStem(lowerBuffer, length, true));
 return list;
} else if (caseType == TITLE_CASE) {
 caseFoldLower(word, length);
 List<CharsRef> list = doStem(word, length, false);
 list.addAll(doStem(lowerBuffer, length, true));
 return list;
} else {
 return doStem(word, length, false);

  stems.add(newStem(word, length, forms, i));
 boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
} catch (IOException bogus) {
 throw new RuntimeException(bogus);

  if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) {
   continue;
  continue;
 stems.add(newStem(strippedWord, length, forms, i));
 stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant));
} else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
 stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
 stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant));
} else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
 stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));

 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
} else {
 compatible = false;
 int stripLength = stripEnd - stripStart;
 if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength)) {
  continue;
 System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
 List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant);
 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
} else {
 compatible = false;
 int stripLength = stripEnd - stripStart;
 if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength)) {
  continue;
 List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);

buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());

/**
 * Convenience overload that stems a word supplied as a String.
 *
 * @param word Word to find the stems for
 * @return List of stems produced by {@code stem(char[], int)} for the word's characters
 */
public List<CharsRef> stem(String word) {
 // The array length equals word.length(), so the whole word is passed on.
 final char[] chars = word.toCharArray();
 return stem(chars, chars.length);
}

  if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false)) {
   continue;
  continue;
 stems.add(newStem(strippedWord, length, forms, i));
 stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix, caseVariant));
} else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
 stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));
 stems.addAll(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix, caseVariant));
} else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix) {
 stems.addAll(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix, caseVariant));

 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
} else {
 compatible = false;
 int stripLength = stripEnd - stripStart;
 if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength)) {
  continue;
 System.arraycopy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
 List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, prefix, -1, recursionDepth, true, circumfix, caseVariant);
 boolean allowed = dictionary.onlyincompound == -1 || 
          !Dictionary.hasFlag(appendFlags, (char) dictionary.onlyincompound);
 compatible = allowed && hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
} else {
 compatible = false;
 int stripLength = stripEnd - stripStart;
 if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength)) {
  continue;
 List<CharsRef> stemList = applyAffix(strippedWord, strippedWord.length, suffix, prefixFlag, recursionDepth, false, circumfix, caseVariant);

buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());

/**
 * Stems the provided word and drops repeated stems, keeping the first occurrence of each.
 *
 * @param word buffer holding the characters of the word to stem
 * @param length length value forwarded unchanged to {@code stem(word, length)}
 * @return the stems in their original order without repeats; when fewer than two stems are
 *         found, the list returned by {@code stem} is handed back as-is
 */
public List<CharsRef> uniqueStems(char word[], int length) {
 final List<CharsRef> allStems = stem(word, length);
 if (allStems.size() <= 1) {
  // Zero or one stem: nothing can be a duplicate.
  return allStems;
 }
 // Records the stems already emitted; built with the dictionary's ignoreCase setting.
 final CharArraySet seen = new CharArraySet(8, dictionary.ignoreCase);
 final List<CharsRef> result = new ArrayList<>();
 for (CharsRef candidate : allStems) {
  if (seen.contains(candidate)) {
   continue;
  }
  result.add(candidate);
  seen.add(candidate);
 }
 return result;
}

int caseType = caseOf(word, length);
if (caseType == UPPER_CASE) {
 caseFoldTitle(word, length);
 caseFoldLower(titleBuffer, length);
 List<CharsRef> list = doStem(word, length, false);
 list.addAll(doStem(titleBuffer, length, true));
 list.addAll(doStem(lowerBuffer, length, true));
 return list;
} else if (caseType == TITLE_CASE) {
 caseFoldLower(word, length);
 List<CharsRef> list = doStem(word, length, false);
 list.addAll(doStem(lowerBuffer, length, true));
 return list;
} else {
 return doStem(word, length, false);

  stems.add(newStem(word, length, forms, i));
 boolean v = stems.addAll(stem(word, length, -1, -1, -1, 0, true, true, false, false, caseVariant));
} catch (IOException bogus) {
 throw new RuntimeException(bogus);

/**
 * Convenience overload that stems a word supplied as a String.
 *
 * @param word Word to find the stems for
 * @return List of stems produced by {@code stem(char[], int)} for the word's characters
 */
public List<CharsRef> stem(String word) {
 // The array length equals word.length(), so the whole word is passed on.
 final char[] chars = word.toCharArray();
 return stem(chars, chars.length);
}

/**
 * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
 * Dictionary
 *
 * @param input TokenStream whose tokens will be stemmed
 * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
 * @param longestOnly true if only the longest term should be output.
 */
public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup,  boolean longestOnly) {
 super(input);
 this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
 this.stemmer = new Stemmer(dictionary);
 this.longestOnly = longestOnly;
}

/**
 * Stems the provided word and drops repeated stems, keeping the first occurrence of each.
 *
 * @param word buffer holding the characters of the word to stem
 * @param length length value forwarded unchanged to {@code stem(word, length)}
 * @return the stems in their original order without repeats; when fewer than two stems are
 *         found, the list returned by {@code stem} is handed back as-is
 */
public List<CharsRef> uniqueStems(char word[], int length) {
 final List<CharsRef> allStems = stem(word, length);
 if (allStems.size() <= 1) {
  // Zero or one stem: nothing can be a duplicate.
  return allStems;
 }
 // Records the stems already emitted; built with the dictionary's ignoreCase setting.
 final CharArraySet seen = new CharArraySet(8, dictionary.ignoreCase);
 final List<CharsRef> result = new ArrayList<>();
 for (CharsRef candidate : allStems) {
  if (seen.contains(candidate)) {
   continue;
  }
  result.add(candidate);
  seen.add(candidate);
 }
 return result;
}

Javadoc

Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word. It conforms to the original hunspell algorithm, including recursive suffix stripping.

Most used methods

<init>
Constructs a new Stemmer which will use the provided Dictionary to create its stems.
applyAffix
Applies the affix rule to the given word, producing a list of stems if any are found
caseFoldLower
folds lowercase variant of word (title cased) to lowerBuffer
caseFoldTitle
folds titlecase variant of word to titleBuffer
caseOf
returns EXACT_CASE, TITLE_CASE, or UPPER_CASE type for the word
checkCondition
checks condition of the concatenation of two strings
doStem
hasCrossCheckedFlag
Checks if the given flag cross checks with the given array of flags
newStem
stem
Generates a list of stems for the provided word
uniqueStems
Find the unique stem(s) of the provided word

uniqueStems

Popular in Java

Parsing JSON documents to java classes using gson
runOnUiThread (Activity)
scheduleAtFixedRate (ScheduledExecutorService)
setScale (BigDecimal)
File (java.io)
An "abstract" representation of a file system entity identified by a pathname. The pathname may be a
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tr
Executors (java.util.concurrent)
Factory and utility methods for Executor, ExecutorService, ScheduledExecutorService, ThreadFactory,
GridBagLayout (java.awt)
The GridBagLayout class is a flexible layout manager that aligns components vertically and horizonta
JCheckBox (javax.swing)
Loader (org.hibernate.loader)
Abstract superclass of object loading (and querying) strategies. This class implements useful common
CodeWhisperer alternatives

How to use Stemmer in org.apache.lucene.analysis.hunspell

Best Java code snippets using org.apache.lucene.analysis.hunspell.Stemmer (Showing top 16 results out of 315)

How to use
Stemmer
in
org.apache.lucene.analysis.hunspell