/**
 * Builds the analysis chain for this analyzer: a {@link StandardTokenizer}
 * whose output passes through {@link LowerCaseFilter}, {@link StopFilter}
 * (configured with {@code stopwords}), {@link SetKeywordMarkerFilter} (only
 * when the stem exclusion set is non-empty) and finally a
 * {@link SnowballFilter} driven by a new {@link SwedishStemmer}.
 *
 * @param fieldName name of the field being analyzed; not consulted by this method
 * @return the {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tokenizer = new StandardTokenizer();
  TokenStream chain = new StopFilter(new LowerCaseFilter(tokenizer), stopwords);
  if (!stemExclusionSet.isEmpty()) {
    // Terms in the stem exclusion set are marked as keywords ahead of the stemming filter.
    chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
  }
  final TokenStream stemmed = new SnowballFilter(chain, new SwedishStemmer());
  return new TokenStreamComponents(tokenizer, stemmed);
}
/**
 * Creates the stemmer by passing a new {@link SwedishStemmer} to the
 * superclass constructor.
 */
public SwedishSnowballStemmer() { super(new SwedishStemmer()); }
/**
 * Identifies the language handled by this stemmer.
 *
 * @return always {@code Language.SWEDISH}
 */
public Language getLanguage() { return Language.SWEDISH; }
/**
 * Wraps the given stream in a {@link SnowballFilter} driven by a new
 * {@link SwedishStemmer}.
 *
 * @param input the token stream to wrap
 * @return the stemming filter reading from {@code input}
 */
@Override
public TokenStream apply(final TokenStream input) {
  final SwedishStemmer stemmer = new SwedishStemmer();
  return new SnowballFilter(input, stemmer);
}
};
/**
 * Wraps a token stream in a {@link LowerCaseFilter} and then in a
 * {@link SnowballFilter} driven by a new {@link SwedishStemmer}.
 *
 * @param result the token stream to wrap
 * @return the stemming filter at the end of the two-stage chain
 */
static public TokenStream swedish(TokenStream result) {
  final TokenStream lowerCased = new LowerCaseFilter(result);
  return new SnowballFilter(lowerCased, new SwedishStemmer());
}
/**
 * Wraps {@code tokenizer} in the filter chain selected by this instance's
 * flags. A {@link StandardFilter} is always applied; a
 * {@link LowerCaseFilter} is added when {@code caseInsensitive} is set, a
 * {@link StopFilter} over {@code SwedishAnalyzer.getDefaultStopSet()} when
 * {@code useStopWords} is set, and a {@link SnowballFilter} with a new
 * {@link SwedishStemmer} when {@code useStem} is set.
 *
 * @param tokenizer        the tokenizer supplying the tokens
 * @param stemExclusionSet terms to mark via {@link SetKeywordMarkerFilter}
 *                         ahead of stemming; only consulted when
 *                         {@code useStem} is set
 * @return the last stream of the assembled chain
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
  TokenStream chain = new StandardFilter(matchVersion, tokenizer);
  if (caseInsensitive) {
    chain = new LowerCaseFilter(matchVersion, chain);
  }
  if (useStopWords) {
    chain = new StopFilter(matchVersion, chain, SwedishAnalyzer.getDefaultStopSet());
  }
  if (!useStem) {
    // No stemming requested: the exclusion set is never looked at.
    return chain;
  }
  final TokenStream stemInput = stemExclusionSet.isEmpty()
      ? chain
      : new SetKeywordMarkerFilter(chain, stemExclusionSet);
  return new SnowballFilter(stemInput, new SwedishStemmer());
}
}
/**
 * Builds the analysis chain over the supplied {@link Reader}: a
 * {@link StandardTokenizer} followed by {@link StandardFilter},
 * {@link LowerCaseFilter}, {@link StopFilter} (configured with
 * {@code stopwords}), {@link KeywordMarkerFilter} (only when the stem
 * exclusion set is non-empty) and finally a {@link SnowballFilter} driven
 * by a new {@link SwedishStemmer}.
 *
 * @param fieldName name of the field being analyzed; not consulted by this method
 * @param reader    source of the text to tokenize
 * @return the
 *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 *         pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
  final TokenStream normalized =
      new LowerCaseFilter(matchVersion, new StandardFilter(matchVersion, tokenizer));
  TokenStream chain = new StopFilter(matchVersion, normalized, stopwords);
  if (!stemExclusionSet.isEmpty()) {
    // Terms in the stem exclusion set are marked as keywords ahead of the stemming filter.
    chain = new KeywordMarkerFilter(chain, stemExclusionSet);
  }
  final TokenStream stemmed = new SnowballFilter(chain, new SwedishStemmer());
  return new TokenStreamComponents(tokenizer, stemmed);
}
}
return new SnowballFilter(tokenStream, new SwedishStemmer()); } else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) { return new SwedishLightStemFilter(tokenStream);
/**
 * Returns a new Snowball stemmer for a two-letter language code.
 *
 * <p>The code is matched case-insensitively. Codes with a stemmer are
 * {@code EN}, {@code PT}, {@code ES}, {@code DE}, {@code FR}, {@code SV},
 * {@code IT}, {@code NL} and {@code RU}. {@code AR}, {@code FA}, {@code ZH}
 * and {@code KO} are listed explicitly as having no stemmer.
 *
 * @param lang the language code; may be {@code null}
 * @return a new stemmer instance, or {@code null} when {@code lang} is
 *         {@code null} or has no stemmer here
 */
public static SnowballProgram getStemmer(String lang) {
  if (lang == null) {
    // Treated like any other unsupported language instead of failing in the switch.
    return null;
  }
  // Locale.ROOT keeps the case mapping independent of the JVM default locale:
  // under a Turkish default locale "it".toUpperCase() yields "İT", which would
  // never match the "IT" label. Fully qualified so no import change is required.
  switch (lang.toUpperCase(java.util.Locale.ROOT)) {
    case "EN":
      return new EnglishStemmer();
    case "PT":
      return new PortugueseStemmer();
    case "ES":
      return new SpanishStemmer();
    case "DE":
      return new GermanStemmer();
    case "FR":
      return new FrenchStemmer();
    case "SV":
      return new SwedishStemmer();
    case "IT":
      return new ItalianStemmer();
    case "NL":
      return new DutchStemmer();
    case "RU":
      return new RussianStemmer();
    case "AR":
    case "FA":
    case "ZH":
    case "KO":
      // Listed explicitly: these codes are recognized but have no stemmer.
      return null;
    default:
      return null;
  }
}
}
return new SnowballFilter(tokenStream, new SwedishStemmer()); } else if ("light_swedish".equalsIgnoreCase(language) || "lightSwedish".equalsIgnoreCase(language)) { return new SwedishLightStemFilter(tokenStream);