@Override
public TokenStream create(TokenStream tokenStream) {
  // Wrap the incoming stream with katakana stemming using the filter's default minimum length.
  final TokenStream stemmed = new JapaneseKatakanaStemFilter(tokenStream);
  return stemmed;
}
}));
@Override
public TokenStream create(TokenStream tokenStream) {
  // Apply katakana stemming, honoring the factory-configured minimum token length.
  final TokenStream stemmed = new JapaneseKatakanaStemFilter(tokenStream, minimumLength);
  return stemmed;
}
}
@Override
public TokenStream create(final TokenStream tokenStream) {
  // Chain a katakana stem filter onto the given stream with the configured minimum length.
  final TokenStream result = new JapaneseKatakanaStemFilter(tokenStream, minimumLength);
  return result;
}
}
@Override
public TokenStream create(TokenStream tokenStream) {
  // Decorate the stream with katakana stemming; minimumLength comes from factory config.
  final TokenStream filtered = new JapaneseKatakanaStemFilter(tokenStream, minimumLength);
  return filtered;
}
}
@Override
public TokenStream create(TokenStream input) {
  // Return the input stream wrapped in a katakana stem filter with the configured minimum length.
  final TokenStream out = new JapaneseKatakanaStemFilter(input, minimumLength);
  return out;
}
}
@Override
public TokenStream create(TokenStream tokenStream) {
  // Stem katakana tokens in the stream using the filter's default minimum length.
  final TokenStream wrapped = new JapaneseKatakanaStemFilter(tokenStream);
  return wrapped;
}
}));
@Override
public TokenStream create(final TokenStream tokenStream) {
  // Attach katakana stemming to the chain; minimum length is factory-configured.
  final TokenStream stemmed = new JapaneseKatakanaStemFilter(tokenStream, minimumLength);
  return stemmed;
}
}
// Wraps the given stream with katakana stemming (default minimum length).
// Added @Override: this overrides the factory's abstract create(TokenStream),
// so the annotation lets the compiler verify the signature (Effective Java, Item 40).
@Override
public TokenStream create(TokenStream stream) {
  return new JapaneseKatakanaStemFilter(stream);
}
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  // Tokenize with the Japanese (Kuromoji-style) tokenizer, then apply the
  // filter chain in the same order as Lucene's JapaneseAnalyzer:
  // base form -> part-of-speech stop -> width folding -> stopwords ->
  // katakana stemming -> lowercasing.
  final Tokenizer source = new JapaneseTokenizer(userDict, true, mode);
  TokenStream chain = new JapaneseBaseFormFilter(source);
  chain = new JapanesePartOfSpeechStopFilter(chain, stoptags);
  chain = new CJKWidthFilter(chain);
  chain = new StopFilter(chain, stopwords);
  chain = new JapaneseKatakanaStemFilter(chain);
  chain = new LowerCaseFilter(chain); // lowercase last, after stopword removal
  return new TokenStreamComponents(source, chain);
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
  // Build the analysis chain; base-form and width folding are always applied,
  // while lowercasing, stopword removal, and katakana stemming are toggled by
  // the caseInsensitive / useStopWords / useStem flags respectively.
  // NOTE(review): stemExclusionSet is accepted but not consulted here — confirm
  // whether exclusion handling happens elsewhere.
  TokenStream ts = new JapaneseBaseFormFilter(tokenizer);
  ts = new CJKWidthFilter(ts);
  if (caseInsensitive) {
    ts = new LowerCaseFilter(matchVersion, ts);
  }
  if (useStopWords) {
    ts = new JapanesePartOfSpeechStopFilter(true, ts, JapaneseAnalyzer.getDefaultStopTags());
    ts = new StopFilter(matchVersion, ts, JapaneseAnalyzer.getDefaultStopSet());
  }
  if (useStem) {
    ts = new JapaneseKatakanaStemFilter(ts);
  }
  return ts;
}
}
/**
 * Creates
 * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 * used to tokenize all the text in the provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 *         built from a {@link JapaneseTokenizer} filtered with
 *         {@link JapaneseWidthFilter}, {@link JapanesePunctuationFilter},
 *         {@link JapanesePartOfSpeechStopFilter}, {@link StopFilter},
 *         {@link KeywordMarkerFilter} if a stem exclusion set is provided,
 *         {@link JapaneseBasicFormFilter}, {@link JapaneseKatakanaStemFilter},
 *         and {@link LowerCaseFilter}
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
  // Javadoc fix: the list previously named JapaneseStopFilter, but the chain
  // below applies the plain StopFilter. Code behavior is unchanged.
  Tokenizer tokenizer = new JapaneseTokenizer(reader, null, dictionaryDir);
  TokenStream stream = new JapaneseWidthFilter(tokenizer);
  stream = new JapanesePunctuationFilter(true, stream);
  stream = new JapanesePartOfSpeechStopFilter(true, stream, stoptags);
  stream = new StopFilter(matchVersion, stream, stopwords);
  // Mark exclusions as keywords so the basic-form/stem filters leave them alone.
  if (!stemExclusionSet.isEmpty()) {
    stream = new KeywordMarkerFilter(stream, stemExclusionSet);
  }
  stream = new JapaneseBasicFormFilter(stream);
  stream = new JapaneseKatakanaStemFilter(stream);
  stream = new LowerCaseFilter(matchVersion, stream);
  return new TokenStreamComponents(tokenizer, stream);
}
}