/**
 * Wraps the incoming {@link TokenStream} in a {@link JapaneseBasicFormFilter}.
 *
 * @param stream the upstream token stream to filter
 * @return the stream wrapped in a {@link JapaneseBasicFormFilter}
 */
public TokenStream create(TokenStream stream) {
  TokenStream basicFormStream = new JapaneseBasicFormFilter(stream);
  return basicFormStream;
}
}
/**
 * Creates
 * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 * used to tokenize all the text in the provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 *         built from a {@link JapaneseTokenizer} filtered with
 *         {@link JapaneseWidthFilter}, {@link JapanesePunctuationFilter},
 *         {@link JapanesePartOfSpeechStopFilter}, {@link StopFilter} (over the
 *         configured stopword set), {@link KeywordMarkerFilter} if a stem
 *         exclusion set is provided, {@link JapaneseBasicFormFilter},
 *         {@link JapaneseKatakanaStemFilter}, and {@link LowerCaseFilter}
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
  // NOTE(review): second JapaneseTokenizer argument is passed as null here —
  // presumably an optional user-dictionary parameter; confirm against the
  // JapaneseTokenizer constructor.
  Tokenizer tokenizer = new JapaneseTokenizer(reader, null, dictionaryDir);
  // Width normalization runs first so every later filter sees canonical forms.
  TokenStream stream = new JapaneseWidthFilter(tokenizer);
  stream = new JapanesePunctuationFilter(true, stream);
  // Remove tokens matching the configured part-of-speech stop tags.
  stream = new JapanesePartOfSpeechStopFilter(true, stream, stoptags);
  stream = new StopFilter(matchVersion, stream, stopwords);
  // Mark stem-exclusion terms as keywords so the stemming filters below
  // leave them untouched; skipped entirely when the set is empty.
  if (!stemExclusionSet.isEmpty())
    stream = new KeywordMarkerFilter(stream, stemExclusionSet);
  stream = new JapaneseBasicFormFilter(stream);
  stream = new JapaneseKatakanaStemFilter(stream);
  // Lowercasing happens last, after all Japanese-specific filtering.
  stream = new LowerCaseFilter(matchVersion, stream);
  return new TokenStreamComponents(tokenizer, stream);
}
}