public KuromojiAnalyzerProvider(final IndexSettings indexSettings, final Environment env, final String name, final Settings settings) { super(indexSettings, name, settings); final Set<?> stopWords = Analysis.parseStopWords( env, indexSettings.getIndexVersionCreated(), settings, JapaneseAnalyzer.getDefaultStopSet()); final JapaneseTokenizer.Mode mode = KuromojiTokenizerFactory.getMode(settings); final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); analyzer = new JapaneseAnalyzer(userDictionary, mode, CharArraySet.copy(stopWords), JapaneseAnalyzer.getDefaultStopTags()); }
@SuppressWarnings("resource") @Override protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) throws IOException { if (stemsAllowed) { // Blank out tags when stemming only strOrig = blankOutTags(strOrig); CharArraySet stopWords = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopSet() : CharArraySet.EMPTY_SET; Set<String> stopTags = stopWordsAllowed ? JapaneseAnalyzer.getDefaultStopTags() : Collections.emptySet(); return new JapaneseAnalyzer(null, Mode.SEARCH, stopWords, stopTags).tokenStream("", new StringReader(strOrig)); } else { JapaneseTokenizer tokenizer = new JapaneseTokenizer(null, false, Mode.NORMAL); tokenizer.setReader(new StringReader(strOrig)); return new TagJoiningFilter(tokenizer); } }
@Override public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) { TokenStream stream = new JapaneseBaseFormFilter(tokenizer); stream = new CJKWidthFilter(stream); if (caseInsensitive) stream = new LowerCaseFilter(matchVersion, stream); if (useStopWords) { stream = new JapanesePartOfSpeechStopFilter(true, stream, JapaneseAnalyzer.getDefaultStopTags()); stream = new StopFilter(matchVersion, stream, JapaneseAnalyzer.getDefaultStopSet()); } if (useStem) stream = new JapaneseKatakanaStemFilter(stream); return stream; } }
@Override public TokenStream create(TokenStream tokenStream) { return new StopFilter(tokenStream, JapaneseAnalyzer.getDefaultStopSet()); } }));
@Override public TokenStream create(TokenStream tokenStream) { return new JapanesePartOfSpeechStopFilter(tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } }));
public JapaneseStopTokenFilterFactory(final IndexSettings indexSettings, final Environment env, final String name, final Settings settings) { super(indexSettings, name, settings); this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.removeTrailing = settings.getAsBoolean("remove_trailing", true); this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase); }
@Override public TokenStream create(TokenStream tokenStream) { return new JapanesePartOfSpeechStopFilter(tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } }));
@Inject public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); final Set<?> stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); final JapaneseTokenizer.Mode mode = KuromojiTokenizerFactory.getMode(settings); final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); analyzer = new JapaneseAnalyzer(userDictionary, mode, CharArraySet.copy(stopWords), JapaneseAnalyzer.getDefaultStopTags()); }
@Inject public JapaneseStopTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettingsService.getSettings(), name, settings); this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.removeTrailing = settings.getAsBoolean("remove_trailing", true); Map<String, Set<?>> namedStopWords = MapBuilder.<String, Set<?>>newMapBuilder() .put("_japanese_", JapaneseAnalyzer.getDefaultStopSet()) .immutableMap(); this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), namedStopWords, ignoreCase); }
@Inject public KuromojiAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettingsService.getSettings(), name, settings); final Set<?> stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet()); final JapaneseTokenizer.Mode mode = KuromojiTokenizerFactory.getMode(settings); final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); analyzer = new JapaneseAnalyzer(userDictionary, mode, CharArraySet.copy(stopWords), JapaneseAnalyzer.getDefaultStopTags()); }
@Inject public JapaneseStopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.removeTrailing = settings.getAsBoolean("remove_trailing", true); ImmutableMap<String, Set<?>> namedStopWords = MapBuilder.<String, Set<?>>newMapBuilder() .put("_japanese_", JapaneseAnalyzer.getDefaultStopSet()) .immutableMap(); this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), namedStopWords, version, ignoreCase); }