/** * Creates a the TokenStreamComponents used to analyze the stream. * * @param fieldName the field that this lucene analyzer will process * @return the token stream filter chain */ @Override protected TokenStreamComponents createComponents(String fieldName) { //final Tokenizer source = new AlphaNumericTokenizer(); final Tokenizer source = new WhitespaceTokenizer(); TokenStream stream = source; stream = new UrlTokenizingFilter(stream); stream = new AlphaNumericFilter(stream); stream = new WordDelimiterGraphFilter(stream, WordDelimiterGraphFilter.GENERATE_WORD_PARTS | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS | WordDelimiterGraphFilter.PRESERVE_ORIGINAL | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null); stream = new LowerCaseFilter(stream); stream = new StopFilter(stream, stopWords); concatenatingFilter = new TokenPairConcatenatingFilter(stream); return new TokenStreamComponents(source, concatenatingFilter); }
/**
 * Wraps the given stream in a {@link StopFilter} using the configured stop words.
 *
 * @param input the token stream to filter
 * @return the stop-word-filtered stream
 */
@Override
public TokenStream create(TokenStream input) {
    return new StopFilter(input, stopWords);
}
}
/**
 * Creates the stop filter for this factory.
 *
 * <p>With {@code removeTrailing} set, a plain {@link StopFilter} is used, which also drops
 * a stop word at the very end of the stream. Otherwise a {@link SuggestStopFilter} is used,
 * which keeps a trailing stop word.
 *
 * @param tokenStream the stream to wrap
 * @return the configured stop-word filter
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    return removeTrailing
            ? new StopFilter(tokenStream, stopWords)
            : new SuggestStopFilter(tokenStream, stopWords);
}
/**
 * Creates the stop filter for this factory.
 *
 * <p>With {@code removeTrailing} set, a plain {@link StopFilter} is used, which also drops
 * a stop word at the very end of the stream. Otherwise a {@link SuggestStopFilter} is used,
 * which keeps a trailing stop word.
 *
 * @param tokenStream the stream to wrap
 * @return the configured stop-word filter
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    return removeTrailing
            ? new StopFilter(tokenStream, stopWords)
            : new SuggestStopFilter(tokenStream, stopWords);
}
@Override
public TokenStream create(TokenStream tokenStream) {
    // Remove the default Japanese stop words from the stream.
    return new StopFilter(tokenStream, JapaneseAnalyzer.getDefaultStopSet());
}
}));
/**
 * Creates a stop filter appropriate for the configured Lucene match version.
 *
 * @param input the stream to wrap
 * @return a {@link StopFilter} on 4.4+, otherwise the legacy 4.3 variant
 */
@Override
public TokenStream create(TokenStream input) {
    if (luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
        return new StopFilter(input, stopWords);
    }
    // Pre-4.4 behavior: the legacy filter still honors enablePositionIncrements.
    @SuppressWarnings("deprecation")
    final TokenStream filter = new Lucene43StopFilter(enablePositionIncrements, input, stopWords);
    return filter;
}
}
/**
 * Builds the Hebrew token stream, optionally removing stop words.
 *
 * @param tokenizer the source tokenizer
 * @param stemExclusionSet not used by this implementation
 * @return the (possibly stop-filtered) stream
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    // Lazily load the Hebrew stop words on first use.
    // NOTE(review): this lazy init is not synchronized — confirm the analyzer
    // is confined to a single thread before relying on it.
    if (stopWords == null) {
        stopWords = LanguageTokenizer.getStopWordsForNonLuceneLangFromFile(
                matchVersion, Language.getByLangCode("he"));
    }
    TokenStream stream = tokenizer;
    if (useStopWords) {
        stream = new StopFilter(matchVersion, stream, stopWords);
    }
    return stream;
}
}
/**
 * Adds a per-field stop filter on top of the wrapped components.
 *
 * @param fieldName the field being analyzed
 * @param components the components produced by the delegate analyzer
 * @return the original components when the field has no configured stop words,
 *         otherwise the components extended with a {@link StopFilter}
 */
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    final Set<String> stopWords = stopWordsPerField.get(fieldName);
    if (stopWords == null) {
        return components;
    }
    // Case-sensitive stop set (ignoreCase = false) built from this field's words.
    final StopFilter stopFilter =
            new StopFilter(components.getTokenStream(), new CharArraySet(stopWords, false));
    return new TokenStreamComponents(components.getTokenizer(), stopFilter);
}
/**
 * Builds the CJK token stream: width normalization, bigram formation, and
 * optional lower-casing and stop-word removal.
 *
 * @param tokenizer the source tokenizer
 * @param stemExclusionSet not used by this implementation
 * @return the assembled filter chain
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new CJKBigramFilter(new CJKWidthFilter(tokenizer));
    if (caseInsensitive) {
        stream = new LowerCaseFilter(matchVersion, stream);
    }
    if (useStopWords) {
        stream = new StopFilter(matchVersion, stream, CJKAnalyzer.getDefaultStopSet());
    }
    return stream;
}
}
/**
 * Letter tokenization, lower-casing, English stop-word removal, then Porter stemming.
 *
 * @param fieldName the field being analyzed
 * @param reader the character source
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    LetterTokenizer tokenizer = new LetterTokenizer(BonnieConstants.LUCENE_VERSION, reader);
    TokenStream chain = new LowerCaseFilter(BonnieConstants.LUCENE_VERSION, tokenizer);
    chain = new StopFilter(BonnieConstants.LUCENE_VERSION, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    chain = new PorterStemFilter(chain);
    return new TokenStreamComponents(tokenizer, chain);
}
/** {@inheritDoc} */
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    // Standard cleanup, case folding, stop-word removal, then Snowball
    // stemming for the configured language.
    TokenStream chain = new StandardFilter(source);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, stopwords);
    chain = new SnowballFilter(chain, language);
    return new TokenStreamComponents(source, chain);
}
}
protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new ChineseTokenStream(reader); if (stopWordManager != null) { //走停止词过滤 CharArraySet stopWords = new CharArraySet(Version.LUCENE_CURRENT, stopWordManager.getStopWords(), true); TokenStream result = new StopFilter(Version.LUCENE_CURRENT, source, stopWords); return new TokenStreamComponents(source, result); } else { //走原始逻辑 return new TokenStreamComponents(source); } }
/**
 * German analysis chain: standard filtering, lower-casing, stop-word removal,
 * keyword marking, and German stemming.
 *
 * @param arg0 the field being analyzed (unused by the chain itself)
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String arg0) {
    final org.apache.lucene.analysis.Tokenizer source = new StandardTokenizer();
    TokenStream chain = new StandardFilter(source);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, stopWords);
    // Empty keyword set: no tokens are protected from stemming.
    chain = new SetKeywordMarkerFilter(chain, CharArraySet.EMPTY_SET);
    chain = new GermanStemFilter(chain);
    return new TokenStreamComponents(source, chain);
}
/**
 * URI analysis chain: percent-decoding, URI normalization, mailto handling,
 * lower-casing, stop-word removal, and a 2-256 character length filter.
 *
 * @param fieldName the field being analyzed
 * @param reader the character source
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final WhitespaceTokenizer source = new WhitespaceTokenizer(matchVersion, reader);
    TokenStream sink = new URIDecodingFilter(source, "UTF-8");
    sink = this.applyURINormalisation(sink);
    sink = new MailtoFilter(sink);
    sink = new LowerCaseFilter(matchVersion, sink);
    sink = new StopFilter(matchVersion, sink, stopSet);
    sink = new LengthFilter(true, sink, 2, 256);
    return new TokenStreamComponents(source, sink);
}
/**
 * URI analysis chain: percent-decoding, URI normalization, mailto handling,
 * lower-casing, stop-word removal, and a 2-256 character length filter.
 *
 * @param fieldName the field being analyzed
 * @param reader the character source
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final WhitespaceTokenizer source = new WhitespaceTokenizer(matchVersion, reader);
    TokenStream sink = new URIDecodingFilter(source, "UTF-8");
    sink = this.applyURINormalisation(sink);
    sink = new MailtoFilter(sink);
    sink = new LowerCaseFilter(matchVersion, sink);
    sink = new StopFilter(matchVersion, sink, stopSet);
    sink = new LengthFilter(matchVersion, true, sink, 2, 256);
    return new TokenStreamComponents(source, sink);
}
/**
 * English analysis chain: standard filtering, ASCII folding, lower-casing,
 * possessive stripping, stop-word removal, alpha word-delimiting, then
 * Porter stemming.
 *
 * @param fieldName the field being analyzed
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream chain = new StandardFilter(source);
    chain = new ASCIIFoldingFilter(chain);
    chain = new LowerCaseFilter(chain);
    chain = new EnglishPossessiveFilter(chain);
    chain = new StopFilter(chain, stopwords);
    chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
    chain = new PorterStemFilter(chain);
    return new TokenStreamComponents(source, chain);
}
}
/**
 * N-gram tokenization followed by standard filtering, lower-casing, and
 * case-insensitive stop-word removal.
 *
 * @param field the field being analyzed
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(final String field) {
    final Tokenizer source = new NGramTokenizer(minNgram(), maxNgram());
    // Case-insensitive (ignoreCase = true) stop set from the configured words.
    final CharArraySet stopSet = new CharArraySet(asList(stopWords()), true);
    final TokenStream filtered =
            new StopFilter(new LowerCaseFilter(new StandardFilter(source)), stopSet);
    return new TokenStreamComponents(source, filtered);
}
/**
 * English analysis chain: standard filtering, ASCII folding, possessive
 * stripping, alpha word-delimiting, lower-casing, default English stop-word
 * removal, then Porter stemming.
 *
 * @param fieldName the field being analyzed
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream chain = new StandardFilter(source);
    chain = new ASCIIFoldingFilter(chain);
    chain = new EnglishPossessiveFilter(chain);
    chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, EnglishAnalyzer.getDefaultStopSet());
    chain = new PorterStemFilter(chain);
    return new TokenStreamComponents(source, chain);
}
/**
 * English analysis chain: standard filtering, ASCII folding, possessive
 * stripping, alpha word-delimiting, lower-casing, default English stop-word
 * removal, then Porter stemming.
 *
 * @param fieldName the field being analyzed
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream chain = new StandardFilter(source);
    chain = new ASCIIFoldingFilter(chain);
    chain = new EnglishPossessiveFilter(chain);
    chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(chain, EnglishAnalyzer.getDefaultStopSet());
    chain = new PorterStemFilter(chain);
    return new TokenStreamComponents(source, chain);
}
/**
 * English analysis chain (version-aware Lucene 4.x API): standard filtering,
 * ASCII folding, possessive stripping, alpha word-delimiting, lower-casing,
 * default English stop-word removal, then Porter stemming.
 *
 * @param fieldName the field being analyzed
 * @param reader the character source
 * @return the assembled components
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream chain = new StandardFilter(matchVersion, source);
    chain = new ASCIIFoldingFilter(chain);
    chain = new EnglishPossessiveFilter(matchVersion, chain);
    chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
    chain = new LowerCaseFilter(matchVersion, chain);
    chain = new StopFilter(matchVersion, chain, EnglishAnalyzer.getDefaultStopSet());
    chain = new PorterStemFilter(chain);
    return new TokenStreamComponents(source, chain);
}