/**
 * Builds a new {@link TokenStream} over all the text supplied by the given {@link Reader}.
 * <p>
 * The text is split by a {@link StandardTokenizer} and passed through a
 * {@link GreekLowerCaseFilter}. A {@link StopFilter} is appended only when stop-word
 * handling is switched on and a stop set is present.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return a {@link TokenStream} built from a {@link StandardTokenizer} filtered with
 *         {@link GreekLowerCaseFilter} and, when applicable, {@link StopFilter}
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
  final TokenStream lowerCased =
      new GreekLowerCaseFilter(new StandardTokenizer(matchVersion, reader));

  // Nothing more to add when stop-word removal is off or no stop set is configured.
  if (!doStopWords || stopSet == null) {
    return lowerCased;
  }

  final boolean positionIncrements =
      StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
  return new StopFilter(positionIncrements, lowerCased, stopSet);
}
/** * Constructs a {@link LowerCaseTokenizer} filtered by a language filter * {@link StopFilter} and {@link PorterStemFilter} for English. */ @Override public final TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new LowerCaseTokenizer(reader); if (doStopWords && stopSet != null) { result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); } // Using Porter Stemmer if (doStemming) { result = new PorterStemFilter(result); } return result; }
/**
 * Assembles a new analysis chain over the text supplied by the given {@link Reader}.
 * <p>
 * Stages, in order: {@link ArabicLetterTokenizer}, {@link LowerCaseFilter},
 * {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} and, when
 * stop-word handling is switched on and a stop set is present, {@link StopFilter}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return the final stage of the assembled chain
 */
@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
  final TokenStream letters = new ArabicLetterTokenizer(reader);
  final TokenStream lowerCased = new LowerCaseFilter(letters);
  final TokenStream arabicNormalized = new ArabicNormalizationFilter(lowerCased);
  // Persian-specific normalization applied on top of the Arabic normalization.
  final TokenStream normalized = new PersianNormalizationFilter(arabicNormalized);

  if (!doStopWords || stopSet == null) {
    return normalized;
  }

  // Ordering matters: stop filtering comes last because the stop set is
  // normalized with the filters above.
  final boolean positionIncrements =
      StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
  return new StopFilter(positionIncrements, normalized, stopSet);
}
/**
 * Returns a (possibly reused) {@link TokenStream} over the text supplied by the given
 * {@link Reader}.
 * <p>
 * When {@code getPreviousTokenStream()} yields a saved chain, its source tokenizer is
 * pointed at the new reader and the saved result is returned as originally built; the
 * stop-word settings are not re-examined. Otherwise a chain consisting of a
 * {@link LowerCaseTokenizer} and, when stop-word handling is switched on and a stop set
 * is present, a {@link StopFilter} is built and handed to
 * {@code setPreviousTokenStream(...)}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return the result stage of the saved or newly built chain
 * @throws IOException propagated from the underlying tokenizer calls
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  final SavedStreams cached = (SavedStreams) getPreviousTokenStream();
  if (cached != null) {
    // Reuse path: only the tokenizer needs to see the new input.
    cached.getSource().reset(reader);
    return cached.getResult();
  }

  final SavedStreams fresh = new SavedStreams(new LowerCaseTokenizer(reader));
  if (doStopWords && stopSet != null) {
    final boolean positionIncrements =
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
    fresh.setResult(new StopFilter(positionIncrements, fresh.getResult(), stopSet));
  }
  setPreviousTokenStream(fresh);
  return fresh.getResult();
}
/**
 * Returns a (possibly reused) {@link TokenStream} over the text supplied by the given
 * {@link Reader}.
 * <p>
 * When {@code getPreviousTokenStream()} yields a saved chain, its source tokenizer is
 * pointed at the new reader and the saved result is returned as originally built; the
 * stop-word and stemming settings are not re-examined. Otherwise a new chain is built:
 * a {@link LowerCaseTokenizer}, then a {@link StopFilter} when stop-word handling is
 * switched on and a stop set is present, then a {@link SnowballFilter} configured with
 * {@code stemmerName} when stemming is switched on. The new chain is handed to
 * {@code setPreviousTokenStream(...)}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return the result stage of the saved or newly built chain
 * @throws IOException propagated from the underlying tokenizer calls
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  final SavedStreams cached = (SavedStreams) getPreviousTokenStream();
  if (cached != null) {
    // Reuse path: only the tokenizer needs to see the new input.
    cached.getSource().reset(reader);
    return cached.getResult();
  }

  final SavedStreams fresh = new SavedStreams(new LowerCaseTokenizer(reader));

  final boolean removeStopWords = doStopWords && stopSet != null;
  if (removeStopWords) {
    final boolean positionIncrements =
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
    fresh.setResult(new StopFilter(positionIncrements, fresh.getResult(), stopSet));
  }

  // Stemming, when enabled, wraps whatever the chain's current result is.
  if (doStemming) {
    fresh.setResult(new SnowballFilter(fresh.getResult(), stemmerName));
  }

  setPreviousTokenStream(fresh);
  return fresh.getResult();
}
/**
 * Returns a (possibly reused) {@link TokenStream} over the text supplied by the given
 * {@link Reader}.
 * <p>
 * When {@code getPreviousTokenStream()} yields a saved chain, its source tokenizer is
 * pointed at the new reader and the saved result is returned as originally built; the
 * stop-word and stemming settings are not re-examined. Otherwise a new chain is built:
 * a {@link LowerCaseTokenizer}, then a {@link StopFilter} when stop-word handling is
 * switched on and a stop set is present, then a {@link PorterStemFilter} when stemming
 * is switched on. The new chain is handed to {@code setPreviousTokenStream(...)}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return the result stage of the saved or newly built chain
 * @throws IOException propagated from the underlying tokenizer calls
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  final SavedStreams cached = (SavedStreams) getPreviousTokenStream();
  if (cached != null) {
    // Reuse path: only the tokenizer needs to see the new input.
    cached.getSource().reset(reader);
    return cached.getResult();
  }

  final SavedStreams fresh = new SavedStreams(new LowerCaseTokenizer(reader));

  final boolean removeStopWords = doStopWords && stopSet != null;
  if (removeStopWords) {
    final boolean positionIncrements =
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
    fresh.setResult(new StopFilter(positionIncrements, fresh.getResult(), stopSet));
  }

  // The Porter stemmer, when enabled, wraps whatever the chain's current result is.
  if (doStemming) {
    fresh.setResult(new PorterStemFilter(fresh.getResult()));
  }

  setPreviousTokenStream(fresh);
  return fresh.getResult();
}
/**
 * Returns a (possibly reused) {@link TokenStream} over the text supplied by the given
 * {@link Reader}.
 * <p>
 * When {@code getPreviousTokenStream()} yields a saved chain, its source tokenizer is
 * pointed at the new reader and the saved result is returned as originally built; the
 * stop-word and stemming settings are not re-examined. Otherwise a new chain is built:
 * a {@link LowerCaseTokenizer}, then a {@link StopFilter} when stop-word handling is
 * switched on and a stop set is present, then a {@link GermanStemFilter} when stemming
 * is switched on. The new chain is handed to {@code setPreviousTokenStream(...)}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return the result stage of the saved or newly built chain
 * @throws IOException propagated from the underlying tokenizer calls
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  final SavedStreams cached = (SavedStreams) getPreviousTokenStream();
  if (cached != null) {
    // Reuse path: only the tokenizer needs to see the new input.
    cached.getSource().reset(reader);
    return cached.getResult();
  }

  final SavedStreams fresh = new SavedStreams(new LowerCaseTokenizer(reader));

  final boolean removeStopWords = doStopWords && stopSet != null;
  if (removeStopWords) {
    final boolean positionIncrements =
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
    fresh.setResult(new StopFilter(positionIncrements, fresh.getResult(), stopSet));
  }

  // The German stemmer, when enabled, wraps whatever the chain's current result is.
  if (doStemming) {
    fresh.setResult(new GermanStemFilter(fresh.getResult()));
  }

  setPreviousTokenStream(fresh);
  return fresh.getResult();
}
/**
 * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the
 * provided {@link Reader}.
 * <p>
 * When {@code getPreviousTokenStream()} yields a saved chain, its source tokenizer is
 * pointed at the new reader and the saved result is returned as originally built; the
 * stop-word settings are not re-examined. Otherwise a new chain is built and handed to
 * {@code setPreviousTokenStream(...)}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return a {@link TokenStream} built from a {@link StandardTokenizer} filtered with
 *         {@link GreekLowerCaseFilter} and, when stop-word handling is switched on and a
 *         stop set is present, {@link StopFilter}
 * @throws IOException propagated from the underlying tokenizer calls
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  final SavedStreams cached = (SavedStreams) getPreviousTokenStream();
  if (cached != null) {
    // Reuse path: only the tokenizer needs to see the new input.
    cached.getSource().reset(reader);
    return cached.getResult();
  }

  final SavedStreams fresh = new SavedStreams(new StandardTokenizer(matchVersion, reader));
  fresh.setResult(new GreekLowerCaseFilter(fresh.getResult()));

  final boolean removeStopWords = doStopWords && stopSet != null;
  if (removeStopWords) {
    final boolean positionIncrements =
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
    fresh.setResult(new StopFilter(positionIncrements, fresh.getResult(), stopSet));
  }

  setPreviousTokenStream(fresh);
  return fresh.getResult();
}
@Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(new StandardTokenizer(matchVersion, reader)); streams.setResult(new ThaiWordFilter(streams.getResult())); if (doStopWords && stopSet != null) { streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet)); } setPreviousTokenStream(streams); } else { streams.getSource().reset(reader); streams.getResult().reset(); // reset the ThaiWordFilter's state } return streams.getResult(); }
/**
 * Returns a (possibly reused) {@link TokenStream} over the text supplied by the given
 * {@link Reader}.
 * <p>
 * When {@code getPreviousTokenStream()} yields a saved chain, its source tokenizer is
 * pointed at the new reader and the saved result is returned as originally built; the
 * stop-word and stemming settings are not re-examined. Otherwise a new chain is built:
 * an {@link ArabicLetterTokenizer}, a {@link LowerCaseFilter}, an
 * {@link ArabicNormalizationFilter}, then a {@link StopFilter} when stop-word handling
 * is switched on and a stop set is present, then an {@link ArabicStemFilter} when
 * stemming is switched on. The new chain is handed to
 * {@code setPreviousTokenStream(...)}.
 *
 * @param fieldName name of the field being analyzed (not consulted by this method)
 * @param reader    source of the text to tokenize
 * @return the result stage of the saved or newly built chain
 * @throws IOException propagated from the underlying tokenizer calls
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
  final SavedStreams cached = (SavedStreams) getPreviousTokenStream();
  if (cached != null) {
    // Reuse path: only the tokenizer needs to see the new input.
    cached.getSource().reset(reader);
    return cached.getResult();
  }

  final SavedStreams fresh = new SavedStreams(new ArabicLetterTokenizer(reader));
  fresh.setResult(new LowerCaseFilter(fresh.getResult()));
  fresh.setResult(new ArabicNormalizationFilter(fresh.getResult()));

  final boolean removeStopWords = doStopWords && stopSet != null;
  if (removeStopWords) {
    final boolean positionIncrements =
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
    fresh.setResult(new StopFilter(positionIncrements, fresh.getResult(), stopSet));
  }

  // The Arabic stemmer, when enabled, wraps whatever the chain's current result is.
  if (doStemming) {
    fresh.setResult(new ArabicStemFilter(fresh.getResult()));
  }

  setPreviousTokenStream(fresh);
  return fresh.getResult();
}