/**
 * Builds the default (all-options-off) token stream for the given text.
 * Subclasses that support extra analysis options are expected to override
 * this method; this base version only performs standard tokenization.
 *
 * @param strOrig the raw text to tokenize
 * @return a {@link TokenStream} producing standard tokens over {@code strOrig}
 * @throws IOException if the tokenizer cannot consume the input
 */
protected TokenStream getStandardTokenStream(String strOrig) throws IOException {
    final StandardTokenizer standard = new StandardTokenizer();
    standard.setReader(new StringReader(strOrig));
    return standard;
}
// NOTE(review): truncated excerpt (`...` on both sides) — only the tokenizer
// setup is visible: a StandardTokenizer reading the literal input "this table".
// The surrounding statements are outside this view; confirm against the caller.
... final StandardTokenizer src = new StandardTokenizer(); src.setReader(new StringReader("this table")); ...
public static void main(String[] args) throws IOException, ParseException, org.apache.lucene.queryparser.surround.parser.ParseException { StandardTokenizer stdToken = new StandardTokenizer(); stdToken.setReader(new StringReader("Some stuff that is in need of analysis")); TokenStream tokenStream; //You're code starts here tokenStream = new StopFilter(new ASCIIFoldingFilter(new ClassicFilter(new LowerCaseFilter(stdToken))), EnglishAnalyzer.getDefaultStopSet()); tokenStream.reset(); //And ends here CharTermAttribute token = tokenStream.getAttribute(CharTermAttribute.class); while (tokenStream.incrementToken()) { System.out.println(token.toString()); } tokenStream.close(); }
@Override protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed, final boolean stopWordsAllowed) throws IOException { StandardTokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(new StringReader(strOrig)); if (stemsAllowed) { Dictionary dictionary = getDict(); if (dictionary == null) { return tokenizer; } return new HunspellStemFilter(tokenizer, dictionary); /// TODO: implement stop words checks } else { return tokenizer; } }
// NOTE(review): truncated excerpt — the while-loop body and the matching
// catch/finally continue past this view. Visible pattern: reuse an existing
// tokenizer by close() -> setReader(newReader) -> reset() before iterating
// with incrementToken().
try { tokenizer.close(); tokenizer.setReader(stringReader); tokenizer.reset(); while ( tokenizer.incrementToken() ) {
// NOTE(review): truncated excerpt — the while-loop body and the matching
// catch/finally continue past this view. Visible pattern: a freshly created
// tokenizer needs only setReader() -> reset() (no close()) before iterating
// with incrementToken().
final StandardTokenizer tokenizer = new StandardTokenizer(); try { tokenizer.setReader(stringReader); tokenizer.reset(); while ( tokenizer.incrementToken() ) {