/**
 * {@inheritDoc}
 * <p>
 * <b>NOTE:</b>
 * This default implementation simply forwards the call to the wrapped
 * {@code input} stream. Subclasses that override this method must invoke
 * <code>super.close()</code> so that the call keeps propagating down the chain.
 *
 * @throws IOException if closing the wrapped {@code input} fails
 */
@Override
public void close() throws IOException {
  input.close();
}
TokenStream stream = analyzer.tokenStream(null, new StringReader(text)); CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { System.out.println(cattr.toString()); } stream.end(); stream.close();
// Closes tokenStream. NOTE(review): the Lucene TokenStream workflow expects
// end() to have been called before this point — confirm at the call site.
tokenStream.close();
/**
 * Runs {@code source} through the shared {@code analyzer} and collects the
 * text of every emitted token.
 * <p>
 * An {@link IOException} raised while analyzing (or while closing the stream)
 * is logged and the tokens gathered so far are returned; it is not rethrown.
 *
 * @param source the text to analyze
 * @return the token texts in emission order; possibly empty, never {@code null}
 */
public static List<String> keywords( String source ) {
    List<String> keywords = new ArrayList<String>();
    // try-with-resources only closes the stream if it was actually created, so a
    // failure inside analyzer.tokenStream(...) is no longer masked by a
    // NullPointerException from calling close() on a null reference.
    try ( TokenStream ts = analyzer.tokenStream( "keywords", new StringReader( source ) ) ) {
        // Look the attribute up once; the same instance is updated for each token.
        CharTermAttribute term = ts.addAttribute( CharTermAttribute.class );
        ts.reset();
        while ( ts.incrementToken() ) {
            keywords.add( term.toString() );
        }
        ts.end();
    }
    catch ( IOException e ) {
        logger.error( "Error getting keywords ", e );
    }
    return keywords;
}
}
// Closes tokenStream. NOTE(review): the Lucene TokenStream workflow expects
// end() to have been called before this point — confirm at the call site.
tokenStream.close();
/**
 * Closes {@code sources} via {@code IOUtils.close} and then delegates to
 * {@code super.close()}.
 * <p>
 * The superclass call sits in a {@code finally} block, so it runs even when
 * closing {@code sources} throws.
 *
 * @throws IOException if closing {@code sources} or the superclass fails
 */
@Override
public void close() throws IOException {
  try {
    IOUtils.close(sources);
  } finally {
    super.close();
  }
}
}
/**
 * Closes this object through {@code super.close()} and then closes the
 * wrapped {@code delegate}.
 *
 * @throws IOException if either close call fails
 */
@Override
public void close() throws IOException {
  try {
    super.close();
  } finally {
    // Always release the delegate, even when super.close() throws; otherwise
    // a failure in the superclass would leave the delegate open.
    this.delegate.close();
  }
}
/**
 * Feeds every token of {@code stream} to {@code consumer} and reports how many
 * tokens were seen.
 * <p>
 * The stream is reset, handed to {@link TokenConsumer#reset}, and then
 * {@link TokenConsumer#nextToken} is invoked once per token, followed by
 * {@link TokenConsumer#end} when the stream is exhausted.
 * <p>
 * NOTE: this method closes the TokenStream, even on exception, which is awkward
 * because really the caller who called {@link Analyzer#tokenStream} should close it,
 * but when trying that there are recursion issues when we try to use the same
 * TokenStream twice in the same recursion...
 *
 * @param stream   the token stream to consume; always closed by this method
 * @param consumer the callback notified for each token
 * @return the number of tokens consumed
 * @throws IOException if the stream or the consumer fails
 */
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
  int count = 0;
  boolean completed = false;
  try {
    stream.reset();
    consumer.reset(stream);
    // The counter advances only after the consumer has handled the token.
    for (; stream.incrementToken(); count++) {
      consumer.nextToken();
    }
    consumer.end();
    completed = true;
  } finally {
    if (!completed) {
      // Failure path: close through the IOUtils helper instead of a plain close().
      IOUtils.closeWhileHandlingException(stream);
    } else {
      stream.close();
    }
  }
  return count;
}
// The TokenStream workflow requires end() before close(): end() performs the
// end-of-stream operations and must not be invoked on an already closed stream.
ts.end();
ts.close();
if (!tkstream.incrementToken()) { tkstream.end(); tkstream.close(); return endOfData();
/**
 * Analyzes a mixed Latin/CJK string with the common-tokens analyzer, counts how
 * often each token occurs, and asserts that 7 distinct tokens are produced and
 * that the token "林斯" occurs exactly once.
 */
@Test
public void testCJKFilter() throws Exception {
  String s = "then quickbrownfoxjumpedoverthelazy dogss dog 2000 普林斯顿大学";
  Analyzer analyzer = analyzerManager.getCommonTokensAnalyzer();
  Map<String, Integer> tokens = new HashMap<>();
  // try-with-resources closes the stream even if analysis throws, so a failure
  // here cannot leave the stream open.
  try (TokenStream ts = analyzer.tokenStream(FIELD, s)) {
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      String t = termAtt.toString();
      Integer count = tokens.get(t);
      tokens.put(t, count == null ? 1 : count + 1);
    }
    ts.end();
  }
  assertEquals(7, tokens.size());
  // Integer.valueOf replaces the deprecated boxing constructor.
  assertEquals(Integer.valueOf(1), tokens.get("林斯"));
}
queryString.append(attr.toString()).append(" "); tokenStream.close(); } catch (IOException e) { throw new DukeException("Error parsing input string '" + v
/**
 * Analyzes a short phrase with the common-tokens analyzer and asserts that no
 * token judged alphabetic by {@code AlphaIdeographFilterFactory.isAlphabetic}
 * contains the digit 5, that "dirty" is emitted, and that "the" is not.
 */
@Test
public void testCommon() throws Exception {
  AnalyzerManager analyzerManager = AnalyzerManager.newInstance(100000);
  Analyzer common = analyzerManager.getCommonTokensAnalyzer();
  Set<String> seen = new HashSet<>();
  // try-with-resources closes the stream even when fail() or an IOException
  // interrupts the loop.
  try (TokenStream ts = common.tokenStream("f", "the 5,000.12 5000 and dirty dog")) {
    ts.reset();
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) {
      String t = termAtt.toString();
      if (AlphaIdeographFilterFactory.isAlphabetic(t.toCharArray(), t.length()) && t.contains("5")) {
        fail("Shouldn't have found a numeric");
      }
      seen.add(t);
    }
    ts.end();
  }
  assertTrue(seen.contains("dirty"));
  assertFalse(seen.contains("the"));
}
/**
 * Analyzes a mixed-case phrase with the general analyzer and asserts that the
 * tokens "the", "and" and "dog" are all emitted.
 */
@Test
public void testGeneral() throws Exception {
  AnalyzerManager analyzerManager = AnalyzerManager.newInstance(100000);
  Analyzer general = analyzerManager.getGeneralAnalyzer();
  Set<String> seen = new HashSet<>();
  // try-with-resources closes the stream even if reset(), incrementToken() or
  // end() throws.
  try (TokenStream ts = general.tokenStream("f", "tHe quick aaaa aaa anD dirty dog")) {
    ts.reset();
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) {
      seen.add(termAtt.toString());
    }
    ts.end();
  }
  assertTrue(seen.contains("the"));
  assertTrue(seen.contains("and"));
  assertTrue(seen.contains("dog"));
}
/**
 * {@inheritDoc}
 * <p>
 * <b>NOTE:</b>
 * This default implementation simply forwards the call to the wrapped
 * {@code input} stream. Subclasses that override this method must invoke
 * <code>super.close()</code> so that the call keeps propagating down the chain.
 *
 * @throws IOException if closing the wrapped {@code input} fails
 */
@Override
public void close() throws IOException {
  input.close();
}
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { super(indexSettings, name, settings); this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); // check that we have a valid language by trying to create a TokenStream create(EMPTY_TOKEN_STREAM).close(); }
/**
 * Analyzes {@code text} for {@code fieldName} and returns the text of every
 * token the analyzer emits.
 *
 * @param analyzer  the analyzer used to tokenize the text
 * @param fieldName the field name passed to {@code analyzer.tokenStream}
 * @param text      the text to analyze
 * @return the token texts in emission order
 * @throws IOException if the token stream fails while being consumed or closed
 */
private List<String> terms(Analyzer analyzer, String fieldName, String text) throws IOException {
  List<String> terms = new ArrayList<>();
  // try-with-resources closes the stream even when consumption throws.
  try (TokenStream tokenStream = analyzer.tokenStream(fieldName, text)) {
    // addAttribute returns the attribute instance, so no separate lookup is needed.
    CharTermAttribute attribute = tokenStream.addAttribute(CharTermAttribute.class);
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      terms.add(attribute.toString());
    }
    // The TokenStream workflow requires end() after the last token and before close().
    tokenStream.end();
  }
  return terms;
}