public final class LuceneUtil {

    private LuceneUtil() {}

    public static List<String> tokenizeString(Analyzer analyzer, String string) {
        List<String> result = new ArrayList<String>();
        // try-with-resources: the stream must be closed before the analyzer
        // can hand out another TokenStream for the same thread
        try (TokenStream stream = analyzer.tokenStream(null, new StringReader(string))) {
            stream.reset();
            while (stream.incrementToken()) {
                result.add(stream.getAttribute(CharTermAttribute.class).toString());
            }
            stream.end();
        } catch (IOException e) {
            // not thrown b/c we're using a string reader...
            throw new RuntimeException(e);
        }
        return result;
    }
}
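A minimal usage sketch for the helper above; StandardAnalyzer and the sample text are assumptions, any Analyzer implementation works the same way:

import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

public static void main(String[] args) {
    // Hypothetical caller; StandardAnalyzer is just one convenient choice
    try (Analyzer analyzer = new StandardAnalyzer()) {
        List<String> tokens = LuceneUtil.tokenizeString(analyzer, "The Quick Brown Fox");
        System.out.println(tokens); // lower-cased terms as produced by the analyzer
    }
}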
/**
 * Creates simple boolean query from the cached tokenstream contents
 */
protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    List<Term> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        terms.add(new Term(field, termAtt.getBytesRef()));
    }
    return newSynonymQuery(terms.toArray(new Term[terms.size()]));
}
@Override
public Result parse(CharSequence text) {
    TokenStream stream;
    try {
        stream = analyzer.tokenStream("text", StrUtil.str(text));
        stream.reset();
    } catch (IOException e) {
        throw new TokenizerException(e);
    }
    return new AnalysisResult(stream);
}
public static List<String> keywords(String source) {
    List<String> keywords = new ArrayList<String>();
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("keywords", new StringReader(source));
        ts.reset();
        while (ts.incrementToken()) {
            keywords.add(ts.getAttribute(CharTermAttribute.class).toString());
        }
        ts.end();
    } catch (IOException e) {
        logger.error("Error getting keywords", e);
    } finally {
        if (ts != null) { // tokenStream() may have thrown, leaving ts null
            try {
                ts.close();
            } catch (IOException ignored) {
            }
        }
    }
    return keywords;
}
TokenStream stream = analyzer.tokenStream(null, new StringReader(text));
CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    System.out.println(cattr.toString());
}
stream.end();
stream.close();
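The same loop is safer wrapped in try-with-resources, which guarantees close() even when incrementToken() throws and so keeps the analyzer reusable afterwards. A sketch under the same assumptions (an analyzer and a text string in scope):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

static void printTokens(Analyzer analyzer, String text) throws IOException {
    try (TokenStream stream = analyzer.tokenStream(null, new StringReader(text))) {
        CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();                 // mandatory before the first incrementToken()
        while (stream.incrementToken()) {
            System.out.println(cattr.toString());
        }
        stream.end();                   // records end-of-stream offset state
    }                                   // close() runs here, even on exceptions
}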
private SToken[] getTokens(String text) throws IOException {
    // FIXME integrate the loop below into getSummary to save the cloning and memory;
    // also, creating Tokens is suboptimal with 3.0.0, this whole class could be replaced by highlighter
    ArrayList<SToken> result = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream("full", text)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            SToken t = new SToken(term.buffer(), 0, term.length(),
                    offset.startOffset(), offset.endOffset());
            result.add(t);
        }
        ts.end();
    }
    return result.toArray(new SToken[result.size()]);
}
/**
 * Creates simple term query from the cached tokenstream contents
 */
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    if (!stream.incrementToken()) {
        throw new AssertionError();
    }
    return newTermQuery(new Term(field, termAtt.getBytesRef()));
}
private CachingTokenFilter getBuffer(Analyzer analyzer, FieldQueryNode fieldNode) {
    final TokenStream source;
    final String text = fieldNode.getTextAsString();
    final String field = fieldNode.getFieldAsString();
    try {
        source = analyzer.tokenStream(field, new StringReader(text));
        source.reset();
    } catch (final IOException e1) {
        throw new RuntimeException(e1);
    }
    return new CachingTokenFilter(source);
}
/**
 * @param text the text to analyze
 * @return the distinct tokens produced by the analyzer
 */
public Set<String> getToken(String text) {
    Set<String> list = new LinkedHashSet<>();
    if (CommonUtils.notEmpty(text)) {
        try (StringReader stringReader = new StringReader(text);
                TokenStream tokenStream = dao.getAnalyzer().tokenStream(CommonConstants.BLANK, stringReader)) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            tokenStream.end();
            return list;
        } catch (IOException e) {
            return list;
        }
    }
    return list;
}
// Fragment: completing the loop requires the term attribute the snippet calls "token"
CharTermAttribute token = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    stems.add(token.toString());
}
TokenStream tokenStream = analyzer.tokenStream(fieldName, reader);
OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    int startOffset = offsetAttribute.startOffset();
    int endOffset = offsetAttribute.endOffset();
    String term = charTermAttribute.toString();
    // use term and its [startOffset, endOffset) character range here
}
/**
 * Parses the query. Using this instead of a QueryParser in order
 * to avoid thread-safety issues with Lucene's query parser.
 *
 * @param fieldName the name of the field
 * @param value the value of the field
 * @return the parsed query
 */
private Query parseTokens(String fieldName, String value) {
    BooleanQuery searchQuery = new BooleanQuery();
    if (value != null) {
        Analyzer analyzer = new KeywordAnalyzer();
        try {
            TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(value));
            tokenStream.reset();
            CharTermAttribute attr = tokenStream.getAttribute(CharTermAttribute.class);
            while (tokenStream.incrementToken()) {
                String term = attr.toString();
                Query termQuery = new TermQuery(new Term(fieldName, term));
                searchQuery.add(termQuery, Occur.SHOULD);
            }
        } catch (IOException e) {
            throw new DukeException("Error parsing input string '" + value + "' "
                    + "in field " + fieldName);
        }
    }
    return searchQuery;
}
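Note that `new BooleanQuery()` and the mutable `add` only compile against Lucene 4.x and earlier; since Lucene 5.3 the query is assembled through `BooleanQuery.Builder`. A sketch of the same loop on newer Lucene, keeping the rest of the method as above:

// Lucene 5.3+ variant: BooleanQuery is immutable, clauses go through a builder
BooleanQuery.Builder builder = new BooleanQuery.Builder();
while (tokenStream.incrementToken()) {
    builder.add(new TermQuery(new Term(fieldName, attr.toString())), Occur.SHOULD);
}
Query searchQuery = builder.build();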
// Fragment: "token" must be the stream's CharTermAttribute for toString() to yield the term
CharTermAttribute token = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    String term = token.toString();
    // ... use term ...
}
private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
/**
 * Creates complex boolean query from the cached tokenstream contents
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator)
        throws IOException {
    BooleanQuery.Builder q = newBooleanQuery();
    List<Term> currentQuery = new ArrayList<>();
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        if (posIncrAtt.getPositionIncrement() != 0) {
            add(q, currentQuery, operator);
            currentQuery.clear();
        }
        currentQuery.add(new Term(field, termAtt.getBytesRef()));
    }
    add(q, currentQuery, operator);
    return q.build();
}
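For context, `analyzeMultiBoolean` is a protected hook on Lucene's `QueryBuilder`; the position-increment test is what keeps stacked tokens (increment 0, as emitted by synonym filters) inside one clause. The public entry point that reaches it for multi-token input is `createBooleanQuery`; a small usage sketch, with StandardAnalyzer as an assumption:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.QueryBuilder;

QueryBuilder qb = new QueryBuilder(new StandardAnalyzer());
Query q = qb.createBooleanQuery("body", "quick brown fox", BooleanClause.Occur.MUST);
// -> +body:quick +body:brown +body:fox; tokens sharing a position
//    (e.g. stacked synonyms) end up inside a single clause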
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        skipTerms.add(new Term(fieldName, termAtt.toString()));
    }
    ts.end(); // the snippet was truncated here; end() completes the stream contract
}
// Reconstructed from Lucene's Analyzer#normalize(String, String), from which this
// snippet was truncated mid-statement:
try (TokenStream ts = normalize(fieldName,
        new StringTokenStream(attributeFactory, filteredText, text.length()))) {
    final TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    if (ts.incrementToken() == false) {
        throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 0 for analyzer "
                + this + " and input \"" + text + "\"");
    }
    final BytesRef term = BytesRef.deepCopyOf(termAtt.getBytesRef());
    if (ts.incrementToken()) {
        throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 2+ for analyzer "
                + this + " and input \"" + text + "\"");
    }
    ts.end();
    return term;
}
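This block is the body of `Analyzer#normalize(String, String)`; `StringTokenStream` and `attributeFactory` come from the surrounding `Analyzer` code. Callers use it to push a single term through the analysis chain's normalization (lowercasing, folding, etc.) without tokenizing, e.g. when building term queries by hand. A sketch, assuming a Lucene recent enough to have the method (added around 6.2):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

// Normalize a user-supplied term the same way the field's analysis chain would
BytesRef normalized = analyzer.normalize("title", "Häuser");
Query q = new TermQuery(new Term("title", normalized));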