public final class LuceneUtil {

    private LuceneUtil() {}

    public static List<String> tokenizeString(Analyzer analyzer, String string) {
        List<String> result = new ArrayList<String>();
        // try-with-resources: the stream must be closed before the analyzer
        // can hand out another TokenStream for the same thread
        try (TokenStream stream = analyzer.tokenStream(null, new StringReader(string))) {
            stream.reset();
            while (stream.incrementToken()) {
                result.add(stream.getAttribute(CharTermAttribute.class).toString());
            }
            stream.end();
        } catch (IOException e) {
            // not thrown b/c we're using a string reader...
            throw new RuntimeException(e);
        }
        return result;
    }
}
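A minimal usage sketch for the helper above; StandardAnalyzer and the sample text are assumptions, any Analyzer implementation works the same way:

import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

public static void main(String[] args) {
    // Hypothetical caller; StandardAnalyzer is just one convenient choice
    try (Analyzer analyzer = new StandardAnalyzer()) {
        List<String> tokens = LuceneUtil.tokenizeString(analyzer, "The Quick Brown Fox");
        System.out.println(tokens); // lower-cased terms as produced by the analyzer
    }
}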
/**
 * Creates simple boolean query from the cached tokenstream contents
 */
protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    List<Term> terms = new ArrayList<>();
    while (stream.incrementToken()) {
        terms.add(new Term(field, termAtt.getBytesRef()));
    }
    return newSynonymQuery(terms.toArray(new Term[terms.size()]));
}
@Override
public Result parse(CharSequence text) {
    TokenStream stream;
    try {
        stream = analyzer.tokenStream("text", StrUtil.str(text));
        stream.reset();
    } catch (IOException e) {
        throw new TokenizerException(e);
    }
    return new AnalysisResult(stream);
}
public static List<String> keywords(String source) {
    List<String> keywords = new ArrayList<String>();
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("keywords", new StringReader(source));
        ts.reset();
        while (ts.incrementToken()) {
            keywords.add(ts.getAttribute(CharTermAttribute.class).toString());
        }
        ts.end();
    } catch (IOException e) {
        logger.error("Error getting keywords", e);
    } finally {
        if (ts != null) { // tokenStream() may have thrown, leaving ts null
            try {
                ts.close();
            } catch (IOException ignored) {
            }
        }
    }
    return keywords;
}
TokenStream stream = analyzer.tokenStream(null, new StringReader(text));
CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    System.out.println(cattr.toString());
}
stream.end();
stream.close();
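The same loop is safer wrapped in try-with-resources, which guarantees close() even when incrementToken() throws and so keeps the analyzer reusable afterwards. A sketch under the same assumptions (an analyzer and a text string in scope):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

static void printTokens(Analyzer analyzer, String text) throws IOException {
    try (TokenStream stream = analyzer.tokenStream(null, new StringReader(text))) {
        CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
        stream.reset();                 // mandatory before the first incrementToken()
        while (stream.incrementToken()) {
            System.out.println(cattr.toString());
        }
        stream.end();                   // records end-of-stream offset state
    }                                   // close() runs here, even on exceptions
}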
private SToken[] getTokens(String text) throws IOException {
    // FIXME integrate the loop below into getSummary to save the cloning and memory;
    // also, creating Tokens is suboptimal with 3.0.0, this whole class could be replaced by highlighter
    ArrayList<SToken> result = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream("full", text)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            SToken t = new SToken(term.buffer(), 0, term.length(),
                    offset.startOffset(), offset.endOffset());
            result.add(t);
        }
        ts.end();
    }
    return result.toArray(new SToken[result.size()]);
}
/**
 * Creates simple term query from the cached tokenstream contents
 */
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    stream.reset();
    if (!stream.incrementToken()) {
        throw new AssertionError();
    }
    return newTermQuery(new Term(field, termAtt.getBytesRef()));
}
private CachingTokenFilter getBuffer(Analyzer analyzer, FieldQueryNode fieldNode) {
    final TokenStream source;
    final String text = fieldNode.getTextAsString();
    final String field = fieldNode.getFieldAsString();
    try {
        source = analyzer.tokenStream(field, new StringReader(text));
        source.reset();
    } catch (final IOException e1) {
        throw new RuntimeException(e1);
    }
    return new CachingTokenFilter(source);
}
/**
 * @param text the text to analyze
 * @return the distinct tokens produced by the analyzer
 */
public Set<String> getToken(String text) {
    Set<String> list = new LinkedHashSet<>();
    if (CommonUtils.notEmpty(text)) {
        try (StringReader stringReader = new StringReader(text);
                TokenStream tokenStream = dao.getAnalyzer().tokenStream(CommonConstants.BLANK, stringReader)) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            tokenStream.end();
            return list;
        } catch (IOException e) {
            return list;
        }
    }
    return list;
}
// Fragment: completing the loop requires the term attribute the snippet calls "token"
CharTermAttribute token = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    stems.add(token.toString());
}
TokenStream tokenStream = analyzer.tokenStream(fieldName, reader);
OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    int startOffset = offsetAttribute.startOffset();
    int endOffset = offsetAttribute.endOffset();
    String term = charTermAttribute.toString();
    // use term and its [startOffset, endOffset) character range here
}
/**
 * Parses the query. Using this instead of a QueryParser in order
 * to avoid thread-safety issues with Lucene's query parser.
 *
 * @param fieldName the name of the field
 * @param value the value of the field
 * @return the parsed query
 */
private Query parseTokens(String fieldName, String value) {
    BooleanQuery searchQuery = new BooleanQuery();
    if (value != null) {
        Analyzer analyzer = new KeywordAnalyzer();
        try {
            TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(value));
            tokenStream.reset();
            CharTermAttribute attr = tokenStream.getAttribute(CharTermAttribute.class);
            while (tokenStream.incrementToken()) {
                String term = attr.toString();
                Query termQuery = new TermQuery(new Term(fieldName, term));
                searchQuery.add(termQuery, Occur.SHOULD);
            }
        } catch (IOException e) {
            throw new DukeException("Error parsing input string '" + value + "' "
                    + "in field " + fieldName);
        }
    }
    return searchQuery;
}
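Note that `new BooleanQuery()` and the mutable `add` only compile against Lucene 4.x and earlier; since Lucene 5.3 the query is assembled through `BooleanQuery.Builder`. A sketch of the same loop on newer Lucene, keeping the rest of the method as above:

// Lucene 5.3+ variant: BooleanQuery is immutable, clauses go through a builder
BooleanQuery.Builder builder = new BooleanQuery.Builder();
while (tokenStream.incrementToken()) {
    builder.add(new TermQuery(new Term(fieldName, attr.toString())), Occur.SHOULD);
}
Query searchQuery = builder.build();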
// Fragment: "token" must be the stream's CharTermAttribute for toString() to yield the term
CharTermAttribute token = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    String term = token.toString();
    // ... use term ...
}
private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
/**
 * Creates complex boolean query from the cached tokenstream contents
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator)
        throws IOException {
    BooleanQuery.Builder q = newBooleanQuery();
    List<Term> currentQuery = new ArrayList<>();
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        if (posIncrAtt.getPositionIncrement() != 0) {
            add(q, currentQuery, operator);
            currentQuery.clear();
        }
        currentQuery.add(new Term(field, termAtt.getBytesRef()));
    }
    add(q, currentQuery, operator);
    return q.build();
}
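For context, `analyzeMultiBoolean` is a protected hook on Lucene's `QueryBuilder`; the position-increment test is what keeps stacked tokens (increment 0, as emitted by synonym filters) inside one clause. The public entry point that reaches it for multi-token input is `createBooleanQuery`; a small usage sketch, with StandardAnalyzer as an assumption:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.QueryBuilder;

QueryBuilder qb = new QueryBuilder(new StandardAnalyzer());
Query q = qb.createBooleanQuery("body", "quick brown fox", BooleanClause.Occur.MUST);
// -> +body:quick +body:brown +body:fox; tokens sharing a position
//    (e.g. stacked synonyms) end up inside a single clause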
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        skipTerms.add(new Term(fieldName, termAtt.toString()));
    }
    ts.end(); // the snippet was truncated here; end() completes the stream contract
}
// Reconstructed from Lucene's Analyzer#normalize(String, String), from which this
// snippet was truncated mid-statement:
try (TokenStream ts = normalize(fieldName,
        new StringTokenStream(attributeFactory, filteredText, text.length()))) {
    final TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    if (ts.incrementToken() == false) {
        throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 0 for analyzer "
                + this + " and input \"" + text + "\"");
    }
    final BytesRef term = BytesRef.deepCopyOf(termAtt.getBytesRef());
    if (ts.incrementToken()) {
        throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 2+ for analyzer "
                + this + " and input \"" + text + "\"");
    }
    ts.end();
    return term;
}
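This block is the body of `Analyzer#normalize(String, String)`; `StringTokenStream` and `attributeFactory` come from the surrounding `Analyzer` code. Callers use it to push a single term through the analysis chain's normalization (lowercasing, folding, etc.) without tokenizing, e.g. when building term queries by hand. A sketch, assuming a Lucene recent enough to have the method (added around 6.2):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

// Normalize a user-supplied term the same way the field's analysis chain would
BytesRef normalized = analyzer.normalize("title", "Häuser");
Query q = new TermQuery(new Term("title", normalized));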