TokenStream tokenStream = analyzer.tokenStream(fieldName, reader);
OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
    int startOffset = offsetAttribute.startOffset();
    int endOffset = offsetAttribute.endOffset();
    String term = charTermAttribute.toString();
}
TokenStream stream = analyzer.tokenStream(null, new StringReader(text));
CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    System.out.println(cattr.toString());
}
stream.end();
stream.close();
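For reference, the full consume contract from the two snippets above (reset, then the incrementToken loop, then end and close) in self-contained form; a minimal sketch assuming Lucene 4.x or later, where tokenStream accepts a String and StandardAnalyzer needs no Version argument. The field name and input text are illustrative:

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenDump {
    public static void main(String[] args) throws IOException {
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
                // tokenStream(String, String) wraps the text in a reader internally
                TokenStream stream = analyzer.tokenStream("body", "The quick brown fox")) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();   // mandatory before the first incrementToken()
            while (stream.incrementToken()) {
                System.out.println(term.toString());
            }
            stream.end();     // records the final offset state
        }                     // try-with-resources closes the stream and the analyzer
    }
}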
private SToken[] getTokens(String text) throws IOException {
    // FIXME: integrate the loop below into getSummary to save the cloning and memory;
    // also, creating tokens this way is suboptimal with 3.0.0, and this whole class
    // could be replaced by a highlighter.
    ArrayList<SToken> result = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream("full", text)) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            SToken t = new SToken(term.buffer(), 0, term.length(),
                    offset.startOffset(), offset.endOffset());
            result.add(t);
        }
        ts.end();
    }
    return result.toArray(new SToken[result.size()]);
}
try (TokenStream ts = normalize(fieldName,
        new StringTokenStream(attributeFactory, filteredText, text.length()))) {
    final TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    ts.reset();
    if (ts.incrementToken() == false) {
        throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 0 for analyzer "
                + this + " and input \"" + text + "\"");
    }
    final BytesRef term = BytesRef.deepCopyOf(termAtt.getBytesRef());
    if (ts.incrementToken()) {
        throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 2+ for analyzer "
                + this + " and input \"" + text + "\"");
    }
    ts.end();
    return term;
}
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
int prevIncr = 1;
int state = -1;
while (in.incrementToken()) {
    int currentIncr = posIncAtt.getPositionIncrement();
    if (pos == -1 && currentIncr < 1) {
offsetAttribute = (OffsetAttribute) tokens.addAttribute(OffsetAttribute.class);
positionIncrementAttribute = (PositionIncrementAttribute) tokens
        .addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAttribute = (CharTermAttribute) tokens.addAttribute(CharTermAttribute.class);
while (tokens.incrementToken())
builder.createState();
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);
Position posData = null;
int maxOffset = 0;
while (in.incrementToken()) {
    int posInc = posIncAtt.getPositionIncrement();
    if (preservePositionIncrements == false && posInc > 1) {
Reader reader = new StringReader("This is a test string");
TokenStream tokenizer = new StandardTokenizer(Version.LUCENE_36, reader);
// ShingleFilter rejects a minimum shingle size below 2; unigrams are emitted by default.
tokenizer = new ShingleFilter(tokenizer, 2, 3);
CharTermAttribute charTermAttribute = tokenizer.addAttribute(CharTermAttribute.class);
tokenizer.reset(); // required before the first incrementToken()
while (tokenizer.incrementToken()) {
    String token = charTermAttribute.toString();
    // Do something
}
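The shingle snippet above targets the Lucene 3.x API; from Lucene 5 on, the Version argument is gone and a Tokenizer receives its input through setReader. A sketch of the same loop against the current API (input text as above; since unigram output is on by default, shingle sizes of 2 to 3 reproduce the unigram-through-trigram intent):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ShingleDemo {
    public static void main(String[] args) throws IOException {
        StandardTokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader("This is a test string"));
        // Unigrams plus shingles covering 2 to 3 words
        try (TokenStream shingles = new ShingleFilter(tokenizer, 2, 3)) {
            CharTermAttribute term = shingles.addAttribute(CharTermAttribute.class);
            shingles.reset();
            while (shingles.incrementToken()) {
                System.out.println(term.toString());
            }
            shingles.end();
        }
    }
}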
/**
 * @param text the text to tokenize
 * @return the distinct tokens of the text, in encounter order
 */
public Set<String> getToken(String text) {
    Set<String> list = new LinkedHashSet<>();
    if (CommonUtils.notEmpty(text)) {
        try (StringReader stringReader = new StringReader(text);
                TokenStream tokenStream = dao.getAnalyzer().tokenStream(CommonConstants.BLANK, stringReader)) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            tokenStream.end();
            return list;
        } catch (IOException e) {
            return list;
        }
    }
    return list;
}
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
    try {
        stream.reset();
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                lastPosition = lastPosition + increment;
            }
            tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition,
                    lastOffset + offset.startOffset(), lastOffset + offset.endOffset(),
                    posLen.getPositionLength(), type.type(),
                    extractExtendedAttributes(stream, includeAttributes)));
        }
        stream.end();
        lastOffset += offset.endOffset();
        lastPosition += posIncr.getPositionIncrement();
        lastPosition += analyzer.getPositionIncrementGap(field);
        lastOffset += analyzer.getOffsetGap(field);
    } catch (IOException e) {
        throw new ElasticsearchException("failed to analyze", e);
    } finally {
        IOUtils.closeWhileHandlingException(stream);
    }
}
try (TokenStream stream = analyzer.tokenStream(field, text)) {
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute type = stream.addAttribute(TypeAttribute.class);
    PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
    while (stream.incrementToken()) {
        int increment = posIncr.getPositionIncrement();
        if (increment > 0) {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
    skipTerms.add(new Term(fieldName, termAtt.toString()));
source.reset();
List<BytesRef> currentPos = new ArrayList<>();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
boolean hasMoreTokens = source.incrementToken();
while (hasMoreTokens) {
    if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
        hasMoreTokens = source.incrementToken();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
    if (!source.incrementToken()) break;
} catch (IOException e) {
    break;
}
int tokenCount = 0;
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
    String word = termAtt.toString();
    tokenCount++;
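The counting fragment above depends on a ts defined elsewhere; wrapped as a self-contained helper it might look like the sketch below (the countTokens name and the Analyzer parameter are illustrative, not from the original):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

// Hypothetical helper: counts the tokens an analyzer produces for a text.
static int countTokens(Analyzer analyzer, String field, String text) throws IOException {
    int tokenCount = 0;
    try (TokenStream ts = analyzer.tokenStream(field, text)) {
        ts.reset();
        while (ts.incrementToken()) {
            tokenCount++;
        }
        ts.end();
    }
    return tokenCount;
}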
private static Query parseQueryString(ExtendedCommonTermsQuery query, Object queryString, String field,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        BytesRefBuilder builder = new BytesRefBuilder();
        while (source.incrementToken()) {
            // UTF-8
            builder.copyChars(termAtt);
            query.add(new Term(field, builder.toBytesRef()));
        }
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return query;
}
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
ts.reset();
reuse.clear();
while (ts.incrementToken()) {
    int length = termAtt.length();
    if (length == 0) {
if (normalizer != null) {
    try (TokenStream ts = normalizer.tokenStream(name(), value)) {
        final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        if (ts.incrementToken() == false) {
            throw new IllegalStateException("The normalization token stream is "
                    + "expected to produce exactly 1 token, but got 0 for analyzer "
                    + normalizer + " and input \"" + value + "\"");
        }
        final String newValue = termAtt.toString();
        if (ts.incrementToken()) {
            throw new IllegalStateException("The normalization token stream is "
                    + "expected to produce exactly 1 token, but got 2+ for analyzer "
                    + normalizer + " and input \"" + value + "\"");
        }
        ts.end();
        value = newValue;
    }
}
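Both this fragment and the earlier normalize excerpt depend on the normalizer emitting exactly one token. A minimal sketch of an analyzer satisfying that contract, assuming Lucene 7+ where CustomAnalyzer and Analyzer.normalize(String, String) are available; the field name and input are illustrative:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.util.BytesRef;

public class NormalizeDemo {
    public static void main(String[] args) throws Exception {
        // A keyword tokenizer never splits its input, so the stream yields
        // exactly one token, which is the contract checked above.
        Analyzer normalizer = CustomAnalyzer.builder()
                .withTokenizer("keyword")
                .addTokenFilter("lowercase")
                .build();
        BytesRef term = normalizer.normalize("title", "FooBar");
        System.out.println(term.utf8ToString()); // prints "foobar"
    }
}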
ts = new DeDuplicatingTokenFilter(ts, dupSequenceSpotter);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
try {
    while (ts.incrementToken()) {
        if (dupSequenceSpotter != null) {
            long newTrieSize = dupSequenceSpotter.getEstimatedSizeInBytes();