org.apache.lucene.analysis.tokenattributes.CharTermAttribute.resizeBuffer java code examples

public PinyinTokenizer(int bufferSize) {
  super();
  termAtt.resizeBuffer(bufferSize);
}

upto += length;
if (upto == buffer.length)
  buffer = termAtt.resizeBuffer(1 + buffer.length);

public KeywordTokenizer(AttributeFactory factory, int bufferSize) {
 super(factory);
 if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
  throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
 }
 termAtt.resizeBuffer(bufferSize);
}

public KeywordTokenizer(int bufferSize) {
 if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
  throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
 }
 termAtt.resizeBuffer(bufferSize);
}

public PathHierarchyTokenizer
  (AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
 super(factory);
 if (bufferSize < 0) {
  throw new IllegalArgumentException("bufferSize cannot be negative");
 }
 if (skip < 0) {
  throw new IllegalArgumentException("skip cannot be negative");
 }
 termAtt.resizeBuffer(bufferSize);
 this.delimiter = delimiter;
 this.replacement = replacement;
 this.skip = skip;
 resultToken = new StringBuilder(bufferSize);
}

public ReversePathHierarchyTokenizer
  (AttributeFactory factory, int bufferSize, char delimiter, char replacement, int skip) {
 super(factory);
 if (bufferSize < 0) {
  throw new IllegalArgumentException("bufferSize cannot be negative");
 }
 if (skip < 0) {
  throw new IllegalArgumentException("skip cannot be negative");
 }
 termAtt.resizeBuffer(bufferSize);
 this.delimiter = delimiter;
 this.replacement = replacement;
 this.skip = skip;
 resultToken = new StringBuilder(bufferSize);
 resultTokenBuffer = new char[bufferSize];
 delimiterPositions = new ArrayList<>(bufferSize/10);
}

private void appendToToken(char ch) {
 char[] buffer = termAtt.buffer();
 if (tokenUpto == buffer.length) {
  buffer = termAtt.resizeBuffer(tokenUpto + 1);
 }
 buffer[tokenUpto++] = ch;
}

private void appendToToken(char ch) {
 char[] buffer = termAtt.buffer();
 if (tokenUpto == buffer.length) {
  buffer = termAtt.resizeBuffer(tokenUpto + 1);
 }
 buffer[tokenUpto++] = ch;
 sepUpto++;
}

private void init(int minGram, int maxGram, boolean edgesOnly) {
 if (minGram < 1) {
  throw new IllegalArgumentException("minGram must be greater than zero");
 }
 if (minGram > maxGram) {
  throw new IllegalArgumentException("minGram must not be greater than maxGram");
 }
 this.minGram = minGram;
 this.maxGram = maxGram;
 this.edgesOnly = edgesOnly;
 charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
 buffer = new int[charBuffer.getBuffer().length];
 // Make the term att large enough
 termAtt.resizeBuffer(2 * maxGram);
}

@Override
public boolean incrementToken() throws IOException {
 if (input.incrementToken()) {
  char[] chArray = termAtt.buffer();
  int chLen = termAtt.length();
  int idx = 0;
  if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1])) {
   chArray = termAtt.resizeBuffer(chLen + 1);
   for (int i = chLen; i > 1; i--) {
    chArray[i] = chArray[i - 1];
   }
   chArray[1] = '-';
   termAtt.setLength(chLen + 1);
   idx = 2;
   chLen = chLen + 1;
  }
  for (int i = idx; i < chLen;) {
   i += Character.toChars(Character.toLowerCase(chArray[i]), chArray, i);
   }
  return true;
 } else {
  return false;
 }
}

/** 
 * Flushes a unigram token to output from our buffer.
 * This happens when we encounter isolated CJK characters, either the whole
 * CJK string is a single character, or we encounter a CJK character surrounded 
 * by space, punctuation, english, etc, but not beside any other CJK.
 */
private void flushUnigram() {
 clearAttributes();
 char termBuffer[] = termAtt.resizeBuffer(2); // maximum unigram length (2 surrogates)
 int len = Character.toChars(buffer[index], termBuffer, 0);
 termAtt.setLength(len);
 offsetAtt.setOffset(startOffset[index], endOffset[index]);
 typeAtt.setType(SINGLE_TYPE);
 index++;
}

 @Override
 public boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
   if (!keywordAttr.isKeyword()) {
    // this stemmer increases word length by 1: worst case '*ã' -> '*ão'
    final int len = termAtt.length();
    final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
    termAtt.setLength(newlen);
   }
   return true;
  } else {
   return false;
  }
 }
}

 @Override
 public boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
   if (!keywordAttr.isKeyword()) {
    // this stemmer increases word length by 1: worst case '*çom' -> '*ción'
    final int len = termAtt.length();
    final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
    termAtt.setLength(newlen);
   }
   return true;
  } else {
   return false;
  }
 }
}

 /**
  * Writes the joined unhyphenated term
  */
 private void unhyphenate() {
  restoreState(savedState);
  savedState = null;
  
  char term[] = termAttribute.buffer();
  int length = hyphenated.length();
  if (length > termAttribute.length()) {
   term = termAttribute.resizeBuffer(length);
  }
  
  hyphenated.getChars(0, length, term, 0);
  termAttribute.setLength(length);
  offsetAttribute.setOffset(offsetAttribute.startOffset(), lastEndOffset);
  hyphenated.setLength(0);
 }
}

/**
 * Writes the concatenation to the attributes
 */
void write() {
 clearAttributes();
 if (termAttribute.length() < buffer.length()) {
  termAttribute.resizeBuffer(buffer.length());
 }
 char termbuffer[] = termAttribute.buffer();
 
 buffer.getChars(0, buffer.length(), termbuffer, 0);
 termAttribute.setLength(buffer.length());
  
 if (hasIllegalOffsets) {
  offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
 }
 else {
  offsetAttribute.setOffset(startOffset, endOffset);
 }
 posIncAttribute.setPositionIncrement(position(true));
 typeAttribute.setType(savedType);
 accumPosInc = 0;
}

/** 
 * Flushes a bigram token to output from our buffer 
 * This is the normal case, e.g. ABC -&gt; AB BC
 */
private void flushBigram() {
 clearAttributes();
 char termBuffer[] = termAtt.resizeBuffer(4); // maximum bigram length in code units (2 supplementaries)
 int len1 = Character.toChars(buffer[index], termBuffer, 0);
 int len2 = len1 + Character.toChars(buffer[index+1], termBuffer, len1);
 termAtt.setLength(len2);
 offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
 typeAtt.setType(DOUBLE_TYPE);
 // when outputting unigrams, all bigrams are synonyms that span two unigrams
 if (outputUnigrams) {
  posIncAtt.setPositionIncrement(0);
  posLengthAtt.setPositionLength(2);
 }
 index++;
}

@Override
public boolean incrementToken() throws IOException {
 if (input.incrementToken()) {
  int len = termAtt.length();
  if (marker != NOMARKER) {
   len++;
   termAtt.resizeBuffer(len);
   termAtt.buffer()[len - 1] = marker;
  }
  reverse( termAtt.buffer(), 0, len );
  termAtt.setLength(len);
  return true;
 } else {
  return false;
 }
}

 /**
  * Constructs a compound token.
  */
 private void gramToken() {
  buffer.append(termAttribute.buffer(), 0, termAttribute.length());
  int endOffset = offsetAttribute.endOffset();

  clearAttributes();

  int length = buffer.length();
  char termText[] = termAttribute.buffer();
  if (length > termText.length) {
   termText = termAttribute.resizeBuffer(length);
  }
  
  buffer.getChars(0, length, termText, 0);
  termAttribute.setLength(length);
  posIncAttribute.setPositionIncrement(0);
  posLenAttribute.setPositionLength(2); // bigram
  offsetAttribute.setOffset(lastStartOffset, endOffset);
  typeAttribute.setType(GRAM_TYPE);
  buffer.setLength(0);
 }
}

@Override
public final boolean incrementToken() throws IOException {
 if (!done) {
  clearAttributes();
  done = true;
  int upto = 0;
  char[] buffer = termAtt.buffer();
  while (true) {
   final int length = input.read(buffer, upto, buffer.length-upto);
   if (length == -1) break;
   upto += length;
   if (upto == buffer.length)
    buffer = termAtt.resizeBuffer(1+buffer.length);
  }
  termAtt.setLength(upto);
  finalOffset = correctOffset(upto);
  offsetAtt.setOffset(correctOffset(0), finalOffset);
  return true;
 }
 return false;
}

 end = start;
} else if (length >= buffer.length-1) { // check if a supplementary could run out of bounds
 buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer

Javadoc

Grows the termBuffer to at least size newSize, preserving the existing content.

Popular methods of CharTermAttribute

length
append
Appends the contents of the other CharTermAttribute to this character sequence. The characters of th
buffer
Returns the internal termBuffer character array which you can then directly alter. If the array is t
setEmpty
Sets the length of the termBuffer to zero. Use this method before appending contents using the Appen
setLength
Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate th
copyBuffer
Copies the contents of buffer, starting at offset for length characters, into the termBuffer array.
charAt
subSequence
toString

Popular in Java

Reactive rest calls using spring rest template
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
runOnUiThread (Activity)
getSupportFragmentManager (FragmentActivity)
Pointer (com.sun.jna)
An abstraction for a native pointer data type. A Pointer instance represents, on the Java side, a na
InputStreamReader (java.io)
A class for turning a byte stream into a character stream. Data read from the source input stream is
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal.A value is represented by an arbitrary-precision "un
NumberFormat (java.text)
The abstract base class for all number formats. This class provides the interface for formatting and
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
TimeZone (java.util)
TimeZone represents a time zone offset, and also figures out daylight savings. Typically, you get a
Top Vim plugins

How to use resizeBuffermethodin org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Best Java code snippets using org.apache.lucene.analysis.tokenattributes.CharTermAttribute.resizeBuffer (Showing top 20 results out of 315)

How to use
resizeBuffer
method
in
org.apache.lucene.analysis.tokenattributes.CharTermAttribute