/**
 * Copies this token's state into {@code target}.
 *
 * <p>When {@code target} is itself a {@code PackedTokenAttributeImpl}, the term buffer is copied
 * and the remaining fields are assigned directly. For any other target, {@code super.copyTo} is
 * invoked first and the offset, position increment, position length, type and term frequency are
 * then pushed through the corresponding attribute interfaces; {@code target} is cast to each of
 * those interfaces.
 *
 * @param target the attribute implementation that receives this token's values
 */
@Override
public void copyTo(AttributeImpl target) {
  if (!(target instanceof PackedTokenAttributeImpl)) {
    // Generic path: go through the public attribute setters.
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PositionLengthAttribute) target).setPositionLength(positionLength);
    ((TypeAttribute) target).setType(type);
    ((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
    return;
  }

  // Same concrete type: copy the term buffer, then assign the fields directly.
  final PackedTokenAttributeImpl other = (PackedTokenAttributeImpl) target;
  other.copyBuffer(buffer(), 0, length());
  other.positionIncrement = positionIncrement;
  other.positionLength = positionLength;
  other.startOffset = startOffset;
  other.endOffset = endOffset;
  other.type = type;
  other.termFrequency = termFrequency;
}
public static PackedTokenAttributeImpl subToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength) { //CharTermAttributeImpl termImpl = new CharTermAttributeImpl(); //termImpl.copyBuffer(oriToken.buffer(), termBufferOffset, termBufferLength); //new Token(oriToken.buffer(), termBufferOffset, termBufferLength, // oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); //Token token = new Token(termImpl, oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); PackedTokenAttributeImpl token = new PackedTokenAttributeImpl(); token.copyBuffer(oriToken.buffer(), termBufferOffset, termBufferLength); token.setOffset(oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); token.setType(oriToken.type()); return token; } }
/**
 * Produces the next token of this stream.
 *
 * <p>Clears the attributes, asks {@code nextToken(reusableToken)} for a token and, when one is
 * returned, copies its term text, offsets and type into this stream's attributes.
 *
 * @return {@code true} if a token was produced, {@code false} if {@code nextToken} returned
 *     {@code null}
 * @throws IOException as declared; presumably propagated from {@code nextToken}
 */
public final boolean incrementToken() throws IOException {
    clearAttributes();

    final PackedTokenAttributeImpl next = nextToken(reusableToken);
    if (next == null) {
        return false;
    }

    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(next.startOffset(), next.endOffset());
    typeAtt.setType(next.type());
    return true;
}
reusableToken = new PackedTokenAttributeImpl(); reusableToken.clear(); if(termAtt != null) { reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length()); reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); reusableToken.setType(typeAtt.type());
(Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type()) || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type())) ) { final char[] buffer = nextToken.buffer(); final int length = nextToken.length(); byte lastType = (byte) Character.getType(buffer[0]); //与上次的字符是否同类 int termBufferOffset = 0;
/**
 * Resets the term text, payload, flags, positionIncrement, positionLength, startOffset,
 * endOffset and token type to their defaults.
 *
 * <p>Only the flags and the payload are reset in this method itself; everything else is left to
 * {@code super.clear()}.
 */
@Override
public void clear() {
  super.clear();
  this.payload = null;
  this.flags = 0;
}
/**
 * Creates a filter over {@code input}.
 *
 * <p>Registers the term, offset and type attributes via {@code addAttribute} and allocates the
 * token instance held in {@code reusableToken}.
 *
 * @param input the token stream handed to the superclass constructor
 */
public CutLetterDigitFilter(TokenStream input) {
    super(input);
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.offsetAtt = addAttribute(OffsetAttribute.class);
    this.typeAtt = addAttribute(TypeAttribute.class);
    this.reusableToken = new PackedTokenAttributeImpl();
}
/**
 * Cuts a sub-token out of {@code oriToken} via {@code TokenUtils.subToken}, assigns it a type
 * and offers it to {@code tokenQueue}.
 *
 * @param oriToken the token to slice
 * @param termBufferOffset start of the slice, passed through to {@code TokenUtils.subToken}
 * @param termBufferLength length of the slice, passed through to {@code TokenUtils.subToken}
 * @param type character category; {@code Character.DECIMAL_DIGIT_NUMBER} selects
 *     {@code Word.TYPE_DIGIT}, any other value selects {@code Word.TYPE_LETTER}
 */
private void addToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength, byte type) {
    final PackedTokenAttributeImpl piece = TokenUtils.subToken(oriToken, termBufferOffset, termBufferLength);
    piece.setType(type == Character.DECIMAL_DIGIT_NUMBER ? Word.TYPE_DIGIT : Word.TYPE_LETTER);
    tokenQueue.offer(piece);
}
public static PackedTokenAttributeImpl subToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength) { //CharTermAttributeImpl termImpl = new CharTermAttributeImpl(); //termImpl.copyBuffer(oriToken.buffer(), termBufferOffset, termBufferLength); //new Token(oriToken.buffer(), termBufferOffset, termBufferLength, // oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); //Token token = new Token(termImpl, oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); PackedTokenAttributeImpl token = new PackedTokenAttributeImpl(); token.copyBuffer(oriToken.buffer(), termBufferOffset, termBufferLength); token.setOffset(oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); token.setType(oriToken.type()); return token; } }
/**
 * Produces the next token of this stream.
 *
 * <p>Clears the attributes, asks {@code nextToken(reusableToken)} for a token and, when one is
 * returned, copies its term text, offsets and type into this stream's attributes.
 *
 * @return {@code true} if a token was produced, {@code false} if {@code nextToken} returned
 *     {@code null}
 * @throws IOException as declared; presumably propagated from {@code nextToken}
 */
public final boolean incrementToken() throws IOException {
    clearAttributes();

    final PackedTokenAttributeImpl next = nextToken(reusableToken);
    if (next == null) {
        return false;
    }

    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(next.startOffset(), next.endOffset());
    typeAtt.setType(next.type());
    return true;
}
reusableToken = new PackedTokenAttributeImpl(); reusableToken.clear(); if(termAtt != null) { reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length()); reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); reusableToken.setType(typeAtt.type());
(Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type()) || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type())) ) { final char[] buffer = nextToken.buffer(); final int length = nextToken.length(); byte lastType = (byte) Character.getType(buffer[0]); //与上次的字符是否同类 int termBufferOffset = 0;
/**
 * Creates a filter over {@code input}.
 *
 * <p>Registers the term, offset and type attributes via {@code addAttribute} and allocates the
 * token instance held in {@code reusableToken}.
 *
 * @param input the token stream handed to the superclass constructor
 */
public CutLetterDigitFilter(TokenStream input) {
    super(input);
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.offsetAtt = addAttribute(OffsetAttribute.class);
    this.typeAtt = addAttribute(TypeAttribute.class);
    this.reusableToken = new PackedTokenAttributeImpl();
}
/**
 * Cuts a sub-token out of {@code oriToken} via {@code TokenUtils.subToken}, assigns it a type
 * and offers it to {@code tokenQueue}.
 *
 * @param oriToken the token to slice
 * @param termBufferOffset start of the slice, passed through to {@code TokenUtils.subToken}
 * @param termBufferLength length of the slice, passed through to {@code TokenUtils.subToken}
 * @param type character category; {@code Character.DECIMAL_DIGIT_NUMBER} selects
 *     {@code Word.TYPE_DIGIT}, any other value selects {@code Word.TYPE_LETTER}
 */
private void addToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength, byte type) {
    final PackedTokenAttributeImpl piece = TokenUtils.subToken(oriToken, termBufferOffset, termBufferLength);
    piece.setType(type == Character.DECIMAL_DIGIT_NUMBER ? Word.TYPE_DIGIT : Word.TYPE_LETTER);
    tokenQueue.offer(piece);
}
public static PackedTokenAttributeImpl subToken(PackedTokenAttributeImpl oriToken, int termBufferOffset, int termBufferLength) { //CharTermAttributeImpl termImpl = new CharTermAttributeImpl(); //termImpl.copyBuffer(oriToken.buffer(), termBufferOffset, termBufferLength); //new Token(oriToken.buffer(), termBufferOffset, termBufferLength, // oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); //Token token = new Token(termImpl, oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); PackedTokenAttributeImpl token = new PackedTokenAttributeImpl(); token.copyBuffer(oriToken.buffer(), termBufferOffset, termBufferLength); token.setOffset(oriToken.startOffset()+termBufferOffset, oriToken.startOffset()+termBufferOffset+termBufferLength); token.setType(oriToken.type()); return token; } }
/**
 * Produces the next token of this stream.
 *
 * <p>Clears the attributes, asks {@code nextToken(reusableToken)} for a token and, when one is
 * returned, copies its term text, offsets and type into this stream's attributes.
 *
 * @return {@code true} if a token was produced, {@code false} if {@code nextToken} returned
 *     {@code null}
 * @throws IOException as declared; presumably propagated from {@code nextToken}
 */
public final boolean incrementToken() throws IOException {
    clearAttributes();

    final PackedTokenAttributeImpl next = nextToken(reusableToken);
    if (next == null) {
        return false;
    }

    termAtt.copyBuffer(next.buffer(), 0, next.length());
    offsetAtt.setOffset(next.startOffset(), next.endOffset());
    typeAtt.setType(next.type());
    return true;
}
/**
 * Copies this token's state into {@code target}.
 *
 * <p>When {@code target} is itself a {@code PackedTokenAttributeImpl}, the term buffer is copied
 * and the remaining fields are assigned directly. For any other target, {@code super.copyTo} is
 * invoked first and the offset, position increment, position length and type are then pushed
 * through the corresponding attribute interfaces; {@code target} is cast to each of those
 * interfaces.
 *
 * @param target the attribute implementation that receives this token's values
 */
@Override
public void copyTo(AttributeImpl target) {
  if (!(target instanceof PackedTokenAttributeImpl)) {
    // Generic path: go through the public attribute setters.
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PositionLengthAttribute) target).setPositionLength(positionLength);
    ((TypeAttribute) target).setType(type);
    return;
  }

  // Same concrete type: copy the term buffer, then assign the fields directly.
  final PackedTokenAttributeImpl other = (PackedTokenAttributeImpl) target;
  other.copyBuffer(buffer(), 0, length());
  other.positionIncrement = positionIncrement;
  other.positionLength = positionLength;
  other.startOffset = startOffset;
  other.endOffset = endOffset;
  other.type = type;
}
reusableToken = new PackedTokenAttributeImpl(); reusableToken.clear(); if(termAtt != null) { reusableToken.copyBuffer(termAtt.buffer(), 0, termAtt.length()); reusableToken.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); reusableToken.setType(typeAtt.type());
(Word.TYPE_LETTER_OR_DIGIT.equalsIgnoreCase(nextToken.type()) || Word.TYPE_DIGIT_OR_LETTER.equalsIgnoreCase(nextToken.type())) ) { final char[] buffer = nextToken.buffer(); final int length = nextToken.length(); byte lastType = (byte) Character.getType(buffer[0]); //与上次的字符是否同类 int termBufferOffset = 0;
/**
 * Creates a filter over {@code input}.
 *
 * <p>Registers the term, offset and type attributes via {@code addAttribute} and allocates the
 * token instance held in {@code reusableToken}.
 *
 * @param input the token stream handed to the superclass constructor
 */
public CutLetterDigitFilter(TokenStream input) {
    super(input);
    this.termAtt = addAttribute(CharTermAttribute.class);
    this.offsetAtt = addAttribute(OffsetAttribute.class);
    this.typeAtt = addAttribute(TypeAttribute.class);
    this.reusableToken = new PackedTokenAttributeImpl();
}