/**
 * Builds a new {@link SimplePatternTokenizer} that uses the supplied attribute factory
 * together with this instance's {@code dfa}.
 *
 * @param factory attribute factory handed to the tokenizer constructor
 * @return a freshly constructed tokenizer
 */
@Override
public SimplePatternTokenizer create(final AttributeFactory factory) {
  final SimplePatternTokenizer tokenizer = new SimplePatternTokenizer(factory, dfa);
  return tokenizer;
}
}
private int nextCodeUnit() throws IOException { int result; if (pendingUpto < pendingLimit) { result = pendingChars[pendingUpto++]; if (pendingUpto == pendingLimit) { // We used up the pending buffer pendingUpto = 0; pendingLimit = 0; } appendToToken((char) result); offset++; } else if (bufferLimit == -1) { return -1; } else { assert bufferNextRead <= bufferLimit: "bufferNextRead=" + bufferNextRead + " bufferLimit=" + bufferLimit; if (bufferNextRead == bufferLimit) { bufferLimit = input.read(buffer, 0, buffer.length); if (bufferLimit == -1) { return -1; } bufferNextRead = 0; } result = buffer[bufferNextRead++]; offset++; appendToToken((char) result); } return result; }
/**
 * Returns the next Unicode code point, or {@code -1} once the input is exhausted.
 *
 * <p>A single code unit is read via {@link #nextCodeUnit()}; when it is a high surrogate a
 * second unit is read and the two are combined with {@link Character#toCodePoint(char, char)}.
 *
 * <p>If the input ends immediately after a high surrogate, the unpaired surrogate is
 * returned unchanged. Previously the {@code -1} end marker was cast to {@code '\uFFFF'} and
 * combined with the high surrogate, yielding a code point that never occurred in the input.
 *
 * @return the code point, the unpaired high surrogate at end of input, or {@code -1}
 * @throws IOException if reading the underlying input fails
 */
private int nextCodePoint() throws IOException {
  final int ch = nextCodeUnit();
  if (ch == -1 || Character.isHighSurrogate((char) ch) == false) {
    return ch;
  }

  final int low = nextCodeUnit();
  if (low == -1) {
    // Truncated surrogate pair at end of input: do not feed the -1 sentinel into
    // toCodePoint, hand back the lone high surrogate instead.
    return ch;
  }

  // NOTE(review): a high surrogate followed by a unit that is not a low surrogate is still
  // combined as-is, exactly as before; confirm whether malformed input needs stricter handling.
  return Character.toCodePoint((char) ch, (char) low);
}
}
/**
 * Advances to the next token.
 *
 * <p>Attributes are cleared and {@code tokenUpto} is reset to 0 before the first code point
 * is read with {@code nextCodePoint()}; if that first read yields {@code -1} (end of input)
 * the method returns {@code false}. When a token is emitted, the offset attribute is set to
 * the corrected values of {@code offsetStart} and {@code offsetStart + lastAcceptLength}
 * and the method returns {@code true}.
 *
 * <p>NOTE(review): {@code pushBack(...)} presumably returns already-read chars for later
 * re-reading; confirm against its definition.
 *
 * @return {@code true} if a token was produced, {@code false} at end of input
 * @throws IOException if reading the underlying input fails
 */
@Override public boolean incrementToken() throws IOException { clearAttributes(); tokenUpto = 0; int ch = nextCodePoint(); if (ch == -1) { return false; ch = nextCodePoint(); if (ch == -1) { break; pushBack(extra); offsetAtt.setOffset(correctOffset(offsetStart), correctOffset(offsetStart+lastAcceptLength)); return true; } else if (ch == -1) { } else { pushBack(tokenUpto-1); tokenUpto = 0;
/**
 * Finishes the stream: after delegating to {@code super.end()}, sets both the start and end
 * of the offset attribute to the corrected final offset.
 *
 * <p>The final offset is {@code offset} plus the number of chars still unread in the
 * pending buffer ({@code pendingLimit - pendingUpto}).
 *
 * @throws IOException if {@code super.end()} fails
 */
@Override
public void end() throws IOException {
  super.end();
  final int unreadPending = pendingLimit - pendingUpto;
  final int finalOffset = correctOffset(offset + unreadPending);
  offsetAtt.setOffset(finalOffset, finalOffset);
}
/**
 * Builds a new {@link SimplePatternTokenizer} from this instance's {@code pattern}.
 *
 * @return a freshly constructed tokenizer
 */
@Override
public Tokenizer create() {
  final Tokenizer tokenizer = new SimplePatternTokenizer(pattern);
  return tokenizer;
}
}