/**
 * Constructs an edge n-gram tokenizer that emits n-grams whose sizes fall within the given
 * range, using a caller-supplied attribute factory.
 *
 * @param factory the {@link org.apache.lucene.util.AttributeFactory} handed to the superclass
 *     constructor
 * @param side the {@link Side} of the input from which n-grams are chopped off
 * @param minGram the smallest n-gram size to generate
 * @param maxGram the largest n-gram size to generate
 */
public Lucene43EdgeNGramTokenizer(
    AttributeFactory factory, Side side, int minGram, int maxGram) {
  super(factory);
  // All remaining setup is delegated to init(), which the other constructor calls as well.
  init(side, minGram, maxGram);
}
clearAttributes(); int end = start + gramSize; termAtt.setEmpty().append(inStr, start, end); offsetAtt.setOffset(correctOffset(start), correctOffset(end)); gramSize++; return true;
@Override public void end() throws IOException { super.end(); // set final offset final int finalOffset = correctOffset(charsRead); this.offsetAtt.setOffset(finalOffset, finalOffset); }
/**
 * Constructs an edge n-gram tokenizer that emits n-grams whose sizes fall within the given
 * range. No attribute factory is supplied, so the superclass's no-argument constructor is used.
 *
 * @param side the {@link Side} of the input from which n-grams are chopped off
 * @param minGram the smallest n-gram size to generate
 * @param maxGram the largest n-gram size to generate
 */
public Lucene43EdgeNGramTokenizer(Side side, int minGram, int maxGram) {
  // All setup is delegated to init(), which the factory-taking constructor calls as well.
  init(side, minGram, maxGram);
}