/**
 * Builds a tokenizer that generates n-grams within the given size range.
 *
 * @param minGram the smallest n-gram to generate
 * @param maxGram the largest n-gram to generate
 */
public Lucene43NGramTokenizer(final int minGram, final int maxGram) {
  // All handling of the two bounds is delegated to init.
  init(minGram, maxGram);
}
clearAttributes(); if (!started) { started = true; pos++; termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize); offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize)); return true;
/**
 * Instantiates the tokenizer that matches the configured {@code luceneMatchVersion}.
 *
 * <p>Versions on or after {@code LUCENE_4_4_0} get an {@code EdgeNGramTokenizer};
 * anything older gets a {@code Lucene43NGramTokenizer}. Both receive the same
 * {@code minGramSize} and {@code maxGramSize}.
 *
 * @param factory attribute factory handed to the tokenizer constructor
 * @return the newly constructed tokenizer
 */
@Override
public Tokenizer create(AttributeFactory factory) {
  final boolean atLeast44 = luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0);
  if (!atLeast44) {
    // NOTE(review): the legacy branch returns a non-edge n-gram tokenizer while the
    // current branch returns an edge one; confirm this asymmetry is intended.
    return new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize);
  }
  return new EdgeNGramTokenizer(factory, minGramSize, maxGramSize);
}
}
@Override public void end() throws IOException { super.end(); // set final offset final int finalOffset = correctOffset(charsRead); this.offsetAtt.setOffset(finalOffset, finalOffset); }
/**
 * Creates the {@link TokenStream} of n-grams using the given {@link AttributeFactory}.
 *
 * <p>Versions on or after {@code LUCENE_4_4_0} get an {@code NGramTokenizer};
 * anything older gets a {@code Lucene43NGramTokenizer}. Note that no {@link Reader}
 * is passed at creation time; only the attribute factory is supplied.
 *
 * @param factory attribute factory handed to the tokenizer constructor
 * @return the newly constructed tokenizer
 */
@Override
public Tokenizer create(AttributeFactory factory) {
  // Guard clause for the legacy path; the current implementation is the fall-through.
  if (!luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize);
  }
  return new NGramTokenizer(factory, minGramSize, maxGramSize);
}
}
/**
 * Builds a tokenizer that generates n-grams within the given size range, using the
 * supplied attribute factory.
 *
 * @param factory {@link org.apache.lucene.util.AttributeFactory} to use
 * @param minGram the smallest n-gram to generate
 * @param maxGram the largest n-gram to generate
 */
public Lucene43NGramTokenizer(
    final AttributeFactory factory, final int minGram, final int maxGram) {
  super(factory);
  // All handling of the two bounds is delegated to init.
  init(minGram, maxGram);
}