/**
 * Builds an n-gram {@link Tokenizer} over the configured gram-size range.
 *
 * @param factory the {@link AttributeFactory} the tokenizer should use
 * @return a new {@link NGramTokenizer} producing grams of length
 *     {@code minGramSize} through {@code maxGramSize}
 */
@Override
public Tokenizer create(AttributeFactory factory) {
  final NGramTokenizer tokenizer = new NGramTokenizer(factory, minGramSize, maxGramSize);
  return tokenizer;
}
}
/**
 * Returns a default-configured {@link NGramTokenizer}; the {@code version}
 * argument is accepted but not consulted.
 */
@Override
protected Tokenizer create(Version version) {
  return new NGramTokenizer();
}
},
/**
 * Builds an {@link NGramTokenizer} that reads from {@code input}, emitting
 * grams of length {@code minGramSize} through {@code maxGramSize}.
 *
 * @param input the character stream to tokenize
 * @return a new n-gram tokenizer over {@code input}
 */
public NGramTokenizer create(Reader input) {
  final NGramTokenizer tokenizer = new NGramTokenizer(input, minGramSize, maxGramSize);
  return tokenizer;
}
}
/**
 * Creates an n-gram tokenizer. When a custom {@code matcher} is configured,
 * the decision of what counts as a token character is delegated to it;
 * otherwise the stock {@link NGramTokenizer} behavior applies.
 */
@Override
public Tokenizer create() {
  if (matcher != null) {
    return new NGramTokenizer(minGram, maxGram) {
      @Override
      protected boolean isTokenChar(int chr) {
        return matcher.isTokenChar(chr);
      }
    };
  }
  return new NGramTokenizer(minGram, maxGram);
}
/**
 * Creates an n-gram tokenizer. With no {@code matcher} configured the plain
 * {@link NGramTokenizer} is returned; otherwise an anonymous subclass routes
 * the token-character test through the matcher.
 */
@Override
public Tokenizer create() {
  if (matcher == null) {
    return new NGramTokenizer(minGram, maxGram);
  }
  return new NGramTokenizer(minGram, maxGram) {
    @Override
    protected boolean isTokenChar(int chr) {
      return matcher.isTokenChar(chr);
    }
  };
}
/**
 * Builds the tokenizer for this factory's gram range. A configured
 * {@code matcher} overrides {@link NGramTokenizer#isTokenChar(int)}.
 */
@Override
public Tokenizer create() {
  final boolean delegateToMatcher = matcher != null;
  if (!delegateToMatcher) {
    return new NGramTokenizer(minGram, maxGram);
  }
  return new NGramTokenizer(minGram, maxGram) {
    @Override
    protected boolean isTokenChar(int chr) {
      return matcher.isTokenChar(chr);
    }
  };
}
/**
 * Creates the n-gram tokenizer, selecting the implementation by the
 * configured Lucene match version: 4.4.0 and later use the current
 * {@link NGramTokenizer}; older versions fall back to the legacy
 * {@code Lucene43NGramTokenizer} for index compatibility.
 *
 * @param factory the {@link AttributeFactory} for the tokenizer
 * @return a version-appropriate n-gram tokenizer
 */
@Override
public Tokenizer create(AttributeFactory factory) {
  return luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)
      ? new NGramTokenizer(factory, minGramSize, maxGramSize)
      : new Lucene43NGramTokenizer(factory, minGramSize, maxGramSize);
}
}
/**
 * Analysis chain: n-gram tokenization followed by lower-casing.
 * The field name is not consulted — every field gets the same pipeline.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tokenizer = new NGramTokenizer(minGram, maxGram);
  return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
}
/**
 * Builds a {@link PhraseQuery} whose terms are the consecutive fixed-length
 * n-grams of {@code fieldValue} (lower-cased), in order of appearance.
 *
 * @param fieldName  the field the phrase terms target
 * @param fieldValue the text to split into grams; must be at least
 *     {@code gramSize} characters long
 * @param gramSize   the exact gram length (used as both min and max)
 * @return the assembled phrase query
 * @throws IllegalArgumentException if {@code fieldValue} is shorter than {@code gramSize}
 * @throws RuntimeException wrapping any {@link IOException} raised by the tokenizer
 */
private static PhraseQuery build(String fieldName, String fieldValue, int gramSize) {
  Preconditions.checkArgument(fieldValue.length() >= gramSize);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  try (NGramTokenizer tokenizer = new NGramTokenizer(gramSize, gramSize)) {
    // Locale.ROOT keeps case-folding locale-independent; the default
    // toLowerCase() produces different grams under e.g. the Turkish locale.
    tokenizer.setReader(new StringReader(fieldValue.toLowerCase(java.util.Locale.ROOT)));
    tokenizer.reset();
    // Attribute instances are stable across incrementToken(); fetch once.
    CharTermAttribute term = tokenizer.getAttribute(CharTermAttribute.class);
    while (tokenizer.incrementToken()) {
      builder.add(new Term(fieldName, term.toString()));
    }
    tokenizer.end(); // finalize stream state per the TokenStream contract
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return builder.build();
}
/**
 * Builds the per-field analysis chain:
 * <ul>
 *   <li>{@code _name_prefix}: n-gram tokenizer ({@code mingram..maxgram}) +
 *       lower-case + word-delimiter filter</li>
 *   <li>fields starting with {@code _ngram_}: bare 3–4 gram tokenizer</li>
 *   <li>everything else: whitespace tokenizer + lower-case + word-delimiter filter</li>
 * </ul>
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  Tokenizer source;
  TokenStream stream;
  if ("_name_prefix".equals(fieldName)) {
    source = new NGramTokenizer(reader, mingram, maxgram);
    stream = new WordDelimiterFilter(new LowerCaseFilter(Version.LUCENE_43, source),
        WordDelimiterFilter.ALPHANUM, CharArraySet.EMPTY_SET);
  } else if (fieldName.startsWith("_ngram_")) {
    source = new NGramTokenizer(reader, 3, 4);
    // Bug fix: the sink was left null, so TokenStreamComponents.getTokenStream()
    // returned null and consumers NPE'd. With no filters the tokenizer is the sink.
    stream = source;
  } else {
    source = new WhitespaceTokenizer(Version.LUCENE_43, reader);
    stream = new WordDelimiterFilter(new LowerCaseFilter(Version.LUCENE_43, source),
        WordDelimiterFilter.ALPHANUM, CharArraySet.EMPTY_SET);
  }
  return new TokenStreamComponents(source, stream);
}
/**
 * Analysis chain: n-gram tokenizer → standard filter → lower-case →
 * stop-word removal (case-insensitive stop set from {@code stopWords()}).
 */
@Override
protected TokenStreamComponents createComponents(final String field) {
  final Tokenizer source = new NGramTokenizer(minNgram(), maxNgram());
  final CharArraySet stopSet = new CharArraySet(asList(stopWords()), true);
  TokenStream sink = new StandardFilter(source);
  sink = new LowerCaseFilter(sink);
  sink = new StopFilter(sink, stopSet);
  return new TokenStreamComponents(source, sink);
}
// Emits grams of length 2 through maxNGramLength from `reader`.
// setReader() then reset() must both precede the first incrementToken()
// call per the TokenStream contract.
tokenizer = new NGramTokenizer(2, maxNGramLength); tokenizer.setReader(reader); tokenizer.reset();