@Override
public TokenFilter create(TokenStream input) {
    // Wrap the incoming stream in an edge n-gram filter configured from this
    // factory's gram-size bounds and preserve-original flag.
    final EdgeNGramTokenFilter edgeNGrams =
            new EdgeNGramTokenFilter(input, minGramSize, maxGramSize, preserveOriginal);
    return edgeNGrams;
}
}
// NOTE(review): garbled fragment — these statements appear to be lifted from
// different points of an EdgeNGramTokenFilter incrementToken()-style method and
// are not a valid straight-line sequence here: everything after `return false;`
// is unreachable, and `restoreState(state)` is invoked twice in a row.
// Reconcile against the original method before changing anything.
return false; state = captureState(); restoreState(state); restoreState(state); posIncrAtt.setPositionIncrement(0); termAtt.copyBuffer(curTermBuffer, 0, curTermLength);
// NOTE(review): incomplete fragment — the `if (hasIllegalOffsets)` block is not
// closed in this view. Presumably when offsets are "illegal" (token filters
// upstream broke offset ordering) the whole-token offsets are used for every
// emitted gram — TODO confirm against the enclosing method.
clearAttributes(); if (hasIllegalOffsets) { offsetAtt.setOffset(tokStart, tokEnd);
// NOTE(review): incomplete fragment — the `if` block is not closed in this view.
// In the flattened original the trailing `//` comment swallowed the statements
// after it; re-laid-out here so the code is visible (tokens unchanged).
if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
    clearAttributes();
    offsetAtt.setOffset(tokStart, tokEnd);
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    // Append an edge n-gram stage to the wrapped analyzer's chain while
    // reusing its tokenizer untouched.
    return new TokenStreamComponents(
            components.getTokenizer(),
            new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars));
}
}
// Factory hook: builds the filter from this factory's configured side and
// gram-size bounds. NOTE(review): presumably overrides a superclass
// create(TokenStream) with a covariant return type — if so, an @Override
// annotation is missing; confirm against the enclosing class before adding it.
public EdgeNGramTokenFilter create(TokenStream input) { return new EdgeNGramTokenFilter(input, side, minGramSize, maxGramSize); } }
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
    // Use the library defaults for both gram-size bounds; the version
    // argument is deliberately not consulted here.
    final int minGram = EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE;
    final int maxGram = EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE;
    return new EdgeNGramTokenFilter(tokenStream, minGram, maxGram);
}
},
@Override
public TokenFilter create(TokenStream input) {
    // Indices created before Lucene 4.4 keep the legacy implementation for
    // backwards-compatible token emission; newer ones get the current filter.
    if (!luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
        return new Lucene43EdgeNGramTokenFilter(input, minGramSize, maxGramSize);
    }
    return new EdgeNGramTokenFilter(input, minGramSize, maxGramSize);
}
}
@Override /** * Reproduce the behavior in the org.eaglei.solr.AutoSuggestIndexAnalyzer class. * The first line of code in that class's create method is in the method just above. */ public TokenStream create(final TokenStream tokenStream) { // may want to use keyword tokenizer for more standard "single token" auto-suggest // Tokenizer tokenStream = new KeywordTokenizer(reader); TokenStream result = new StandardFilter( Version.LUCENE_36, tokenStream ); result = new LowerCaseFilter( Version.LUCENE_36, result ); result = new EdgeNGramTokenFilter( result, EdgeNGramTokenFilter.Side.FRONT, 1, 20 ); return result; }
@Override public TokenStream create(TokenStream tokenStream) { TokenStream result = tokenStream; // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect if (side == SIDE_BACK) { result = new ReverseStringFilter(result); } result = new EdgeNGramTokenFilter(result, minGram, maxGram); // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect if (side == SIDE_BACK) { result = new ReverseStringFilter(result); } return result; }
@Override public TokenStream create(TokenStream tokenStream) { TokenStream result = tokenStream; // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect if (side == SIDE_BACK) { result = new ReverseStringFilter(result); } result = new EdgeNGramTokenFilter(result, minGram, maxGram); // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect if (side == SIDE_BACK) { result = new ReverseStringFilter(result); } return result; } }
@Override public final TokenStream tokenStream(String fieldName, Reader reader) { Tokenizer tokenStream = new StandardTokenizer(Version.LUCENE_36, reader); // may want to use keyword tokenizer for more standard "single token" auto-suggest //Tokenizer tokenStream = new KeywordTokenizer(reader); TokenStream result = new StandardFilter(Version.LUCENE_36, tokenStream); result = new LowerCaseFilter(Version.LUCENE_36, result); result = new EdgeNGramTokenFilter(result, EdgeNGramTokenFilter.Side.FRONT,1, 20); return result; }; }
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    // Keep the delegate's tokenizer and extend its filter chain with an
    // edge n-gram stage bounded by [minChars, maxChars].
    final TokenFilter edgeNGrams =
            new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars);
    return new TokenStreamComponents(components.getTokenizer(), edgeNGrams);
}
}
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    // Extend the wrapped analyzer's output with edge n-grams while leaving
    // its tokenizer as-is.
    final TokenStream wrappedStream = components.getTokenStream();
    return new TokenStreamComponents(
            components.getTokenizer(),
            new EdgeNGramTokenFilter(wrappedStream, minChars, maxChars));
}
}
// Register the Dutch stemmer under its canonical name.
filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false,
        input -> new SnowballFilter(input, new DutchStemmer())));
// Canonical edge n-gram filter using the library default gram-size bounds.
filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false,
        input -> new EdgeNGramTokenFilter(input,
                EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE)));
// Legacy camel-case alias, kept for pre-6.4 indices but deprecated afterwards.
filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, (reader, version) -> {
    if (version.onOrAfter(org.elasticsearch.Version.V_6_4_0)) {
        // NOTE(review): the deprecation-warning statement was truncated in this
        // view, leaving a dangling string fragment and an unclosed brace.
        // Reconstructed from the surviving message tail — confirm the exact
        // logger call and full message against upstream Elasticsearch.
        deprecationLogger.deprecated(
                "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
                        + "Please change the filter name to [edge_ngram] instead.");
    }
    return new EdgeNGramTokenFilter(reader,
            EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
}));
filters.add(PreConfiguredTokenFilter.singleton("elision", true,
// Strip leading zeros first, then emit front-edge n-grams of length 2..7.
// NOTE(review): fragment of a larger filter-chain builder — presumably used
// for prefix matching on numeric identifiers; confirm against the enclosing
// method before relying on this reading.
result = new RemoveLeadingZerosFilter(result);
result = new EdgeNGramTokenFilter(result, 2, 7);