/**
 * Builds the analysis chain for one reader.
 *
 * <p>The tokenizer is created over {@code charStream(reader)}; its output is then
 * passed through every entry of {@code filters}, in array order, each one wrapping
 * the stream produced by the previous step.
 *
 * @param fieldName not consulted by this implementation
 * @param reader    the character source handed to {@code charStream}
 * @return a {@link TokenStreamInfo} holding the tokenizer and the outermost stream
 */
@Override
public TokenStreamInfo getStream(String fieldName, Reader reader) {
  final Tokenizer source = (Tokenizer) tokenizer.create(charStream(reader));
  TokenStream chain = source;
  final int filterCount = filters.length;
  for (int idx = 0; idx < filterCount; idx++) {
    // Each factory wraps the stream built so far.
    chain = filters[idx].create(chain);
  }
  return new TokenStreamInfo(source, chain);
}
/**
 * Initializes this factory from its configuration arguments.
 *
 * <p>Calls {@code super.init(args)} first, then fills the configuration fields through
 * the {@code getBoolean}/{@code getInt}/{@code getFloat} helpers. The second argument
 * of each helper call is presumably the fallback used when the key is absent
 * (verify against the helper implementations).
 *
 * @param args the configuration map passed on to the superclass
 */
@Override
public void init(Map<String, String> args) {
  super.init(args);

  this.withOriginal = getBoolean("withOriginal", true);
  this.maxPosAsterisk = getInt("maxPosAsterisk", 2);
  this.maxPosQuestion = getInt("maxPosQuestion", 1);
  this.minTrailing = getInt("minTrailing", 2);
  this.maxFractionAsterisk = getFloat("maxFractionAsterisk", 0.0f);
}
/**
 * Wraps {@code input} in a {@link CommonGramsFilter} (built from this factory's
 * {@code commonWords} and {@code ignoreCase}) and wraps that, in turn, in a
 * {@link CommonGramsQueryFilter}.
 *
 * @param input the upstream token stream
 * @return the query filter sitting on top of the common-grams filter
 */
public CommonGramsQueryFilter create(TokenStream input) {
  return new CommonGramsQueryFilter(
      new CommonGramsFilter(input, commonWords, ignoreCase));
}
}
/**
 * Initializes this factory from its configuration arguments.
 *
 * <p>Calls {@code super.init(args)}, then sets {@code maxShingleSize} from
 * {@code getInt("maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)} and
 * {@code outputUnigrams} from {@code getBoolean("outputUnigrams", true)}. The second
 * argument of each helper is presumably the value used when the key is missing
 * (TODO confirm against the helper implementations).
 *
 * @param args the configuration map passed on to the superclass
 */
public void init(Map<String, String> args) { super.init(args); maxShingleSize = getInt("maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); outputUnigrams = getBoolean("outputUnigrams", true); } public ShingleFilter create(TokenStream input) {
/**
 * Initializes this factory from its configuration arguments.
 *
 * <p>After delegating to {@code super.init(args)}, every option is read as an
 * {@code int} through {@code getInt}; the second argument of each call is presumably
 * the value used when the key is not configured (verify against {@code getInt}).
 *
 * @param args the configuration map passed on to the superclass
 */
@Override
public void init(Map<String, String> args) {
  super.init(args);

  // Part generation.
  this.generateWordParts = getInt("generateWordParts", 1);
  this.generateNumberParts = getInt("generateNumberParts", 1);

  // Catenation.
  this.catenateWords = getInt("catenateWords", 0);
  this.catenateNumbers = getInt("catenateNumbers", 0);
  this.catenateAll = getInt("catenateAll", 0);

  // Splitting and miscellaneous switches.
  this.splitOnCaseChange = getInt("splitOnCaseChange", 1);
  this.splitOnNumerics = getInt("splitOnNumerics", 1);
  this.preserveOriginal = getInt("preserveOriginal", 0);
  this.stemEnglishPossessive = getInt("stemEnglishPossessive", 1);
}
/**
 * Initializes this factory from its configuration arguments.
 *
 * <p>Delegates to {@code super.init(args)} and then sets
 * {@code enablePositionIncrements} from
 * {@code getBoolean("enablePositionIncrements", false)}.
 *
 * @param args the configuration map passed on to the superclass
 */
public void init(Map<String,String> args) {
  super.init(args);
  this.enablePositionIncrements = getBoolean("enablePositionIncrements", false);
}
/**
 * Returns the token stream for the given field and reader by delegating to
 * {@code getStream(fieldName, reader)} and taking its {@code getTokenStream()} result.
 *
 * @param fieldName forwarded unchanged to {@code getStream}
 * @param reader    forwarded unchanged to {@code getStream}
 * @return the stream reported by {@code getStream(...).getTokenStream()}
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
  final TokenStream stream = getStream(fieldName, reader).getTokenStream();
  return stream;
}
/**
 * Creates a {@link PatternTokenizer} over {@code reader} using {@code pattern},
 * passing {@code 0} as the third constructor argument (presumably the capture
 * group to emit; confirm against {@code PatternTokenizer}).
 *
 * <p>Previously an {@link IOException} from the constructor was silently swallowed
 * and {@code null} was returned, which only surfaced later as a
 * {@code NullPointerException} far from the real cause. The failure is now rethrown
 * immediately with the original exception attached.
 *
 * @param fieldName not consulted by this implementation
 * @param reader    the character source to tokenize
 * @return a newly constructed tokenizer; never {@code null}
 * @throws RuntimeException wrapping the {@link IOException} if construction fails
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
  try {
    return new PatternTokenizer(reader, pattern, 0);
  } catch (IOException e) {
    // The overridden signature declares no checked exception, so wrap and keep the cause.
    throw new RuntimeException("Unable to create PatternTokenizer for field " + fieldName, e);
  }
}
};
/**
 * Creates a {@link WhitespaceTokenizer} that reads {@code input} through an
 * {@link HTMLStripReader}.
 *
 * <p>The returned tokenizer overrides {@code reset(Reader)} so that a reader supplied
 * on reset is wrapped in a fresh {@link HTMLStripReader} as well.
 *
 * @param input the raw character source
 * @return the tokenizer over the wrapped reader
 */
public Tokenizer create(Reader input) {
  final Reader stripped = new HTMLStripReader(input);
  return new WhitespaceTokenizer(stripped) {
    @Override
    public void reset(Reader replacement) throws IOException {
      // Re-wrap so the replacement reader is read through HTMLStripReader too.
      super.reset(new HTMLStripReader(replacement));
    }
  };
}
}
// NOTE(review): init() is invoked here after delegating to the three-argument
// constructor. If that constructor also calls init(), initialization runs twice;
// verify against the three-argument constructor before changing anything.
/**
 * Construct a token stream filtering the given input using an array of common
 * words to create bigrams.
 *
 * <p>Delegates to the three-argument constructor with {@code false} as the third
 * argument (presumably the ignore-case flag; confirm against that constructor),
 * then calls {@code init()}.
 *
 * @param input       token stream in the filter chain
 * @param commonWords words to be used in constructing bigrams
 */
public CommonGramsFilter(TokenStream input, String[] commonWords) { this(input, commonWords, false); init(); }
/**
 * Build a {@link CharArraySet} from an array of common words, appropriate for
 * passing into the CommonGramsFilter constructor. This permits the common-words
 * set to be built once and cached when an Analyzer is constructed.
 *
 * <p>Equivalent to calling {@link #makeCommonSet(java.lang.String[], boolean)} with
 * {@code false} for {@code ignoreCase}.
 *
 * @param commonWords the words to place in the set
 * @return the set produced by the two-argument overload
 * @see #makeCommonSet(java.lang.String[], boolean)
 */
public static final CharArraySet makeCommonSet(String[] commonWords) {
  final boolean ignoreCase = false;
  return makeCommonSet(commonWords, ignoreCase);
}
/**
 * Creates a phonetic filter over {@code in}.
 *
 * <p>Stores the encoder, name and inject flag, then registers the
 * {@link TermAttribute} and {@link PositionIncrementAttribute} on this stream via
 * {@code addAttribute}.
 *
 * @param in      the upstream token stream
 * @param encoder the encoder stored for later use
 * @param name    the name stored alongside the encoder
 * @param inject  the inject flag stored on this filter
 */
public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
  super(in);

  // Plain configuration.
  this.encoder = encoder;
  this.name = name;
  this.inject = inject;

  // Attribute handles, registered in the same order as before.
  termAtt = (TermAttribute) addAttribute(TermAttribute.class);
  posAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
}
/**
 * Creates the filter over {@code input}.
 *
 * <p>Registers the {@link TermAttribute} and {@link PositionIncrementAttribute} via
 * {@code addAttribute}, then stores the two configuration values.
 *
 * @param input        the upstream token stream
 * @param withOriginal flag stored on this filter
 * @param markerChar   marker character stored on this filter
 */
protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
  super(input);

  // Attribute handles, registered in the same order as before.
  termAtt = (TermAttribute) addAttribute(TermAttribute.class);
  posAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);

  // Plain configuration.
  this.withOriginal = withOriginal;
  this.markerChar = markerChar;
}
/**
 * Creates a trim filter over {@code in}.
 *
 * <p>Stores the {@code updateOffsets} flag and registers the {@link TermAttribute}
 * and {@link OffsetAttribute} via {@code addAttribute}.
 *
 * @param in            the upstream token stream
 * @param updateOffsets flag stored on this filter
 */
public TrimFilter(TokenStream in, boolean updateOffsets) {
  super(in);

  this.updateOffsets = updateOffsets;

  // Attribute handles, registered in the same order as before.
  termAtt = (TermAttribute) addAttribute(TermAttribute.class);
  offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
}
/**
 * Creates the filter over {@code source}.
 *
 * <p>Stores the protected-words set and the stemmer, then registers the
 * {@link TermAttribute} via {@code addAttribute}.
 *
 * @param source    the upstream token stream
 * @param stemmer   the Snowball program stored on this filter
 * @param protWords the word set stored on this filter
 */
public SnowballPorterFilter(TokenStream source, SnowballProgram stemmer, CharArraySet protWords) {
  super(source);

  this.protWords = protWords;
  this.stemmer = stemmer;

  termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
 * Creates the filter over {@code in}.
 *
 * <p>Keeps a reference to the supplied factory and registers the
 * {@link TermAttribute} via {@code addAttribute}.
 *
 * @param in      the upstream token stream
 * @param factory the factory stored on this filter
 */
public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
  super(in);

  this.factory = factory;

  termAtt = (TermAttribute) addAttribute(TermAttribute.class);
}
/**
 * Wraps {@code input} in a {@link CommonGramsFilter} built from this factory's
 * {@code commonWords} and {@code ignoreCase}.
 *
 * @param input the upstream token stream
 * @return the newly constructed filter
 */
public CommonGramsFilter create(TokenStream input) {
  return new CommonGramsFilter(input, commonWords, ignoreCase);
}
}
/**
 * Creates a {@link StandardTokenizer} that reads {@code input} through an
 * {@link HTMLStripReader}.
 *
 * <p>The returned tokenizer overrides {@code reset(Reader)} so that a reader supplied
 * on reset is wrapped in a fresh {@link HTMLStripReader} as well.
 *
 * @param input the raw character source
 * @return the tokenizer over the wrapped reader
 */
public Tokenizer create(Reader input) {
  final Reader stripped = new HTMLStripReader(input);
  return new StandardTokenizer(stripped) {
    @Override
    public void reset(Reader replacement) throws IOException {
      // Re-wrap so the replacement reader is read through HTMLStripReader too.
      super.reset(new HTMLStripReader(replacement));
    }
  };
}
}