/** Builds a fresh {@link IrishAnalyzer} with its default configuration. */
@Override
public Analyzer createAnalyzer() {
    final IrishAnalyzer irishAnalyzer = new IrishAnalyzer();
    return irishAnalyzer;
}
/** Wraps {@code input} in an {@link IrishLowerCaseFilter}. */
@Override
public TokenStream create(TokenStream input) {
    final TokenStream lowerCased = new IrishLowerCaseFilter(input);
    return lowerCased;
}
/**
 * Lowercases the current term with Irish-specific handling: a leading
 * 'n' or 't' immediately followed by an upper-case vowel (e.g. "nAthair")
 * is first separated with a hyphen ("n-athair"), then the remainder of
 * the term is lowercased in place.
 *
 * @return {@code true} if a token was produced, {@code false} at end of input
 * @throws IOException if the upstream token stream fails
 */
@Override
public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
        char[] chArray = termAtt.buffer();
        int chLen = termAtt.length();
        int idx = 0;
        // Prefix 'n'/'t' before an upper-case vowel gets a hyphen inserted
        // after it before lowercasing (Irish initial-mutation spelling).
        if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1])) {
            // Grow the buffer by one slot; resizeBuffer may return a new array,
            // so the local reference must be refreshed here.
            chArray = termAtt.resizeBuffer(chLen + 1);
            // Shift everything after position 0 one slot right to make room
            // for the hyphen; iterate backwards to avoid clobbering.
            for (int i = chLen; i > 1; i--) {
                chArray[i] = chArray[i - 1];
            }
            chArray[1] = '-';
            termAtt.setLength(chLen + 1);
            idx = 2; // skip the prefix and the hyphen — already lower case
            chLen = chLen + 1;
        }
        // Lowercase the rest; toChars writes the code point back in place and
        // returns the number of chars written, advancing i accordingly.
        for (int i = idx; i < chLen;) {
            i += Character.toChars(Character.toLowerCase(chArray[i]), chArray, i);
        }
        return true;
    } else {
        return false;
    }
}
public IrishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new IrishAnalyzer(Analysis.parseStopWords(env, settings, IrishAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); analyzer.setVersion(version); }
/**
 * Returns a token stream over {@code strOrig}. When stemming is allowed,
 * an {@link IrishAnalyzer} is used — with or without its default stop set,
 * depending on {@code stopWordsAllowed}; otherwise the standard token
 * stream is returned unchanged.
 */
@SuppressWarnings("resource")
@Override
protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed, final boolean stopWordsAllowed) throws IOException {
    if (!stemsAllowed) {
        return getStandardTokenStream(strOrig);
    }
    final CharArraySet stopWords =
            stopWordsAllowed ? IrishAnalyzer.getDefaultStopSet() : CharArraySet.EMPTY_SET;
    return new IrishAnalyzer(stopWords).tokenStream("", new StringReader(strOrig));
}
}
/**
 * Assembles the Irish analysis chain on top of {@code tokenizer}:
 * standard filtering, then optionally Irish lowercasing, stop-word
 * removal, keyword-marking of stem exclusions, and Snowball stemming —
 * each stage gated by the corresponding configuration flag.
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream chain = new StandardFilter(matchVersion, tokenizer);
    if (caseInsensitive) {
        chain = new IrishLowerCaseFilter(chain);
    }
    if (useStopWords) {
        chain = new StopFilter(matchVersion, chain, IrishAnalyzer.getDefaultStopSet());
    }
    if (useStem) {
        // Terms in the exclusion set are marked as keywords so the stemmer skips them.
        if (!stemExclusionSet.isEmpty()) {
            chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
        }
        chain = new SnowballFilter(chain, new IrishStemmer());
    }
    return chain;
}
// Supplies the IrishAnalyzer's built-in default stop-word set for this entry.
@Override
protected CharArraySet build() {
    return IrishAnalyzer.getDefaultStopSet();
}
},
protected TokenStreamComponents createComponents(String fieldName) { final Tokenizer source; if (getVersion().onOrAfter(Version.LUCENE_4_7_0)) { source = new StandardTokenizer(); } else { result = new StopFilter(result, HYPHENATIONS); result = new ElisionFilter(result, DEFAULT_ARTICLES); result = new IrishLowerCaseFilter(result); result = new StopFilter(result, stopwords); if(!stemExclusionSet.isEmpty())
public IrishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new IrishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, IrishAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET) ); analyzer.setVersion(version); }
/**
 * Normalization chain for multi-term queries: elision of the default
 * articles followed by Irish-aware lowercasing — no stemming and no
 * stop-word removal.
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
    return new IrishLowerCaseFilter(new ElisionFilter(in, DEFAULT_ARTICLES));
}
}
// Builds a default-configured IrishAnalyzer for this entry.
@Override
protected Analyzer build() {
    return new IrishAnalyzer();
}
},
/**
 * Lowercases the current term with Irish-specific handling: a leading
 * 'n' or 't' immediately followed by an upper-case vowel (e.g. "nAthair")
 * is first separated with a hyphen ("n-athair"), then the remainder of
 * the term is lowercased in place.
 *
 * @return {@code true} if a token was produced, {@code false} at end of input
 * @throws IOException if the upstream token stream fails
 */
@Override
public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
        char[] chArray = termAtt.buffer();
        int chLen = termAtt.length();
        int idx = 0;
        // Prefix 'n'/'t' before an upper-case vowel gets a hyphen inserted
        // after it before lowercasing (Irish initial-mutation spelling).
        if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1])) {
            // Grow the buffer by one slot; resizeBuffer may return a new array,
            // so the local reference must be refreshed here.
            chArray = termAtt.resizeBuffer(chLen + 1);
            // Shift everything after position 0 one slot right to make room
            // for the hyphen; iterate backwards to avoid clobbering.
            for (int i = chLen; i > 1; i--) {
                chArray[i] = chArray[i - 1];
            }
            chArray[1] = '-';
            termAtt.setLength(chLen + 1);
            idx = 2; // skip the prefix and the hyphen — already lower case
            chLen = chLen + 1;
        }
        // Lowercase the rest; toChars writes the code point back in place and
        // returns the number of chars written, advancing i accordingly.
        for (int i = idx; i < chLen;) {
            i += Character.toChars(Character.toLowerCase(chArray[i]), chArray, i);
        }
        return true;
    } else {
        return false;
    }
}
IrishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new IrishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, IrishAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET) ); analyzer.setVersion(version); }
/** Wraps the given stream in an {@link IrishLowerCaseFilter}. */
@Override
public TokenStream create(TokenStream input) {
    final TokenStream filtered = new IrishLowerCaseFilter(input);
    return filtered;
}
// Creates an IrishAnalyzer pinned to the given version's Lucene version.
@Override
protected Analyzer create(Version version) {
    final Analyzer irish = new IrishAnalyzer();
    irish.setVersion(version.luceneVersion);
    return irish;
}
},
/**
 * Lowercases the current term with Irish-specific handling: a leading
 * 'n' or 't' immediately followed by an upper-case vowel (e.g. "nAthair")
 * is first separated with a hyphen ("n-athair"), then the remainder of
 * the term is lowercased in place.
 *
 * @return {@code true} if a token was produced, {@code false} at end of input
 * @throws IOException if the upstream token stream fails
 */
@Override
public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
        char[] chArray = termAtt.buffer();
        int chLen = termAtt.length();
        int idx = 0;
        // Prefix 'n'/'t' before an upper-case vowel gets a hyphen inserted
        // after it before lowercasing (Irish initial-mutation spelling).
        if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1])) {
            // Grow the buffer by one slot; resizeBuffer may return a new array,
            // so the local reference must be refreshed here.
            chArray = termAtt.resizeBuffer(chLen + 1);
            // Shift everything after position 0 one slot right to make room
            // for the hyphen; iterate backwards to avoid clobbering.
            for (int i = chLen; i > 1; i--) {
                chArray[i] = chArray[i - 1];
            }
            chArray[1] = '-';
            termAtt.setLength(chLen + 1);
            idx = 2; // skip the prefix and the hyphen — already lower case
            chLen = chLen + 1;
        }
        // Lowercase the rest; toChars writes the code point back in place and
        // returns the number of chars written, advancing i accordingly.
        for (int i = idx; i < chLen;) {
            i += Character.toChars(Character.toLowerCase(chArray[i]), chArray, i);
        }
        return true;
    } else {
        return false;
    }
}
/**
 * Creates {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * that tokenize all text in the provided {@link Reader}.
 *
 * @return components built from a {@link StandardTokenizer} filtered with a
 *         hyphenation {@link StopFilter}, {@link ElisionFilter},
 *         {@link IrishLowerCaseFilter}, a stop-word {@link StopFilter},
 *         {@link SetKeywordMarkerFilter} (only when a stem-exclusion set is
 *         provided) and {@link SnowballFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream stream = new StopFilter(source, HYPHENATIONS);
    stream = new ElisionFilter(stream, DEFAULT_ARTICLES);
    stream = new IrishLowerCaseFilter(stream);
    stream = new StopFilter(stream, stopwords);
    // Excluded terms are keyword-marked so the stemmer leaves them intact.
    if (!stemExclusionSet.isEmpty()) {
        stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
    }
    stream = new SnowballFilter(stream, new IrishStemmer());
    return new TokenStreamComponents(source, stream);
}
/**
 * Wraps {@code tokenStream} in the lower-case filter selected by the
 * {@code lang} field: the generic filter when no language is set, or the
 * Greek/Irish/Turkish variant for those languages.
 *
 * @throws IllegalArgumentException for any other language value
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    }
    throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
/**
 * Selects a lower-case filter for {@code tokenStream} based on the
 * configured {@code lang}: generic when unset, otherwise the
 * Greek, Irish, or Turkish language-specific variant.
 *
 * @throws IllegalArgumentException for any unsupported language value
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    }
    throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
/**
 * Applies the Irish filter chain to {@code result}: hyphenation stop
 * filtering, elision of the default articles, Irish-aware lowercasing,
 * and Snowball stemming.
 */
public static TokenStream irish(TokenStream result) {
    TokenStream chain = new StopFilter(result, IRISH_HYPHENATIONS);
    chain = new ElisionFilter(chain, IRISH_DEFAULT_ARTICLES);
    chain = new IrishLowerCaseFilter(chain);
    return new SnowballFilter(chain, new IrishStemmer());
}