/**
 * Removes stop words from the given search text.
 *
 * @param searchText search text to filter
 * @param stopWordsFile file that contains the stop words
 * @return the search text with all stop words removed
 */
private String filter(String searchText, String stopWordsFile) {
    StopFilterFactory factory = new StopFilterFactory();

    Map<String, String> params = new HashMap<>();
    params.put("words", stopWordsFile);
    params.put("luceneMatchVersion", String.valueOf(Version.LUCENE_31));
    params.put("ignoreCase", String.valueOf(ignoreCase));
    factory.init(params);
    factory.inform(new HibernateSearchResourceLoader());

    // The factory returns a CharArraySet; its contains(Object) accepts
    // Strings, which is all removeAll() needs below.
    @SuppressWarnings("unchecked")
    Set<String> stopWords = (Set<String>) factory.getStopWords();

    List<String> terms = splitSearchText(searchText);
    terms.removeAll(stopWords);
    return joinSearchTerms(terms);
}
public void inform(ResourceLoader loader) { String stopWordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase",false); enablePositionIncrements = getBoolean("enablePositionIncrements",false); if (stopWordFiles != null) { try { List<String> files = StrUtils.splitFileNames(stopWordFiles); if (stopWords == null && files.size() > 0){ //default stopwords list has 35 or so words, but maybe don't make it that big to start stopWords = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call stopWords.addAll(StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } else { stopWords = (CharArraySet) StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase); } } //Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095