/**
 * Builds a set pre-populated with every element of the given collection.
 *
 * <p>The set is first created through the {@code (int, boolean)} constructor using
 * {@code c.size()} as the start size, and the elements are then added in bulk.
 *
 * @param c
 *          the collection whose elements are copied into the new set
 * @param ignoreCase
 *          <code>true</code> for a case-insensitive set, <code>false</code> for a
 *          case-sensitive one
 */
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  this(c.size(), ignoreCase);
  this.addAll(c);
}
/**
 * Creates a set from a Collection of objects.
 *
 * <p>The set is sized from {@code c.size()} and then filled with all elements of
 * {@code c}.
 *
 * @param c
 *          a collection whose elements are to be placed into the set
 * @param ignoreCase
 *          <code>false</code> if and only if the set should be case sensitive,
 *          otherwise <code>true</code>.
 */
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  this(c.size(), ignoreCase);
  addAll(c);
}
/**
 * Builds a {@link CharArraySet} of stopwords out of a list.
 *
 * @param stopWords the stopword entries; every element of the list is added to the
 *                  returned set
 * @param ignoreCase forwarded to the {@link CharArraySet} constructor; {@code true}
 *                   requests a case-insensitive set
 * @return a new {@link CharArraySet} holding the given words
 */
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
  final CharArraySet result = new CharArraySet(stopWords.size(), ignoreCase);
  result.addAll(stopWords);
  return result;
}
/**
 * Builds a {@link CharArraySet} of stopwords out of a String array.
 *
 * @param stopWords the stopwords; every array element is added to the returned set
 * @param ignoreCase forwarded to the {@link CharArraySet} constructor; {@code true}
 *                   requests a case-insensitive set
 * @return a new {@link CharArraySet} holding the given words
 */
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
  final CharArraySet result = new CharArraySet(stopWords.length, ignoreCase);
  // Fixed-size list view over the array; nothing is copied before the bulk add.
  final List<String> wordView = Arrays.asList(stopWords);
  result.addAll(wordView);
  return result;
}
/**
 * Creates a stopword set from the given stopword list.
 *
 * @param stopWords A List of Strings or char[] or any other toString()-able list
 *                  representing the stopwords
 * @param ignoreCase if true, all words are lower cased first
 * @return A Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
  CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
  stopSet.addAll(stopWords);
  return stopSet;
}
/**
 * Creates a stopword set from the given stopword array.
 *
 * @param stopWords An array of stopwords
 * @param ignoreCase If true, all words are lower cased first.
 * @return a Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
  CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
  stopSet.addAll(Arrays.asList(stopWords));
  return stopSet;
}
/** * Returns as {@link CharArraySet} from wordFiles, which * can be a comma-separated list of filenames */ protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException { List<String> files = splitFileNames(wordFiles); CharArraySet words = null; if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { List<String> wlist = getLines(loader, file.trim()); words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); } } return words; }
/**
 * Creates a new CapitalizationFilterFactory configured from the given arguments.
 *
 * <p>Reads the keep set (with its ignore-case flag), the accepted prefixes, the
 * minimum word length, the maximum word count, the maximum token length and the
 * first-word / first-letter switches. The keep set and prefix list are only
 * assigned when the corresponding argument yields a non-null set.
 *
 * @param args the factory arguments; NOTE(review): the final emptiness check implies
 *             the getters remove the entries they read - confirm against the base class
 * @throws IllegalArgumentException if {@code args} is not empty once all known
 *                                  parameters have been read
 */
public CapitalizationFilterFactory(Map<String, String> args) {
  super(args);

  final boolean keepIgnoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
  final Set<String> keepWords = getSet(args, KEEP);
  if (keepWords != null) {
    keep = new CharArraySet(10, keepIgnoreCase);
    keep.addAll(keepWords);
  }

  final Set<String> prefixes = getSet(args, OK_PREFIX);
  if (prefixes != null) {
    okPrefix = new ArrayList<>();
    // Prefixes are stored as char[] rather than String.
    for (String prefix : prefixes) {
      okPrefix.add(prefix.toCharArray());
    }
  }

  minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
  maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
  maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
  onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
  forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);

  // Anything still present at this point was not recognised by this factory.
  final boolean hasLeftovers = !args.isEmpty();
  if (hasLeftovers) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}