org.apache.lucene.analysis.CharArraySet.<init> java code examples

/**
 * Returns a copy of the given set as a {@link CharArraySet}. If the given set
 * is a {@link CharArraySet} the ignoreCase property will be preserved.
 * 
 * @param set
 *          a set to copy
 * @return a copy of the given set as a {@link CharArraySet}. If the given set
 *         is a {@link CharArraySet} the ignoreCase property of the given set
 *         will be preserved.
 */
public static CharArraySet copy(final Set<?> set) {
 // The shared empty instance is handed back directly instead of being copied.
 if (set == EMPTY_SET) {
  return EMPTY_SET;
 }
 // Any other Set implementation is copied into a set built with ignoreCase = false.
 if (!(set instanceof CharArraySet)) {
  return new CharArraySet(set, false);
 }
 // A CharArraySet is duplicated by copying its backing map.
 return new CharArraySet(CharArrayMap.copy(((CharArraySet) set).map));
}

/**
 * Creates a stopword set from the given stopword array.
 * 
 * @param stopWords An array of stopwords
 * @param ignoreCase If true, all words are lower cased first.  
 * @return a Set containing the words
 */    
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
 // Size the set from the array length, then add every array entry.
 final int capacity = stopWords.length;
 final CharArraySet result = new CharArraySet(capacity, ignoreCase);
 result.addAll(Arrays.asList(stopWords));
 return result;
}

/**
 * Creates a stopword set from the given stopword list.
 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
 * @param ignoreCase if true, all words are lower cased first
 * @return A Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
 // Size the set from the list size, then add every list entry.
 final int capacity = stopWords.size();
 final CharArraySet result = new CharArraySet(capacity, ignoreCase);
 result.addAll(stopWords);
 return result;
}

/**
 * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
 * leading and trailing whitespace). Every line of the Reader should contain only
 * one word. The words need to be in lowercase if you make use of an
 * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
 *
 * @param reader Reader containing the wordlist
 * @return A {@link CharArraySet} with the reader's words
 */
public static CharArraySet getWordSet(Reader reader) throws IOException {
 // Delegate to the two-argument overload with a fresh set created with ignoreCase = false.
 final CharArraySet target = new CharArraySet(INITIAL_CAPACITY, false);
 return getWordSet(reader, target);
}

/**
 * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
 * leading and trailing whitespace). Every line of the Reader should contain only
 * one word. The words need to be in lowercase if you make use of an
 * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
 *
 * @param reader Reader containing the wordlist
 * @param comment The string representing a comment.
 * @return A CharArraySet with the reader's words
 */
public static CharArraySet getWordSet(Reader reader, String comment) throws IOException {
 // Delegate to the three-argument overload with a fresh set created with ignoreCase = false.
 final CharArraySet target = new CharArraySet(INITIAL_CAPACITY, false);
 return getWordSet(reader, comment, target);
}

/**
 * Reads stopwords from a stopword list in Snowball format.
 * <p>
 * The snowball format is the following:
 * <ul>
 * <li>Lines may contain multiple words separated by whitespace.
 * <li>The comment character is the vertical line (&#124;).
 * <li>Lines may contain trailing comments.
 * </ul>
 * 
 * @param reader Reader containing a Snowball stopword list
 * @return A {@link CharArraySet} with the reader's words
 */
public static CharArraySet getSnowballWordSet(Reader reader) throws IOException {
 // Delegate to the two-argument overload with a fresh set created with ignoreCase = false.
 final CharArraySet target = new CharArraySet(INITIAL_CAPACITY, false);
 return getSnowballWordSet(reader, target);
}

/**
 * Returns an unmodifiable {@link CharArraySet}. This makes it possible to provide
 * unmodifiable views of internal sets for "read-only" use.
 * 
 * @param set
 *          a set for which the unmodifiable set is returned.
 * @return an unmodifiable {@link CharArraySet}; the given set itself is returned if it is already unmodifiable.
 * @throws NullPointerException
 *           if the given set is <code>null</code>.
 */
public static CharArraySet unmodifiableSet(CharArraySet set) {
 if (set == null) {
  throw new NullPointerException("Given set is null");
 }
 // Nothing to wrap for the shared empty set or a set whose map is already unmodifiable:
 // the very same instance is returned in both cases.
 final boolean returnAsIs =
   set == EMPTY_SET || set.map instanceof CharArrayMap.UnmodifiableCharArrayMap;
 if (returnAsIs) {
  return set;
 }
 return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
}

try {
 reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
 return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase));
} finally {
 IOUtils.close(reader);

/**
 * Creates a rule whose exception words are stored in a {@link CharArraySet}
 * built with ignoreCase = false. The suffix, min and replacement arguments are
 * passed unchanged to the superclass constructor.
 *
 * @param suffix suffix every exception must end with
 * @param min forwarded to the superclass constructor
 * @param replacement forwarded to the superclass constructor
 * @param exceptions the exception words; each one must end with {@code suffix}
 * @throws RuntimeException if an exception does not end with {@code suffix}
 */
public RuleWithSetExceptions(String suffix, int min, String replacement,
  String[] exceptions) {
 super(suffix, min, replacement);
 // Reject the first exception (in array order) that could never match this rule's suffix.
 for (final String exception : exceptions) {
  if (exception.endsWith(suffix)) {
   continue;
  }
  throw new RuntimeException("useless exception '" + exception + "' does not end with '" + suffix + "'");
 }
 this.exceptions = new CharArraySet(Arrays.asList(exceptions), false);
}

/**
 * Builds a {@link CharArraySet} from {@code words}, replacing every word that is a key of
 * {@code namedWords} with the set stored under that key.
 *
 * @param words the words (or names of word sets) to resolve
 * @param namedWords map from name to word set; when {@code null} the words are used as given
 * @param ignoreCase passed to the {@link CharArraySet} constructor
 * @return the resolved set
 */
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
  if (namedWords == null) {
    return new CharArraySet(words, ignoreCase);
  }
  final CharArraySet resolved = new CharArraySet(words.size(), ignoreCase);
  for (final String candidate : words) {
    if (!namedWords.containsKey(candidate)) {
      // Not a known name: keep the word itself.
      resolved.add(candidate);
      continue;
    }
    // Known name: add the contents of the named set.
    resolved.addAll(namedWords.get(candidate));
  }
  return resolved;
}

/**
 * Sole constructor
 *
 * Collects at most <code>num</code> completions
 * with corresponding document and weight
 */
TopSuggestGroupDocsCollector(int num, boolean skipDuplicates) {
  super(1, skipDuplicates);
  if (num < 1) {
    throw new IllegalArgumentException("'num' must be > 0");
  }
  this.priorityQueue = new SuggestScoreDocPriorityQueue(num);
  this.num = num;
  // The duplicate-tracking structures are only allocated when duplicates are skipped.
  if (!skipDuplicates) {
    this.seenSurfaceForms = null;
    this.pendingResults = null;
  } else {
    this.seenSurfaceForms = new CharArraySet(num, false);
    this.pendingResults = new ArrayList<>();
  }
}

/**
 * Reads the {@code stem_exclusion} setting.
 *
 * @param settings settings to read from
 * @param defaultStemExclusion returned when no {@code stem_exclusion} list is configured
 * @return {@link CharArraySet#EMPTY_SET} when the setting equals {@code _none_}; otherwise a set
 *         (ignoreCase = false) of the configured list, or {@code defaultStemExclusion} when the
 *         list is absent
 */
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
  if ("_none_".equals(settings.get("stem_exclusion"))) {
    return CharArraySet.EMPTY_SET;
  }
  final List<String> configured = settings.getAsList("stem_exclusion", null);
  // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
  return configured == null ? defaultStemExclusion : new CharArraySet(configured, false);
}

/**
 * Initialises {@code stopWords}: from the configured word files when present (in word-set or
 * Snowball format), otherwise from {@code EnglishAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @throws IllegalArgumentException if the format is unknown, or if a format is given without
 *         word files
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
 if (stopWordFiles == null) {
  // Without word files a format makes no sense; fall back to the English stop words.
  if (format != null) {
   throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
  }
  stopWords = new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
  return;
 }
 if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
  stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
  return;
 }
 if (!FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
  throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
 }
 stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
}

/**
 * Wraps the token stream of {@code components} in a {@link StopFilter} when stop words are
 * registered for {@code fieldName}; otherwise returns {@code components} unchanged.
 */
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
 final Set<String> fieldStopWords = stopWordsPerField.get(fieldName);
 if (fieldStopWords != null) {
  // The per-field stop words are wrapped in a set built with ignoreCase = false.
  final StopFilter filtered = new StopFilter(
    components.getTokenStream(), new CharArraySet(fieldStopWords, false));
  return new TokenStreamComponents(components.getTokenizer(), filtered);
 }
 return components;
}

/**
 * Creates a new HTMLStripCharFilter over the provided Reader
 * with the specified start and end tags.
 * @param source Reader to strip html tags from.
 * @param escapedTags Tags in this set (both start and end tags)
 *  will not be filtered out.
 */
public HTMLStripCharFilter(Reader source, Set<String> escapedTags) {
 super(source);
 this.zzReader = source;
 if (escapedTags == null) {
  return;
 }
 for (final String tag : escapedTags) {
  // BR, SCRIPT and STYLE (any letter case) are tracked by dedicated flags, not in the set.
  if (tag.equalsIgnoreCase("BR")) {
   escapeBR = true;
   continue;
  }
  if (tag.equalsIgnoreCase("SCRIPT")) {
   escapeSCRIPT = true;
   continue;
  }
  if (tag.equalsIgnoreCase("STYLE")) {
   escapeSTYLE = true;
   continue;
  }
  // Every other tag goes into a lazily created set built with ignoreCase = true.
  if (this.escapedTags == null) {
   this.escapedTags = new CharArraySet(16, true);
  }
  this.escapedTags.add(tag);
 }
}

/**
 * Builds a {@link CharArraySet} from the word list configured under {@code settingsPrefix}.
 * Case handling is read from the boolean setting {@code settingsPrefix + "_case"} (default false).
 *
 * @return the word set, or {@code null} when {@code getWordList} returns {@code null}
 */
public static CharArraySet getWordSet(Environment env, org.elasticsearch.Version indexCreatedVersion, Settings settings,
                   String settingsPrefix) {
  final List<String> words = getWordList(env, settings, settingsPrefix);
  if (words != null) {
    final String caseKey = settingsPrefix + "_case";
    final boolean ignoreCase =
      settings.getAsBooleanLenientForPreEs6Indices(indexCreatedVersion, caseKey, false, deprecationLogger);
    return new CharArraySet(words, ignoreCase);
  }
  return null;
}

/**
 * Find the unique stem(s) of the provided word
 * 
 * @param word Word to find the stems for
 * @return List of stems for the word
 */
public List<CharsRef> uniqueStems(char word[], int length) {
 final List<CharsRef> allStems = stem(word, length);
 // Fewer than two stems cannot contain a duplicate; that list is returned untouched.
 if (allStems.size() >= 2) {
  final CharArraySet seen = new CharArraySet(8, dictionary.ignoreCase);
  final List<CharsRef> unique = new ArrayList<>();
  for (final CharsRef candidate : allStems) {
   if (seen.contains(candidate)) {
    continue;
   }
   // First occurrence: keep it, preserving the order of the stem list.
   unique.add(candidate);
   seen.add(candidate);
  }
  return unique;
 }
 return allStems;
}

/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
 * except the input is in snowball format. */
protected final CharArraySet getSnowballWordSet(ResourceLoader loader,
  String wordFiles, boolean ignoreCase) throws IOException {
 final List<String> files = splitFileNames(wordFiles);
 if (files.isEmpty()) {
  // No file names: there is no set to return.
  return null;
 }
 // default stopwords list has 35 or so words, but maybe don't make it that
 // big to start
 final CharArraySet words = new CharArraySet(files.size() * 10, ignoreCase);
 for (final String file : files) {
  InputStream stream = null;
  Reader reader = null;
  try {
   stream = loader.openResource(file.trim());
   // Decode as UTF-8 and report malformed or unmappable input.
   reader = new InputStreamReader(stream, StandardCharsets.UTF_8.newDecoder()
     .onMalformedInput(CodingErrorAction.REPORT)
     .onUnmappableCharacter(CodingErrorAction.REPORT));
   WordlistLoader.getSnowballWordSet(reader, words);
  } finally {
   IOUtils.closeWhileHandlingException(reader, stream);
  }
 }
 return words;
}

/**
 * Returns a {@link CharArraySet} built from wordFiles, which
 * can be a comma-separated list of filenames
 */
protected final CharArraySet getWordSet(ResourceLoader loader,
  String wordFiles, boolean ignoreCase) throws IOException {
 final List<String> files = splitFileNames(wordFiles);
 if (files.isEmpty()) {
  // No file names: there is no set to return.
  return null;
 }
 // default stopwords list has 35 or so words, but maybe don't make it that
 // big to start
 final CharArraySet words = new CharArraySet(files.size() * 10, ignoreCase);
 for (final String file : files) {
  // The lines of each file are merged into the combined set.
  final List<String> lines = getLines(loader, file.trim());
  words.addAll(StopFilter.makeStopSet(lines, ignoreCase));
 }
 return words;
}

/** Creates a new CapitalizationFilterFactory */
public CapitalizationFilterFactory(Map<String, String> args) {
 super(args);
 final boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);

 // Optional KEEP words, stored in a set that uses the KEEP_IGNORE_CASE flag.
 final Set<String> keepWords = getSet(args, KEEP);
 if (keepWords != null) {
  keep = new CharArraySet(10, ignoreCase);
  keep.addAll(keepWords);
 }

 // Optional OK_PREFIX entries, stored as char arrays.
 final Set<String> prefixes = getSet(args, OK_PREFIX);
 if (prefixes != null) {
  okPrefix = new ArrayList<>();
  for (final String prefix : prefixes) {
   okPrefix.add(prefix.toCharArray());
  }
 }

 minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
 maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
 maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
 onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
 forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);

 // Whatever is still in args at this point is reported as unknown.
 if (!args.isEmpty()) {
  throw new IllegalArgumentException("Unknown parameters: " + args);
 }
}

Javadoc

Create set with enough capacity to hold startSize terms

Popular methods of CharArraySet

contains
true if the len chars of text starting at off are in the set
copy
unmodifiableSet
Returns an unmodifiable CharArraySet. This allows to provide unmodifiable views of internal sets for
add
Add this char[] directly to the set. If ignoreCase is true for this Set, the text array will be dire
addAll
isEmpty
clear
Clears all entries in this set. This method is supported for reusing, but not Set#remove.
equals
getHashCode
getSlot
rehash
size

Popular in Java

Parsing JSON documents to java classes using gson
addToBackStack (FragmentTransaction)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
scheduleAtFixedRate (ScheduledExecutorService)
Charset (java.nio.charset)
A charset is a named mapping between Unicode characters and byte sequences. Every Charset can decode
Dictionary (java.util)
Note: Do not use this class since it is obsolete. Please use the Map interface for new implementatio
ResourceBundle (java.util)
ResourceBundle is an abstract class which is the superclass of classes which provide Locale-specifi
TreeSet (java.util)
TreeSet is an implementation of SortedSet. All optional operations (adding and removing) are support
JButton (javax.swing)
JFrame (javax.swing)
Top plugins for Android Studio

How to use the org.apache.lucene.analysis.CharArraySet constructor

Best Java code snippets using org.apache.lucene.analysis.CharArraySet.<init> (Showing top 20 results out of 315)

How to use
org.apache.lucene.analysis.CharArraySet
constructor