org.apache.lucene.analysis.CharArraySet.contains java code examples

/**
 * Decides whether the current token is kept: it is accepted only when its
 * term text is not contained in {@code stopWords}.
 *
 * @return {@code true} if the current term is not a stop word
 */
@Override
protected boolean accept() {
 final boolean isStopWord = stopWords.contains(termAtt.buffer(), 0, termAtt.length());
 return !isStopWord;
}

final CharArraySet stopWords = SearchFieldAnalyzer.getStopWords();
for (String word : words) {
  if (stopWords.contains(word)) {
    continue;

 /**
  * Matches only when the superclass matches and the first {@code len} chars
  * of {@code s} are not contained in {@code exceptions}.
  */
 @Override
 public boolean matches(char s[], int len) {
  if (!super.matches(s, len)) {
   return false;
  }
  return !exceptions.contains(s, 0, len);
 }
}

/**
 * Drops a trailing -ηστε. When the remaining stem is listed in {@code exc17},
 * three chars are restored so that only the final char is removed.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule17(char s[], int len) {
 if (len <= 4 || !endsWith(s, len, "ηστε")) {
  return len;
 }
 final int stemLen = len - 4;
 // exception stems keep the -ηστ part
 return exc17.contains(s, 0, stemLen) ? stemLen + 3 : stemLen;
}

/**
 * Applies two suffix checks in sequence: first -ιεστε (exceptions in
 * {@code exc12a}), then -εστε (exceptions in {@code exc12b}). The second
 * check runs on the length produced by the first.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule12(char s[], int len) {
 if (len > 5 && endsWith(s, len, "ιεστε")) {
  final int stem = len - 5;
  // exception stems keep -ιεστ
  len = exc12a.contains(s, 0, stem) ? stem + 4 : stem;
 }

 if (len > 4 && endsWith(s, len, "εστε")) {
  final int stem = len - 4;
  // exception stems keep -εστ
  len = exc12b.contains(s, 0, stem) ? stem + 3 : stem;
 }

 return len;
}

 /**
  * Accepts the current token only when its term text is contained in
  * {@code words}.
  */
 @Override
 public boolean accept() {
  final char[] buffer = termAtt.buffer();
  final int length = termAtt.length();
  return words.contains(buffer, 0, length);
 }
}

/**
 * Reports whether the current token should be filtered.
 *
 * @return {@code true} when the current term is not contained in
 *         {@code protectedTerms}
 */
@Override
protected boolean shouldFilter() {
 return !protectedTerms.contains(termAtt.buffer(), 0, termAtt.length());
}

/**
 * Drops a trailing -εωσ or -εων. When the remaining stem is listed in
 * {@code exc4}, one char is restored.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule4(char s[], int len) {
 final boolean hasSuffix = len > 3 && (endsWith(s, len, "εωσ") || endsWith(s, len, "εων"));
 if (!hasSuffix) {
  return len;
 }
 final int stemLen = len - 3;
 // exception stems keep the leading -ε of the suffix
 return exc4.contains(s, 0, stemLen) ? stemLen + 1 : stemLen;
}

/**
 * Checks the current term against {@code commonWords}.
 *
 * @return {@code true} if {@code commonWords} is non-null and contains the
 *         current term, {@code false} otherwise
 */
private boolean isCommon() {
 if (commonWords == null) {
  return false;
 }
 return commonWords.contains(termAttribute.buffer(), 0, termAttribute.length());
}

/**
 * Reports whether the current term is contained in {@code keywordSet}.
 */
@Override
protected boolean isKeyword() {
 final char[] buffer = termAtt.buffer();
 final int length = termAtt.length();
 return keywordSet.contains(buffer, 0, length);
}

/**
 * Stems the provided word and removes duplicate stems, keeping the first
 * occurrence of each in the order returned by {@code stem}.
 * Duplicates are detected with a {@code CharArraySet} created with
 * {@code dictionary.ignoreCase}.
 *
 * @param word   Word to find the stems for
 * @param length value passed through to {@code stem} together with {@code word}
 * @return List of stems for the word, without duplicates
 */
public List<CharsRef> uniqueStems(char word[], int length) {
 final List<CharsRef> stems = stem(word, length);
 // Zero or one stem cannot contain a duplicate; hand the list back untouched.
 if (stems.size() < 2) {
  return stems;
 }

 final CharArraySet seen = new CharArraySet(8, dictionary.ignoreCase);
 final List<CharsRef> unique = new ArrayList<>();
 for (CharsRef candidate : stems) {
  if (seen.contains(candidate)) {
   continue;
  }
  seen.add(candidate);
  unique.add(candidate);
 }
 return unique;
}

/**
 * Drops a trailing -ησου, -ησε or -ησα. When the remaining stem is listed in
 * {@code exc16}, two chars are restored.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule16(char s[], int len) {
 final int suffixLen;
 if (len > 4 && endsWith(s, len, "ησου")) {
  suffixLen = 4;
 } else if (len > 3 && (endsWith(s, len, "ησε") || endsWith(s, len, "ησα"))) {
  suffixLen = 3;
 } else {
  // no suffix matched, nothing to do
  return len;
 }

 final int stemLen = len - suffixLen;
 // exception stems keep -ησ
 return exc16.contains(s, 0, stemLen) ? stemLen + 2 : stemLen;
}

 /**
  * Advances the input stream and, when the term contains an apostrophe whose
  * preceding text is contained in {@code articles}, replaces the term with
  * the text that follows the apostrophe.
  *
  * @return {@code true} if the input produced a token, {@code false} otherwise
  */
 @Override
 public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
   return false;
  }

  final char[] termBuffer = termAtt.buffer();
  final int termLength = termAtt.length();

  // Position of the first apostrophe (ASCII ' or U+2019), or -1 if none.
  int apostrophe = -1;
  for (int pos = 0; pos < termLength && apostrophe < 0; pos++) {
   final char c = termBuffer[pos];
   if (c == '\'' || c == '\u2019') {
    apostrophe = pos;
   }
  }

  // Strip the prefix and the apostrophe only when the prefix is a known article.
  if (apostrophe >= 0 && articles.contains(termBuffer, 0, apostrophe)) {
   final int start = apostrophe + 1;
   termAtt.copyBuffer(termBuffer, start, termLength - start);
  }

  return true;
 }
}

/**
 * Drops a trailing -ησουμε, -ηθουμε or -ουμε. When the remaining stem is
 * listed in {@code exc19}, the three chars ο, υ, μ are written into
 * {@code s} directly after the stem and included in the returned length.
 *
 * @param s   term buffer; modified in place for exception stems
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule19(char s[], int len) {
 final int suffixLen;
 if (len > 6 && (endsWith(s, len, "ησουμε") || endsWith(s, len, "ηθουμε"))) {
  suffixLen = 6;
 } else if (len > 4 && endsWith(s, len, "ουμε")) {
  suffixLen = 4;
 } else {
  return len;
 }

 final int stemLen = len - suffixLen;
 if (!exc19.contains(s, 0, stemLen)) {
  return stemLen;
 }

 // The longer suffixes do not start with these chars, so they are written explicitly.
 s[stemLen] = 'ο';
 s[stemLen + 1] = 'υ';
 s[stemLen + 2] = 'μ';
 return stemLen + 3;
}

/**
 * Drops a trailing -ησουνε, -ηθουνε or -ουνε. When the remaining stem is
 * listed in {@code exc18}, the three chars ο, υ, ν are written into
 * {@code s} directly after the stem and included in the returned length.
 *
 * @param s   term buffer; modified in place for exception stems
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule18(char s[], int len) {
 final int suffixLen;
 if (len > 6 && (endsWith(s, len, "ησουνε") || endsWith(s, len, "ηθουνε"))) {
  suffixLen = 6;
 } else if (len > 4 && endsWith(s, len, "ουνε")) {
  suffixLen = 4;
 } else {
  return len;
 }

 final int stemLen = len - suffixLen;
 if (!exc18.contains(s, 0, stemLen)) {
  return stemLen;
 }

 // The longer suffixes do not start with these chars, so they are written explicitly.
 s[stemLen] = 'ο';
 s[stemLen + 1] = 'υ';
 s[stemLen + 2] = 'ν';
 return stemLen + 3;
}

/**
 * Advances to the next token that is not a repeat of a term already seen
 * since the last token with a positive position increment. Every term read,
 * duplicate or not, is copied and recorded in {@code previous}.
 *
 * @return {@code true} when a non-duplicate token is available,
 *         {@code false} once the input is exhausted
 */
@Override
public boolean incrementToken() throws IOException {
 while (input.incrementToken()) {
  final char[] buffer = termAttribute.buffer();
  final int termLength = termAttribute.length();
  final int increment = posIncAttribute.getPositionIncrement();

  final boolean seenBefore;
  if (increment > 0) {
   // A positive increment resets the set of remembered terms.
   previous.clear();
   seenBefore = false;
  } else {
   seenBefore = increment == 0 && previous.contains(buffer, 0, termLength);
  }

  // Store a private copy: the attribute's buffer is read again on the next iteration.
  final char[] copy = new char[termLength];
  System.arraycopy(buffer, 0, copy, 0, termLength);
  previous.add(copy);

  if (seenBefore) {
   continue;
  }
  return true;
 }
 return false;
}

/**
 * Drops a trailing -ικα, -ικο, -ικου or -ικων. Two chars are restored when
 * the remaining stem ends with a vowel (per {@code endsWithVowel}) or is
 * listed in {@code exc6}.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule6(char s[], int len) {
 final int suffixLen;
 if (len > 3 && (endsWith(s, len, "ικα") || endsWith(s, len, "ικο"))) {
  suffixLen = 3;
 } else if (len > 4 && (endsWith(s, len, "ικου") || endsWith(s, len, "ικων"))) {
  suffixLen = 4;
 } else {
  return len;
 }

 final int stemLen = len - suffixLen;
 final boolean keepIk = endsWithVowel(s, stemLen) || exc6.contains(s, 0, stemLen);
 // keepIk: add back -ικ
 return keepIk ? stemLen + 2 : stemLen;
}

/**
 * Handles suffixes ending in -αμε. A term that is exactly "αγαμε" loses only
 * its last char. Otherwise one of the longer suffixes (-ηθηκαμε, -ουσαμε,
 * -αγαμε, -ησαμε, -ηκαμε) is dropped if present, and then a trailing -αμε is
 * dropped from the result, restoring two chars when the stem is listed in
 * {@code exc7}.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule7(char s[], int len) {
 if (len == 5 && endsWith(s, len, "αγαμε")) {
  return len - 1;
 }

 if (len > 7 && endsWith(s, len, "ηθηκαμε")) {
  len -= 7;
 } else if (len > 6 && endsWith(s, len, "ουσαμε")) {
  len -= 6;
 } else if (len > 5) {
  final boolean fiveCharSuffix = endsWith(s, len, "αγαμε")
    || endsWith(s, len, "ησαμε")
    || endsWith(s, len, "ηκαμε");
  if (fiveCharSuffix) {
   len -= 5;
  }
 }

 if (len <= 3 || !endsWith(s, len, "αμε")) {
  return len;
 }
 final int stemLen = len - 3;
 // exception stems keep -αμ
 return exc7.contains(s, 0, stemLen) ? stemLen + 2 : stemLen;
}

/**
 * First drops a trailing -ηθηκεσ, -ηθηκα or -ηθηκε if present. Then drops a
 * trailing -ηκεσ, -ηκα or -ηκε from the result; in that second case two
 * chars are restored when the stem is listed in {@code exc13} or ends with
 * one of a fixed set of endings.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule13(char s[], int len) {
 if (len > 6 && endsWith(s, len, "ηθηκεσ")) {
  len -= 6;
 } else if (len > 5 && (endsWith(s, len, "ηθηκα") || endsWith(s, len, "ηθηκε"))) {
  len -= 5;
 }

 final int suffixLen;
 if (len > 4 && endsWith(s, len, "ηκεσ")) {
  suffixLen = 4;
 } else if (len > 3 && (endsWith(s, len, "ηκα") || endsWith(s, len, "ηκε"))) {
  suffixLen = 3;
 } else {
  return len;
 }

 final int stemLen = len - suffixLen;
 final boolean keepIk = exc13.contains(s, 0, stemLen)
   || endsWith(s, stemLen, "σκωλ")
   || endsWith(s, stemLen, "σκουλ")
   || endsWith(s, stemLen, "ναρθ")
   || endsWith(s, stemLen, "σφ")
   || endsWith(s, stemLen, "οθ")
   || endsWith(s, stemLen, "πιθ");
 // keepIk: add back the -ηκ
 return keepIk ? stemLen + 2 : stemLen;
}

/**
 * Drops a trailing -ουσεσ, -ουσα or -ουσε. Three chars are restored when the
 * remaining stem is listed in {@code exc14}, ends with a vowel (per
 * {@code endsWithVowel}), or ends with one of a fixed set of endings.
 *
 * @param s   term buffer
 * @param len number of valid chars in {@code s}
 * @return the new length
 */
private int rule14(char s[], int len) {
 final int suffixLen;
 if (len > 5 && endsWith(s, len, "ουσεσ")) {
  suffixLen = 5;
 } else if (len > 4 && (endsWith(s, len, "ουσα") || endsWith(s, len, "ουσε"))) {
  suffixLen = 4;
 } else {
  return len;
 }

 final int stemLen = len - suffixLen;
 final boolean keepOus = exc14.contains(s, 0, stemLen)
   || endsWithVowel(s, stemLen)
   || endsWith(s, stemLen, "ποδαρ")
   || endsWith(s, stemLen, "βλεπ")
   || endsWith(s, stemLen, "πανταχ")
   || endsWith(s, stemLen, "φρυδ")
   || endsWith(s, stemLen, "μαντιλ")
   || endsWith(s, stemLen, "μαλλ")
   || endsWith(s, stemLen, "κυματ")
   || endsWith(s, stemLen, "λαχ")
   || endsWith(s, stemLen, "ληγ")
   || endsWith(s, stemLen, "φαγ")
   || endsWith(s, stemLen, "ομ")
   || endsWith(s, stemLen, "πρωτ");
 // keepOus: add back -ουσ
 return keepOus ? stemLen + 3 : stemLen;
}

Javadoc

true if the CharSequence is in the set

Popular methods of CharArraySet

<init>
copy
unmodifiableSet
Returns an unmodifiable CharArraySet. This allows to provide unmodifiable views of internal sets for
add
Add this char[] directly to the set. If ignoreCase is true for this Set, the text array will be dire
addAll
isEmpty
clear
Clears all entries in this set. This method is supported for reusing, but not Set#remove.
equals
getHashCode
getSlot
rehash
size

Popular in Java

Reading from database using SQL prepared statement
addToBackStack (FragmentTransaction)
putExtra (Intent)
getSystemService (Context)
Thread (java.lang)
A thread is a thread of execution in a program. The Java Virtual Machine allows an application to ha
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
CountDownLatch (java.util.concurrent)
A synchronization aid that allows one or more threads to wait until a set of operations being perfor
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
Get (org.apache.hadoop.hbase.client)
Used to perform Get operations on a single row. To get everything for a row, instantiate a Get objec
From CI to AI: The AI layer in your organization

How to use contains method in org.apache.lucene.analysis.CharArraySet

Best Java code snippets using org.apache.lucene.analysis.CharArraySet.contains (Showing top 20 results out of 315)

How to use
contains
method
in
org.apache.lucene.analysis.CharArraySet