opennlp.tools.util.StringList java code examples

/**
 * Builds a new n-gram made of all tokens of {@code ngram} followed by {@code word}.
 *
 * @param ngram the n-gram to extend
 * @param word  the token placed in the last position of the result
 * @return a {@link StringList} that is one token longer than {@code ngram}
 */
private static StringList getNPlusOneNgram(StringList ngram, String word) {
 final int n = ngram.size();
 final String[] extended = new String[n + 1];
 int pos = 0;
 while (pos < n) {
  extended[pos] = ngram.getToken(pos);
  pos++;
 }
 // the appended word always lands in the final slot
 extended[n] = word;
 return new StringList(extended);
}

/**
 * Tests {@link StringList#compareToIgnoreCase(StringList)}: two lists whose
 * tokens differ only in letter case must be reported as matching.
 */
@Test
public void testCompareToIgnoreCase() {
 StringList lower = new StringList("a", "b");
 StringList upper = new StringList("A", "B");
 Assert.assertTrue(lower.compareToIgnoreCase(upper));
}

/**
 * Tests {@link StringList#equals(Object)}: lists with the same tokens are
 * equal, lists whose tokens differ in case are not.
 */
@Test
public void testEquals() {
 StringList lower = new StringList("a", "b");
 StringList sameTokens = new StringList("a", "b");
 StringList upper = new StringList("A", "B");

 Assert.assertEquals(lower, sameTokens);
 Assert.assertFalse(lower.equals(upper));
}

/**
 * Finds the position of the first token in {@code sentence} that equals {@code token}.
 *
 * @param sentence the token list to search
 * @param token    the token to look for
 * @return the index of the first matching token, or {@code -1} if there is none
 */
private static int indexOf(StringList sentence, String token) {
 int found = -1;
 int pos = 0;
 // stop as soon as the first match has been recorded
 while (found < 0 && pos < sentence.size()) {
  if (token.equals(sentence.getToken(pos))) {
   found = pos;
  }
  pos++;
 }
 return found;
}

/**
 * Calculates the probability of a unigram in a vocabulary using maximum likelihood estimation.
 * The estimate is the count of the unigram divided by the total number of tokens in the
 * vocabulary.
 *
 * @param word the only word in the unigram
 * @param set  the vocabulary
 * @return the maximum likelihood probability, or {@code 0} if the vocabulary contains no tokens
 */
public static double calculateUnigramMLProbability(String word, Collection<StringList> set) {
 StringList unigram = new StringList(word);
 // total number of tokens over all entries of the vocabulary
 double vocSize = 0d;
 for (StringList s : set) {
  vocSize += s.size();
 }
 if (vocSize == 0d) {
  // an empty vocabulary would otherwise yield 0/0 = NaN
  return 0d;
 }
 return count(unigram, set) / vocSize;
}

 /**
  * Checks whether the given object is a {@link String} that is present in the
  * entry set as a single-token entry.
  *
  * @param obj the candidate element
  * @return {@code true} if {@code obj} is a {@code String} and the entry set holds
  *         the one-token list built from it; {@code false} otherwise
  */
 @Override
 public boolean contains(Object obj) {
  if (!(obj instanceof String)) {
   return false;
  }
  // wrap the string as a one-token list so it can be looked up in the entry set
  StringListWrapper key = new StringListWrapper(new StringList((String) obj));
  return entrySet.contains(key);
 }
};

 /**
  * Builds the context for a document: the entries returned by the superclass,
  * followed by one {@code "tg="}-prefixed entry per non-empty token n-gram
  * collected from the normalized, tokenized document.
  *
  * @param document the text to extract the context from
  * @return the superclass context extended with the token n-gram entries
  */
 @Override
 public String[] getContext(CharSequence document) {
  String[] superContext = super.getContext(document);
  // copy into a growable list; Arrays.asList alone is fixed-size
  List<String> context = new ArrayList<String>(Arrays.asList(superContext));
  document = this.normalizer.normalize(document);
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] words = tokenizer.tokenize(document.toString());
  NGramModel tokenNgramModel = new NGramModel();
  if (words.length > 0) {
   // NOTE(review): 1 and 3 are presumably the minimum and maximum n-gram length - confirm
   tokenNgramModel.add(new StringList(words), 1, 3);
   // wildcard instead of a raw Iterator; the elements are cast to StringList below
   Iterator<?> tokenNgramIterator = tokenNgramModel.iterator();
   while (tokenNgramIterator.hasNext()) {
    StringList tokenList = (StringList) tokenNgramIterator.next();
    if (tokenList.size() > 0) {
     context.add("tg=" + tokenList.toString());
    }
   }
  }
  return context.toArray(new String[context.size()]);
 }
}

/**
 * Tests that {@link StringList} uses {@link String#intern}: the same token
 * obtained from two different lists must be the identical object.
 */
@Test
public void testIntern() {
 String fromSingle = new StringList("a").getToken(0);
 String fromPair = new StringList("a", "b").getToken(0);
 // reference comparison is intentional: this checks identity, not equality
 Assert.assertTrue(fromSingle == fromPair);
}

 /**
  * Tests {@link StringList#toString()} for a one-token and a two-token list.
  */
 @Test
 public void testToString() {
  String single = new StringList("a").toString();
  Assert.assertEquals("[a]", single);

  String pair = new StringList("a", "b").toString();
  Assert.assertEquals("[a,b]", pair);
 }
}

/**
 * Tests {@link StringList#hashCode()}: lists with the same tokens share a hash
 * code, and the two differing lists used here are expected to hash differently.
 */
@Test
public void testHashCode() {
 int first = new StringList("a", "b").hashCode();
 int sameTokens = new StringList("a", "b").hashCode();
 Assert.assertEquals(first, sameTokens);

 int repeated = new StringList("a", "b").hashCode();
 int otherTokens = new StringList("a", "c").hashCode();
 Assert.assertNotEquals(repeated, otherTokens);
}

/**
 * Tests {@link StringList#iterator()} with a one-token list and with a list
 * of more than one token: tokens must come back in order and the iterator
 * must be exhausted afterwards.
 */
@Test
public void testIterator() {
 String[][] cases = {
   {"a"},
   {"a", "b", "c"}
 };
 for (String[] expected : cases) {
  Iterator<String> it = new StringList(expected).iterator();
  for (String token : expected) {
   Assert.assertTrue(it.hasNext());
   Assert.assertEquals(token, it.next());
  }
  Assert.assertFalse(it.hasNext());
 }
}

/**
 * Reports whether the iteration has more tokens.
 *
 * @return {@code true} while the current index is below the list size
 */
public boolean hasNext() {
 boolean exhausted = index >= size();
 return !exhausted;
}

/**
 * Compares this wrapper with another object. Only another
 * {@code StringListWrapper} can be equal; the wrapped lists are compared
 * exactly when {@code isCaseSensitive} is set and ignoring case otherwise.
 *
 * @param obj the object to compare with
 * @return {@code true} if {@code obj} is this instance or a wrapper whose list matches
 */
@Override
public boolean equals(Object obj) {
 if (obj == this) {
  return true;
 }
 if (!(obj instanceof StringListWrapper)) {
  return false;
 }
 StringList otherList = ((StringListWrapper) obj).getStringList();
 return isCaseSensitive
   ? this.stringList.equals(otherList)
   : this.stringList.compareToIgnoreCase(otherList);
}

/**
 * Returns the token at the current index and advances the index by one.
 *
 * @return the next token
 * @throws NoSuchElementException if no tokens are left
 */
public String next() {
 if (!hasNext()) {
  throw new NoSuchElementException();
 }
 String token = getToken(index);
 index++;
 return token;
}

 /**
  * Returns the string form of the wrapped token list.
  *
  * @return the result of calling {@code toString()} on the wrapped list
  */
 @Override
 public String toString() {
  final String text = this.stringList.toString();
  return text;
 }
}

for (Iterator<String> it = tokens.iterator(); it.hasNext(); ) {

/**
 * Calculates the probability of a bigram in a vocabulary using maximum likelihood
 * estimation by delegating to {@code calculateNgramMLProbability} with a two-token n-gram.
 *
 * @param x0  first word in the bigram
 * @param x1  second word in the bigram
 * @param set the vocabulary
 * @return the maximum likelihood probability
 */
public static double calculateBigramMLProbability(String x0, String x1, Collection<StringList> set) {
 StringList bigram = new StringList(x0, x1);
 return calculateNgramMLProbability(bigram, set);
}

/**
 * Counts how often an n-gram occurs as a contiguous token sequence in the given sentences.
 * Every start position of every sentence is checked, so an n-gram that appears several
 * times in one sentence, or only after an earlier occurrence of its first token, is
 * still counted.
 *
 * @param ngram     the n-gram to look for
 * @param sentences the sentences to search
 * @return the number of occurrences; {@code 0} for an n-gram without tokens
 */
private static Double count(StringList ngram, Iterable<StringList> sentences) {
 final int n = ngram.size();
 if (n == 0) {
  // nothing to match; avoids reading a first token that does not exist
  return 0d;
 }
 // primitive accumulator: no boxing on every increment
 double occurrences = 0d;
 for (StringList sentence : sentences) {
  // last index at which the whole n-gram still fits into the sentence
  final int lastStart = sentence.size() - n;
  for (int start = 0; start <= lastStart; start++) {
   boolean match = true;
   for (int i = 0; match && i < n; i++) {
    match = sentence.getToken(start + i).equals(ngram.getToken(i));
   }
   if (match) {
    occurrences++;
   }
  }
 }
 return occurrences;
}

/**
 * Calculates the probability of a unigram in a vocabulary using maximum likelihood estimation.
 * The estimate is the count of the unigram divided by the total number of tokens in the
 * vocabulary.
 *
 * @param word the only word in the unigram
 * @param set  the vocabulary
 * @return the maximum likelihood probability, or {@code 0} if the vocabulary contains no tokens
 */
public static double calculateUnigramMLProbability(String word, Collection<StringList> set) {
 StringList unigram = new StringList(word);
 // total number of tokens over all entries of the vocabulary
 double vocSize = 0d;
 for (StringList s : set) {
  vocSize += s.size();
 }
 if (vocSize == 0d) {
  // an empty vocabulary would otherwise yield 0/0 = NaN
  return 0d;
 }
 return count(unigram, set) / vocSize;
}

/**
 * Adds the tokens to the dictionary as one new entry and widens the tracked
 * minimum and maximum token counts to include the size of the new entry.
 *
 * @param tokens the new entry
 */
public void put(StringList tokens) {
 StringListWrapper entry = new StringListWrapper(tokens);
 entrySet.add(entry);

 final int tokenCount = tokens.size();
 if (tokenCount < minTokenCount) {
  minTokenCount = tokenCount;
 }
 if (tokenCount > maxTokenCount) {
  maxTokenCount = tokenCount;
 }
}

Javadoc

The StringList is an immutable list of Strings.

Most used methods

<init>
Initializes the current instance. Note: Token Strings will be replaced by identical internal String
getToken
Retrieves a token from the given index.
size
Retrieves the number of tokens inside this list.
compareToIgnoreCase
Compares to tokens list and ignores the case of the tokens. Note: This can cause problems with some
equals
toString
iterator
Retrieves an Iterator over all tokens.
hashCode

Popular in Java

Creating JSON documents from java classes using gson
getSupportFragmentManager (FragmentActivity)
onRequestPermissionsResult (Fragment)
getSystemService (Context)
PrintStream (java.io)
Fake signature of an existing Java class.
Map (java.util)
A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in t
JFrame (javax.swing)
Response (javax.ws.rs.core)
Defines the contract between a returned instance and the runtime when an application needs to provid
Top plugins for WebStorm

How to use StringList in opennlp.tools.util

Best Java code snippets using opennlp.tools.util.StringList (Showing top 20 results out of 315)

How to use
StringList
in
opennlp.tools.util