/**
 * Finds the position of the first occurrence of {@code token} in {@code sentence}.
 *
 * @param sentence the token sequence to search
 * @param token the token to look for
 * @return the index of the first matching token, or {@code -1} if absent
 */
private static int indexOf(StringList sentence, String token) {
  int position = 0;
  while (position < sentence.size()) {
    if (token.equals(sentence.getToken(position))) {
      return position;
    }
    position++;
  }
  return -1;
}
/**
 * Returns the next token and advances the iteration position.
 *
 * @return the token at the current position
 * @throws NoSuchElementException if no further tokens remain
 */
public String next() {
  if (!hasNext()) {
    throw new NoSuchElementException();
  }
  return getToken(index++);
}
/**
 * Initializes the current instance with the given
 * {@link StringList} {@link Iterator}.
 *
 * @param tokenLists an iterator over the token lists whose tokens are
 *                   copied, in order, into this index
 */
public Index(Iterator<StringList> tokenLists) {
  // Flatten every StringList into this instance's token collection.
  while (tokenLists.hasNext()) {
    StringList tokens = tokenLists.next();
    for (int i = 0; i < tokens.size(); i++) {
      this.tokens.add(tokens.getToken(i));
    }
  }
}
/**
 * Returns a human-readable representation of this token list: the tokens,
 * comma separated, enclosed in square brackets, e.g. {@code [a,b,c]}.
 *
 * @return the string form of this token list
 */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append('[');
  for (int i = 0; i < size(); i++) {
    // Emit the separator before every token except the first.
    if (i > 0) {
      sb.append(',');
    }
    sb.append(getToken(i));
  }
  sb.append(']');
  return sb.toString();
}
}
/**
 * Counts the sentences in which {@code ngram} occurs, anchored at the first
 * occurrence of the ngram's initial token within each sentence.
 *
 * <p>NOTE(review): only the first occurrence of the ngram's first token is
 * inspected, so each sentence contributes at most 1 and later match positions
 * are ignored — confirm this is the intended semantics.
 *
 * @param ngram the ngram to look for; must contain at least one token
 * @param sentences the sentences to scan
 * @return the number of sentences with a match at the anchored position
 */
private static Double count(StringList ngram, Iterable<StringList> sentences) {
  // Accumulate in a primitive to avoid boxing a new Double on every increment.
  double count = 0d;
  for (StringList sentence : sentences) {
    int idx0 = indexOf(sentence, ngram.getToken(0));
    if (idx0 >= 0 && sentence.size() >= idx0 + ngram.size()) {
      boolean match = true;
      for (int i = 1; i < ngram.size(); i++) {
        if (!sentence.getToken(idx0 + i).equals(ngram.getToken(i))) {
          match = false;
          break; // remaining tokens cannot change the outcome
        }
      }
      if (match) {
        count++;
      }
    }
  }
  return count;
}
/**
 * Get the ngrams of dimension n of a certain input sequence of tokens.
 *
 * @param sequence a sequence of tokens
 * @param size the size of the resulting ngrams
 * @return all the possible ngrams of the given size derivable from the input sequence
 */
public static Collection<StringList> getNGrams(StringList sequence, int size) {
  Collection<StringList> ngrams = new LinkedList<>();
  if (size == -1 || size >= sequence.size()) {
    // A sentinel (-1) or oversized window yields the whole sequence as the only ngram.
    ngrams.add(sequence);
    return ngrams;
  }
  int lastStart = sequence.size() - size;
  for (int start = 0; start <= lastStart; start++) {
    String[] window = new String[size];
    for (int offset = 0; offset < size; offset++) {
      window[offset] = sequence.getToken(start + offset);
    }
    ngrams.add(new StringList(window));
  }
  return ngrams;
}
/**
 * Compares two token lists, ignoring the case of the tokens.
 *
 * <p>Note: This can cause problems with some locales.
 *
 * @param tokens the token list to compare against
 * @return true if the lists are identical ignoring case, otherwise false
 */
public boolean compareToIgnoreCase(StringList tokens) {
  if (size() != tokens.size()) {
    return false;
  }
  for (int i = 0; i < size(); i++) {
    if (getToken(i).compareToIgnoreCase(tokens.getToken(i)) != 0) {
      return false;
    }
  }
  return true;
}
/**
 * Generates the context for a document using character ngrams.
 *
 * @param document document to extract context from
 * @return the generated context
 */
@Override
public String[] getContext(CharSequence document) {
  NGramModel model = new NGramModel();
  model.add(normalizer.normalize(document), minLength, maxLength);

  Collection<String> context = new ArrayList<>();
  for (StringList tokenList : model) {
    if (tokenList.size() > 0) {
      context.add(tokenList.getToken(0));
    }
  }
  // Zero-length array argument: toArray allocates the correctly sized result.
  return context.toArray(new String[0]);
}
}
/**
 * Adds character-ngram features (prefixed with "ng=") for the token at
 * {@code index} to the feature list.
 *
 * @param features the feature list to append to
 * @param tokens the tokens of the sentence
 * @param index the index of the token to generate features for
 * @param preds not used by this generator
 */
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  NGramModel ngrams = new NGramModel();
  ngrams.add(tokens[index], minLength, maxLength);
  for (StringList ngram : ngrams) {
    if (ngram.size() > 0) {
      features.add("ng=" + StringUtil.toLowerCase(ngram.getToken(0)));
    }
  }
}
}
/**
 * Gets the (n-1)th ngram of a given ngram, that is the same ngram except
 * the last word in the ngram.
 *
 * @param ngram a ngram
 * @return the shortened ngram, or {@code null} if {@code ngram} has fewer
 *         than two tokens
 */
public static StringList getNMinusOneTokenFirst(StringList ngram) {
  // Guard: an empty ngram would otherwise trigger new String[-1]
  // (NegativeArraySizeException); a 1-token ngram already returned null.
  if (ngram.size() < 2) {
    return null;
  }
  String[] tokens = new String[ngram.size() - 1];
  for (int i = 0; i < tokens.length; i++) {
    tokens[i] = ngram.getToken(i);
  }
  return new StringList(tokens);
}
/**
 * Builds the (n+1)-gram obtained by appending {@code word} to {@code ngram}.
 *
 * @param ngram the base ngram
 * @param word the token to append
 * @return a new ngram one token longer than the input
 */
private static StringList getNPlusOneNgram(StringList ngram, String word) {
  int n = ngram.size();
  String[] extended = new String[n + 1];
  for (int i = 0; i < n; i++) {
    extended[i] = ngram.getToken(i);
  }
  extended[n] = word;
  return new StringList(extended);
}
/**
 * Adds NGrams up to the specified length to the current instance.
 *
 * @param ngram the tokens to build the uni-grams, bi-grams, tri-grams, ..
 *     from.
 * @param minLength - minimal length
 * @param maxLength - maximal length
 * @throws IllegalArgumentException if a length is below 1, or if
 *     {@code minLength} exceeds {@code maxLength}
 */
public void add(StringList ngram, int minLength, int maxLength) {
  if (minLength < 1 || maxLength < 1) {
    throw new IllegalArgumentException("minLength and maxLength param must be at least 1. "
        + "minLength=" + minLength + ", maxLength= " + maxLength);
  }
  if (minLength > maxLength) {
    throw new IllegalArgumentException("minLength param must not be larger than "
        + "maxLength param. minLength=" + minLength + ", maxLength= " + maxLength);
  }

  // For each requested length, slide a window of that length over the tokens.
  for (int length = minLength; length <= maxLength; length++) {
    for (int start = 0; start + length <= ngram.size(); start++) {
      String[] grams = new String[length];
      for (int offset = 0; offset < length; offset++) {
        grams[offset] = ngram.getToken(start + offset);
      }
      add(new StringList(grams));
    }
  }
}
/**
 * Gets the (n-1)th ngram of a given ngram, that is the same ngram except
 * the first word in the ngram.
 *
 * @param ngram a ngram
 * @return the shortened ngram, or {@code null} if {@code ngram} has fewer
 *         than two tokens
 */
public static StringList getNMinusOneTokenLast(StringList ngram) {
  // Guard: an empty ngram would otherwise trigger new String[-1]
  // (NegativeArraySizeException); a 1-token ngram already returned null.
  if (ngram.size() < 2) {
    return null;
  }
  String[] tokens = new String[ngram.size() - 1];
  for (int i = 1; i < ngram.size(); i++) {
    tokens[i - 1] = ngram.getToken(i);
  }
  return new StringList(tokens);
}
/**
 * Predicts the most probable continuation of {@code tokens} among the ngrams
 * stored in this model.
 *
 * @param tokens the already observed tokens
 * @return the tokens of the highest-probability ngram continuation, or
 *         {@code null} if the model contains no ngrams
 */
@Override
public String[] predictNextTokens(String... tokens) {
  double bestProb = Double.NEGATIVE_INFINITY;
  String[] bestTokens = null;

  for (StringList ngram : this) {
    // Candidate sequence = observed tokens followed by this ngram's tokens.
    String[] candidate = new String[tokens.length + ngram.size()];
    System.arraycopy(tokens, 0, candidate, 0, tokens.length);
    for (int i = 0; i < ngram.size(); i++) {
      candidate[tokens.length + i] = ngram.getToken(i);
    }

    double prob = calculateProbability(candidate);
    if (prob > bestProb) {
      bestProb = prob;
      bestTokens = new String[ngram.size()];
      for (int i = 0; i < ngram.size(); i++) {
        bestTokens[i] = ngram.getToken(i);
      }
    }
  }
  return bestTokens;
}
throw new InvalidFormatException("Each entry must have exactly one token! " + word);
// NOTE(review): fragment of an entry-consuming lambda — the enclosing method and
// the guard condition for the throw above are not visible in this chunk; the line
// below presumably maps each single-token entry to its tags — confirm in context.
newPosDict.dictionary.put(word.getToken(0), tags);
});
/**
 * Predicts the most probable {@link StringList} continuation of {@code tokens}
 * among the ngrams stored in this model.
 *
 * @param tokens the already observed tokens
 * @return the highest-probability ngram continuation, or {@code null} if the
 *         model contains no ngrams
 */
@Override
public StringList predictNextTokens(StringList tokens) {
  double bestProb = Double.NEGATIVE_INFINITY;
  StringList best = null;

  for (StringList ngram : this) {
    // Candidate sequence = observed tokens followed by this ngram's tokens.
    String[] candidate = new String[tokens.size() + ngram.size()];
    for (int i = 0; i < tokens.size(); i++) {
      candidate[i] = tokens.getToken(i);
    }
    for (int i = 0; i < ngram.size(); i++) {
      candidate[tokens.size() + i] = ngram.getToken(i);
    }

    double prob = calculateProbability(new StringList(candidate));
    if (prob > bestProb) {
      bestProb = prob;
      best = ngram;
    }
  }
  return best;
}
private void init(InputStream in) throws IOException { DictionaryEntryPersistor.create(in, entry -> { String operationString = entry.getAttributes().getValue("operation"); StringList word = entry.getTokens(); if (word.size() != 1) throw new InvalidFormatException("Each entry must have exactly one token! " + word); // parse operation Operation operation = Operation.parse(operationString); if (operation == null) throw new InvalidFormatException("Unknown operation type: " + operationString); operationTable.put(word.getToken(0), operation); }); }
// Fragment of a sample-stream test: each group reads the next sample,
// checks it is present, and verifies its first token.
assertEquals("Smith", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Johnson", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Williams", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Jones", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Brown", personName.getToken(0));
// NOTE(review): the next assertion re-checks the same sample with a different
// expected token — a `personName = sampleStream.read()` (plus the null check)
// appears to be missing here; confirm against the original test source.
assertEquals("Mary", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Patricia", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Linda", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Barbara", personName.getToken(0));
personName = sampleStream.read();
assertNotNull(personName);
assertEquals("Elizabeth", personName.getToken(0));
// NOTE(review): same pattern as above — a read() before this assertion seems
// to be missing; verify in context.
assertEquals("James", personName.getToken(0));