Tabnine Logo
StringList.size
Code Index · Add Tabnine to your IDE (free)

How to use the size method in opennlp.tools.util.StringList

Best Java code snippets using opennlp.tools.util.StringList.size (Showing top 20 results out of 315)

origin: apache/opennlp

/**
 * Reports whether the cursor is still before the end of the list.
 *
 * @return {@code true} while {@code index} is smaller than {@code size()}
 */
public boolean hasNext() {
 return size() > index;
}
origin: apache/opennlp

/**
 * Stores the given token sequence as a single dictionary entry and widens
 * the tracked {@code minTokenCount}/{@code maxTokenCount} bounds so that
 * they include its length.
 *
 * @param tokens the token sequence to add as a new entry
 */
public void put(StringList tokens) {
 entrySet.add(new StringListWrapper(tokens));

 final int tokenCount = tokens.size();
 if (tokenCount < minTokenCount) {
  minTokenCount = tokenCount;
 }
 if (tokenCount > maxTokenCount) {
  maxTokenCount = tokenCount;
 }
}
origin: apache/opennlp

 /**
  * Renders the tokens as a bracketed, comma-separated list, e.g. {@code [a,b]}.
  *
  * @return the textual form of this list
  */
 @Override
 public String toString() {
  StringBuilder buffer = new StringBuilder();
  buffer.append('[');

  final int count = size();
  for (int pos = 0; pos < count; pos++) {
   // The separator goes in front of every token except the first one.
   if (pos > 0) {
    buffer.append(',');
   }
   buffer.append(getToken(pos));
  }

  return buffer.append(']').toString();
 }
}
origin: apache/opennlp

/**
 * Finds the position of the first token in {@code sentence} that equals {@code token}.
 *
 * @param sentence the token sequence to scan
 * @param token the token to look for
 * @return the zero-based index of the first match, or {@code -1} if there is none
 */
private static int indexOf(StringList sentence, String token) {
 int position = 0;
 while (position < sentence.size()) {
  if (token.equals(sentence.getToken(position))) {
   return position;
  }
  position++;
 }
 return -1;
}
origin: apache/opennlp

/**
 * Builds a new ngram made of all tokens of {@code ngram} followed by {@code word}.
 *
 * @param ngram the ngram to extend
 * @param word the token appended at the end
 * @return the extended ngram
 */
private static StringList getNPlusOneNgram(StringList ngram, String word) {
 final int length = ngram.size();
 String[] extended = new String[length + 1];

 // The new word always occupies the last slot.
 extended[length] = word;
 for (int pos = 0; pos < length; pos++) {
  extended[pos] = ngram.getToken(pos);
 }
 return new StringList(extended);
}
origin: apache/opennlp

/**
 * Builds the (n-1)-gram obtained by dropping the last token of the given ngram.
 *
 * @param ngram the ngram to shorten
 * @return the ngram without its last token, or {@code null} when no token remains
 */
public static StringList getNMinusOneTokenFirst(StringList ngram) {
 String[] prefix = new String[ngram.size() - 1];
 if (prefix.length == 0) {
  return null;
 }

 for (int pos = 0; pos < prefix.length; pos++) {
  prefix[pos] = ngram.getToken(pos);
 }
 return new StringList(prefix);
}
origin: apache/opennlp

/**
 * Builds the (n-1)-gram obtained by dropping the first token of the given ngram.
 *
 * @param ngram the ngram to shorten
 * @return the ngram without its first token, or {@code null} when no token remains
 */
public static StringList getNMinusOneTokenLast(StringList ngram) {
 String[] suffix = new String[ngram.size() - 1];
 if (suffix.length == 0) {
  return null;
 }

 // Slot pos of the result holds the source token one position further right.
 for (int pos = 0; pos < suffix.length; pos++) {
  suffix[pos] = ngram.getToken(pos + 1);
 }
 return new StringList(suffix);
}
origin: apache/opennlp

/**
 * Initializes the current instance with the given
 * {@link StringList} {@link Iterator}.
 *
 * @param tokenLists
 */
public Index(Iterator<StringList> tokenLists) {
 while (tokenLists.hasNext()) {
  StringList tokens = tokenLists.next();
  for (int i = 0; i < tokens.size(); i++) {
   this.tokens.add(tokens.getToken(i));
  }
 }
}
origin: apache/opennlp

/**
 * Picks the ngram of this model that yields the highest probability when
 * appended to the given tokens.
 *
 * @param tokens the preceding tokens
 * @return the tokens of the best-scoring ngram, or {@code null} if no ngram
 *         scored above negative infinity
 */
@Override
public String[] predictNextTokens(String... tokens) {
 String[] best = null;
 double bestProbability = Double.NEGATIVE_INFINITY;

 for (StringList candidate : this) {
  // Candidate sequence = given tokens followed by the ngram's tokens.
  String[] sequence = new String[candidate.size() + tokens.length];
  System.arraycopy(tokens, 0, sequence, 0, tokens.length);
  for (int offset = 0; offset < candidate.size(); offset++) {
   sequence[tokens.length + offset] = candidate.getToken(offset);
  }

  double probability = calculateProbability(sequence);
  if (probability > bestProbability) {
   bestProbability = probability;
   best = new String[candidate.size()];
   for (int offset = 0; offset < candidate.size(); offset++) {
    best[offset] = candidate.getToken(offset);
   }
  }
 }
 return best;
}
origin: apache/opennlp

/**
 * Compares this list with another token list, ignoring the case of the tokens.
 *
 * Note: This can cause problems with some locales.
 *
 * @param tokens the token list to compare against
 *
 * @return {@code true} if both lists have the same size and all tokens at the
 *         same position are equal ignoring case, otherwise {@code false}
 */
public boolean compareToIgnoreCase(StringList tokens) {
 if (size() != tokens.size()) {
  return false;
 }

 for (int pos = 0; pos < size(); pos++) {
  String mine = getToken(pos);
  String theirs = tokens.getToken(pos);
  if (mine.compareToIgnoreCase(theirs) != 0) {
   return false;
  }
 }
 return true;
}
origin: apache/opennlp

/**
 * Picks the ngram of this model that yields the highest probability when
 * appended to the given tokens.
 *
 * @param tokens the preceding tokens
 * @return the best-scoring ngram, or {@code null} if no ngram scored above
 *         negative infinity
 */
@Override
public StringList predictNextTokens(StringList tokens) {
 StringList best = null;
 double bestProbability = Double.NEGATIVE_INFINITY;

 for (StringList candidate : this) {
  String[] sequence = new String[candidate.size() + tokens.size()];

  // Fill the sequence left to right: the given tokens first, then the candidate.
  int cursor = 0;
  while (cursor < tokens.size()) {
   sequence[cursor] = tokens.getToken(cursor);
   cursor++;
  }
  for (int offset = 0; offset < candidate.size(); offset++) {
   sequence[cursor + offset] = candidate.getToken(offset);
  }

  double probability = calculateProbability(new StringList(sequence));
  if (probability > bestProbability) {
   bestProbability = probability;
   best = candidate;
  }
 }
 return best;
}
origin: apache/opennlp

/**
 * Counts how often {@code ngram} occurs as a contiguous run of tokens across
 * all {@code sentences}.
 *
 * <p>Every start position of every sentence is tried. An occurrence is
 * therefore found even when the first token of the ngram also appears earlier
 * in the same sentence, and repeated occurrences within one sentence are each
 * counted.
 *
 * @param ngram the token sequence to look for
 * @param sentences the sentences to scan
 * @return the total number of occurrences
 */
private static Double count(StringList ngram, Iterable<StringList> sentences) {
 final int ngramSize = ngram.size();
 if (ngramSize == 0) {
  // An empty ngram is treated as occurring nowhere.
  return 0d;
 }

 // Primitive accumulator: avoids re-boxing a Double on every increment.
 double occurrences = 0d;
 for (StringList sentence : sentences) {
  final int lastStart = sentence.size() - ngramSize;
  for (int start = 0; start <= lastStart; start++) {
   boolean match = true;
   for (int i = 0; i < ngramSize; i++) {
    if (!sentence.getToken(start + i).equals(ngram.getToken(i))) {
     match = false;
     break;
    }
   }
   if (match) {
    occurrences++;
   }
  }
 }
 return occurrences;
}
origin: apache/opennlp

/**
 * Get the ngrams of dimension n of a certain input sequence of tokens.
 *
 * @param sequence a sequence of tokens
 * @param size     the size of the resulting ngrams; with {@code -1}, or a size
 *                 not smaller than the sequence, the sequence itself is the only result
 * @return all the possible ngrams of the given size derivable from the input sequence
 */
public static Collection<StringList> getNGrams(StringList sequence, int size) {
 Collection<StringList> result = new LinkedList<>();
 if (size == -1 || size >= sequence.size()) {
  result.add(sequence);
  return result;
 }

 // One buffer is reused for every window.
 // NOTE(review): this relies on StringList copying the array it is given - confirm.
 String[] window = new String[size];
 final int lastStart = sequence.size() - size;
 for (int start = 0; start <= lastStart; start++) {
  window[0] = sequence.getToken(start);
  int offset = 1;
  while (offset < size) {
   window[offset] = sequence.getToken(start + offset);
   offset++;
  }
  result.add(new StringList(window));
 }
 return result;
}
origin: apache/opennlp

/**
 * Calculates the probability of a unigram in a vocabulary using maximum
 * likelihood estimation: the count of the word divided by the total number
 * of tokens in the vocabulary.
 *
 * @param word the only word in the unigram
 * @param set  the vocabulary
 * @return the maximum likelihood probability
 */
public static double calculateUnigramMLProbability(String word, Collection<StringList> set) {
 double totalTokens = 0d;
 for (StringList entry : set) {
  totalTokens += entry.size();
 }

 double occurrences = count(new StringList(word), set);
 return occurrences / totalTokens;
}
origin: apache/opennlp

 /**
  * Builds the context of a document from its character ngrams.
  *
  * @param document the document to extract the context from
  * @return the first token of every non-empty ngram of the normalized document
  */
 @Override
 public String[] getContext(CharSequence document) {
  NGramModel ngrams = new NGramModel();
  ngrams.add(normalizer.normalize(document), minLength, maxLength);

  Collection<String> features = new ArrayList<>();
  for (StringList ngram : ngrams) {
   if (ngram.size() <= 0) {
    continue;
   }
   features.add(ngram.getToken(0));
  }

  String[] result = new String[features.size()];
  return features.toArray(result);
 }
}
origin: apache/opennlp

 /**
  * Adds one {@code "ng="}-prefixed, lower-cased feature for every non-empty
  * ngram of the token at {@code index}.
  *
  * @param features the list the generated features are appended to
  * @param tokens the tokens of the current sequence
  * @param index the position in {@code tokens} of the token to process
  * @param preds not used by this method
  */
 public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  NGramModel ngrams = new NGramModel();
  ngrams.add(tokens[index], minLength, maxLength);

  for (StringList ngram : ngrams) {
   if (ngram.size() <= 0) {
    continue;
   }
   String lowered = StringUtil.toLowerCase(ngram.getToken(0));
   features.add("ng=" + lowered);
  }
 }
}
origin: apache/opennlp

/**
 * Fills the operation table from the dictionary entries read from the stream.
 * Each entry must consist of exactly one token and carry a known
 * {@code operation} attribute.
 *
 * @param in the stream the dictionary entries are read from
 * @throws IOException declared by this method; an {@code InvalidFormatException}
 *         is raised for entries with a wrong token count or an unknown operation
 */
private void init(InputStream in) throws IOException {
 DictionaryEntryPersistor.create(in, entry -> {
  String operationName = entry.getAttributes().getValue("operation");
  StringList tokens = entry.getTokens();
  if (tokens.size() != 1) {
   throw new InvalidFormatException("Each entry must have exactly one token! " + tokens);
  }

  // Translate the attribute text into an operation.
  Operation parsed = Operation.parse(operationName);
  if (parsed == null) {
   throw new InvalidFormatException("Unknown operation type: " + operationName);
  }

  operationTable.put(tokens.getToken(0), parsed);
 });
}
origin: apache/opennlp

/**
 * Scores an ngram by backing off to shorter ngrams when it has not been seen.
 *
 * @param ngram the ngram to score
 * @return the count of the ngram relative to the count of the ngram without
 *         its last token; relative to {@code size()} when no shorter ngram
 *         exists; or 0.4 times the score of the ngram without its first token
 *         when the count is not positive
 */
private double stupidBackoff(StringList ngram) {
 final int ngramCount = getCount(ngram);
 final StringList history = NGramUtils.getNMinusOneTokenFirst(ngram);

 // No shorter ngram available: relate the count to the model size.
 if (history == null || history.size() == 0) {
  return (double) ngramCount / (double) size();
 }

 // Count not positive: retry without the first token, discounted by 0.4.
 if (ngramCount <= 0) {
  return 0.4 * stupidBackoff(NGramUtils.getNMinusOneTokenLast(ngram));
 }

 double historyCount = getCount(history);
 if (historyCount == 0d) {
  historyCount = size(); // avoids an infinite result when the n-1 gram does not exist
 }
 return (double) ngramCount / historyCount;
}
origin: apache/opennlp

 /**
  * Extends the inherited context with token-level ngram features.
  *
  * <p>The document is normalized and tokenized with {@link SimpleTokenizer}.
  * If there is at least one word, ngrams are generated with the bounds
  * {@code 1} and {@code 3}, and each non-empty ngram is appended as a
  * {@code "tg="}-prefixed feature.
  *
  * @param document the document to extract the context from
  * @return the inherited context followed by the token ngram features
  */
 @Override
 public String[] getContext(CharSequence document) {
  String[] superContext = super.getContext(document);
  List<String> context = new ArrayList<>(Arrays.asList(superContext));

  // Keep the parameter untouched; work on the normalized copy instead.
  CharSequence normalized = this.normalizer.normalize(document);
  String[] words = SimpleTokenizer.INSTANCE.tokenize(normalized.toString());

  if (words.length > 0) {
   NGramModel tokenNgramModel = new NGramModel();
   tokenNgramModel.add(new StringList(words), 1, 3);
   for (StringList tokenList : tokenNgramModel) {
    if (tokenList.size() > 0) {
     context.add("tg=" + tokenList.toString());
    }
   }
  }
  return context.toArray(new String[context.size()]);
 }
}
origin: apache/opennlp

/**
 * Tests {@link StringList#getToken(int)}: every token handed to the
 * constructor must be returned at its original position.
 */
@Test
public void testGetToken() {
 String[] expected = {"a", "b"};
 StringList list = new StringList(expected);

 Assert.assertEquals(expected.length, list.size());
 for (int pos = 0; pos < expected.length; pos++) {
  Assert.assertEquals(expected[pos], list.getToken(pos));
 }
}
opennlp.tools.util.StringList.size

Javadoc

Retrieves the number of tokens inside this list.

Popular methods of StringList

  • <init>
    Initializes the current instance. Note: Token Strings will be replaced by identical internal String
  • getToken
    Retrieves a token from the given index.
  • compareToIgnoreCase
    Compares to tokens list and ignores the case of the tokens. Note: This can cause problems with some
  • equals
  • toString
  • iterator
    Retrieves an Iterator over all tokens.
  • hashCode

Popular in Java

  • Start an intent from android
  • getExternalFilesDir (Context)
  • getSharedPreferences (Context)
  • compareTo (BigDecimal)
  • Kernel (java.awt.image)
  • ByteBuffer (java.nio)
    A buffer for bytes. A byte buffer can be created in either one of the following ways: * #allocate
  • Comparator (java.util)
    A Comparator is used to compare two objects to determine their ordering with respect to each other.
  • Date (java.util)
    A specific moment in time, with millisecond precision. Values typically come from System#currentTime
  • Vector (java.util)
    Vector is an implementation of List, backed by an array and synchronized. All optional operations in
  • JFileChooser (javax.swing)
  • CodeWhisperer alternatives
Tabnine Logo
  • Products

    Search for Java code · Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA · WebStorm · Visual Studio · Android Studio · Eclipse · Visual Studio Code · PyCharm · Sublime Text · PhpStorm · Vim · GoLand · RubyMine · Emacs · Jupyter Notebook · Jupyter Lab · Rider · DataGrip · AppCode
  • Company

    About Us · Contact Us · Careers
  • Resources

    FAQ · Blog · Tabnine Academy · Terms of use · Privacy policy · Java Code Index · Javascript Code Index
Get Tabnine for your IDE now