/**
 * Returns the language-model probability of the given word sequence.
 * Scores for full-depth ngrams are memoized in {@code ngramProbCache};
 * shorter sequences are always recomputed.
 *
 * @param wordSequence the word sequence to score
 * @return probability of the sequence, in logMath log base
 * @throws Error if the sequence is longer than the model's max depth
 */
@Override
public float getProbability(WordSequence wordSequence) {
    final int length = wordSequence.size();
    if (length > maxDepth) {
        throw new Error("Unsupported NGram: " + wordSequence.size());
    }
    // Only maximum-order ngrams are worth caching; shorter contexts are cheap.
    final boolean cacheable = (length == maxDepth);
    if (cacheable) {
        Float cached = ngramProbCache.get(wordSequence);
        if (cached != null) {
            ngramHits++;
            return cached;
        }
        ngramMisses++;
    }
    float score = applyWeights(getProbabilityRaw(wordSequence));
    if (cacheable) {
        ngramProbCache.put(wordSequence, score);
    }
    if (logFile != null) {
        logFile.println(wordSequence.toString().replace("][", " ") + " : " + Float.toString(score));
    }
    return score;
}
/**
 * Hook invoked by the lexicon when an utterance finishes recognition.
 * Drops the per-utterance ngram cache and, when trace logging is enabled,
 * writes an utterance-boundary marker to the log.
 */
public void onUtteranceEnd() {
    clearCache();
    if (logFile == null) {
        return;
    }
    logFile.println("<END_UTT>");
    logFile.flush();
}
/** * extracts raw word sequence probability without using caching, * making fresh LM trie traversing * @param wordSequence - sequence of words to get probability for * @return probability of specialized sequence of words */ private float getProbabilityRaw(WordSequence wordSequence) { int wordsNum = wordSequence.size(); int wordId = unigramIDMap.get(wordSequence.getWord(wordsNum - 1)); TrieRange range = new TrieRange(unigrams[wordId].next, unigrams[wordId + 1].next); float prob = unigrams[wordId].prob; curDepth = 1; if (wordsNum == 1) return prob; //find prob of ngrams of higher order if any prob = getAvailableProb(wordSequence, range, prob); if (curDepth < wordsNum) { //use backoff for rest of ngram prob += getAvailableBackoff(wordSequence); } return prob; }
NgramTrieModel model = new NgramTrieModel("", lm, null, 1.0f); dictionary.allocate(); model.allocate(); assertThat(model.getMaxDepth(), equalTo(3)); assertThat((double) model.getProbability(new WordSequence(words)), closeTo(-831, .001)); new Word("daines", null, false), new Word("david", null, false)}; assertThat((double) model.getProbability(new WordSequence(words1)), closeTo(-67637, .01));
// Build the word -> unigram-id lookup used by getProbabilityRaw().
buildUnigramIDMap();
// LRU cache for full-depth ngram scores, bounded by ngramCacheSize.
ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
// NOTE(review): loader seems to be needed only during model construction — closed here to release it; confirm no later reads.
loader.close();
// Build the word -> unigram-id lookup used by getProbabilityRaw().
buildUnigramIDMap();
// LRU cache for full-depth ngram scores, bounded by ngramCacheSize.
ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
// NOTE(review): loader seems to be needed only during model construction — closed here to release it; confirm no later reads.
loader.close();
/** * extracts raw word sequence probability without using caching, * making fresh LM trie traversing * @param wordSequence - sequence of words to get probability for * @return probability of specialized sequence of words */ private float getProbabilityRaw(WordSequence wordSequence) { int wordsNum = wordSequence.size(); int wordId = unigramIDMap.get(wordSequence.getWord(wordsNum - 1)); TrieRange range = new TrieRange(unigrams[wordId].next, unigrams[wordId + 1].next); float prob = unigrams[wordId].prob; curDepth = 1; if (wordsNum == 1) return prob; //find prob of ngrams of higher order if any prob = getAvailableProb(wordSequence, range, prob); if (curDepth < wordsNum) { //use backoff for rest of ngram prob += getAvailableBackoff(wordSequence); } return prob; }
/**
 * Returns the language-model probability of the given word sequence.
 * Scores for full-depth ngrams are memoized in {@code ngramProbCache};
 * shorter sequences are always recomputed.
 *
 * @param wordSequence the word sequence to score
 * @return probability of the sequence, in logMath log base
 * @throws Error if the sequence is longer than the model's max depth
 */
@Override
public float getProbability(WordSequence wordSequence) {
    final int length = wordSequence.size();
    if (length > maxDepth) {
        throw new Error("Unsupported NGram: " + wordSequence.size());
    }
    // Only maximum-order ngrams are worth caching; shorter contexts are cheap.
    final boolean cacheable = (length == maxDepth);
    if (cacheable) {
        Float cached = ngramProbCache.get(wordSequence);
        if (cached != null) {
            ngramHits++;
            return cached;
        }
        ngramMisses++;
    }
    float score = applyWeights(getProbabilityRaw(wordSequence));
    if (cacheable) {
        ngramProbCache.put(wordSequence, score);
    }
    if (logFile != null) {
        logFile.println(wordSequence.toString().replace("][", " ") + " : " + Float.toString(score));
    }
    return score;
}
/**
 * Hook invoked by the lexicon when an utterance finishes recognition.
 * Drops the per-utterance ngram cache and, when trace logging is enabled,
 * writes an utterance-boundary marker to the log.
 */
public void onUtteranceEnd() {
    clearCache();
    if (logFile == null) {
        return;
    }
    logFile.println("<END_UTT>");
    logFile.flush();
}