/**
 * Returns the language-model probability of the given word sequence.
 * Scores for full-depth ngrams are memoized in {@code ngramProbCache};
 * shorter sequences are always recomputed.
 *
 * @param wordSequence the word sequence to score
 * @return probability of the sequence, in logMath log base
 * @throws Error if the sequence is longer than the model's max depth
 */
@Override
public float getProbability(WordSequence wordSequence) {
    final int length = wordSequence.size();
    if (length > maxDepth) {
        throw new Error("Unsupported NGram: " + wordSequence.size());
    }
    // Only maximum-order ngrams are worth caching; shorter contexts are cheap.
    final boolean cacheable = (length == maxDepth);
    if (cacheable) {
        Float cached = ngramProbCache.get(wordSequence);
        if (cached != null) {
            ngramHits++;
            return cached;
        }
        ngramMisses++;
    }
    float score = applyWeights(getProbabilityRaw(wordSequence));
    if (cacheable) {
        ngramProbCache.put(wordSequence, score);
    }
    if (logFile != null) {
        logFile.println(wordSequence.toString().replace("][", " ") + " : " + Float.toString(score));
    }
    return score;
}
/**
 * Hook invoked by the lexicon when an utterance finishes recognition.
 * Drops the per-utterance ngram cache and, when trace logging is enabled,
 * writes an utterance-boundary marker to the log.
 */
public void onUtteranceEnd() {
    clearCache();
    if (logFile == null) {
        return;
    }
    logFile.println("<END_UTT>");
    logFile.flush();
}
/** * extracts raw word sequence probability without using caching, * making fresh LM trie traversing * @param wordSequence - sequence of words to get probability for * @return probability of specialized sequence of words */ private float getProbabilityRaw(WordSequence wordSequence) { int wordsNum = wordSequence.size(); int wordId = unigramIDMap.get(wordSequence.getWord(wordsNum - 1)); TrieRange range = new TrieRange(unigrams[wordId].next, unigrams[wordId + 1].next); float prob = unigrams[wordId].prob; curDepth = 1; if (wordsNum == 1) return prob; //find prob of ngrams of higher order if any prob = getAvailableProb(wordSequence, range, prob); if (curDepth < wordsNum) { //use backoff for rest of ngram prob += getAvailableBackoff(wordSequence); } return prob; }
NgramTrieModel model = new NgramTrieModel("", lm, null, 1.0f); dictionary.allocate(); model.allocate(); assertThat(model.getMaxDepth(), equalTo(3)); assertThat((double) model.getProbability(new WordSequence(words)), closeTo(-831, .001)); new Word("daines", null, false), new Word("david", null, false)}; assertThat((double) model.getProbability(new WordSequence(words1)), closeTo(-67637, .01));
// Build the word -> unigram-id lookup used by getProbabilityRaw().
buildUnigramIDMap();
// LRU cache for full-depth ngram scores, bounded by ngramCacheSize.
ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
// NOTE(review): loader seems to be needed only during model construction — closed here to release it; confirm no later reads.
loader.close();
// Build the word -> unigram-id lookup used by getProbabilityRaw().
buildUnigramIDMap();
// LRU cache for full-depth ngram scores, bounded by ngramCacheSize.
ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
// NOTE(review): loader seems to be needed only during model construction — closed here to release it; confirm no later reads.
loader.close();
/** * extracts raw word sequence probability without using caching, * making fresh LM trie traversing * @param wordSequence - sequence of words to get probability for * @return probability of specialized sequence of words */ private float getProbabilityRaw(WordSequence wordSequence) { int wordsNum = wordSequence.size(); int wordId = unigramIDMap.get(wordSequence.getWord(wordsNum - 1)); TrieRange range = new TrieRange(unigrams[wordId].next, unigrams[wordId + 1].next); float prob = unigrams[wordId].prob; curDepth = 1; if (wordsNum == 1) return prob; //find prob of ngrams of higher order if any prob = getAvailableProb(wordSequence, range, prob); if (curDepth < wordsNum) { //use backoff for rest of ngram prob += getAvailableBackoff(wordSequence); } return prob; }
/**
 * Returns the language-model probability of the given word sequence.
 * Scores for full-depth ngrams are memoized in {@code ngramProbCache};
 * shorter sequences are always recomputed.
 *
 * @param wordSequence the word sequence to score
 * @return probability of the sequence, in logMath log base
 * @throws Error if the sequence is longer than the model's max depth
 */
@Override
public float getProbability(WordSequence wordSequence) {
    final int length = wordSequence.size();
    if (length > maxDepth) {
        throw new Error("Unsupported NGram: " + wordSequence.size());
    }
    // Only maximum-order ngrams are worth caching; shorter contexts are cheap.
    final boolean cacheable = (length == maxDepth);
    if (cacheable) {
        Float cached = ngramProbCache.get(wordSequence);
        if (cached != null) {
            ngramHits++;
            return cached;
        }
        ngramMisses++;
    }
    float score = applyWeights(getProbabilityRaw(wordSequence));
    if (cacheable) {
        ngramProbCache.put(wordSequence, score);
    }
    if (logFile != null) {
        logFile.println(wordSequence.toString().replace("][", " ") + " : " + Float.toString(score));
    }
    return score;
}
/**
 * Hook invoked by the lexicon when an utterance finishes recognition.
 * Drops the per-utterance ngram cache and, when trace logging is enabled,
 * writes an utterance-boundary marker to the log.
 */
public void onUtteranceEnd() {
    clearCache();
    if (logFile == null) {
        return;
    }
    logFile.println("<END_UTT>");
    logFile.flush();
}