edu.cmu.sphinx.linguist.language.ngram.large.NGramBuffer java code examples

/**
 * Looks up the probability-table value for the bigram (word1, word2).
 *
 * <p>The bigram buffer for {@code word1} is fetched, the entry for
 * {@code word2} is searched in it, and that entry's probability ID is used
 * as an index into row 1 of {@code ngramProbTable}.
 *
 * <p>NOTE(review): {@code findNGram} can return {@code null} when no entry
 * matches; this method does not guard against that - confirm callers only
 * pass bigrams that exist.
 *
 * @param word1
 *            the first word of the bigram
 * @param word2
 *            the second word of the bigram
 * @return the value stored in {@code ngramProbTable[1]} for this bigram
 *         (a log probability according to the original documentation)
 */
private float getBigramProb(int word1, int word2) {
  NGramProbability match = getBigramBuffer(word1).findNGram(word2);
  return ngramProbTable[1][match.getProbabilityID()];
}

/**
 * Binary-searches this buffer for the entry whose word ID equals the given
 * one.
 *
 * <p>The search window is {@code [0, getNumberNGrams() - 1)}: the upper
 * bound is exclusive, so the entry at the last index is never probed.
 * Presumably the entries are sorted by ascending word ID - the search
 * relies on it.
 *
 * @param nthWordID the ID of the nth word
 * @return the NGramProbability of the matching entry, or {@code null} if
 *         no probed entry has that word ID
 */
public NGramProbability findNGram(int nthWordID) {
  int low = 0;
  int high = getNumberNGrams() - 1;
  while (low < high) {
    int probe = (low + high) / 2;
    int probeWordID = getWordID(probe);
    if (probeWordID == nthWordID) {
      return getNGramProbability(probe);
    }
    if (probeWordID < nthWordID) {
      low = probe + 1;
    } else {
      high = probe;
    }
  }
  return null;
}

/**
 * Binary-searches this buffer for the entry with the given word ID and
 * returns that entry's probability ID.
 *
 * <p>The search window is {@code [0, getNumberNGrams())}, upper bound
 * exclusive. Presumably the entries are sorted by ascending word ID - the
 * search relies on it.
 *
 * @param nthWordID the ID of the nth word
 * @return the probability ID of the matching entry, or {@code -1} if no
 *         entry has that word ID
 */
public int findProbabilityID(int nthWordID) {
  int low = 0;
  int high = getNumberNGrams();
  while (low < high) {
    int probe = (low + high) / 2;
    int probeWordID = getWordID(probe);
    if (probeWordID == nthWordID) {
      return getProbabilityID(probe);
    }
    if (probeWordID < nthWordID) {
      low = probe + 1;
    } else {
      high = probe;
    }
  }
  return -1;
}

/**
 * Returns the probability ID stored in the entry of the nth follower.
 *
 * <p>The buffer is positioned at the start of the entry plus one field
 * width (4 bytes when {@code is32bits} is set, otherwise 2) so that the
 * leading word ID is skipped, and the next value is read.
 *
 * @param nthFollower which follower
 * @return the probability ID of the nth follower
 */
public int getProbabilityID(int nthFollower) {
  final int fieldWidth = is32bits ? 4 : 2;
  final int entryStart = nthFollower * LargeNGramModel.BYTES_PER_NGRAM * fieldWidth;
  // Step over the word ID that opens every entry.
  setPosition(entryStart + fieldWidth);
  return readBytesAsInt();
}

/**
 * Binary-searches this buffer for the index of the entry with the given
 * word ID.
 *
 * <p>The search window is {@code [0, getNumberNGrams() - 1)}, upper bound
 * exclusive. Note that a miss is not signalled separately: the value
 * returned is always the last index that was probed, whether or not it
 * matched. {@code -1} is returned only when the window is empty and no
 * probe took place.
 *
 * @param nthWordID the ID of the nth word
 * @return the index of the matching entry; otherwise the last index
 *         probed, or {@code -1} if nothing was probed
 */
public int findNGramIndex(int nthWordID) {
  int lastProbed = -1;
  int low = 0;
  int high = getNumberNGrams() - 1;
  while (low < high) {
    lastProbed = (low + high) / 2;
    int probeWordID = getWordID(lastProbed);
    if (probeWordID == nthWordID) {
      break;
    }
    if (probeWordID < nthWordID) {
      low = lastProbed + 1;
    } else {
      high = lastProbed;
    }
  }
  return lastProbed;
}

int lastWordId = getWordID(ws.getWord(ws.size() - 1));
nMinus1Buffer = getNGramBuffer(ws.getOldest());
int index = nMinus1Buffer.findNGramIndex(lastWordId);
int firstNMinus1GramEntry = nMinus1Buffer.getFirstNGramEntry();
firstCurrentNGramEntry = getFirstNGramEntry(
    nMinus1Buffer.getNGramProbability(index),
    firstNMinus1GramEntry, orderBuffer);
int firstNextNGramEntry = getFirstNGramEntry(
    nMinus1Buffer.getNGramProbability(index + 1),
    firstNMinus1GramEntry, orderBuffer);
numberNGrams = firstNextNGramEntry - firstCurrentNGramEntry;
      firstCurrentNGramEntry);
} else {
  currentBuffer = new NGramBuffer(buffer, numberNGrams,
      loader.getBigEndian(), is32bits(), orderBuffer,
      firstCurrentNGramEntry);

double ugbackoff = logMath.logToLinear(logugbackoff);
for (int j = 0; j < bigram.getNumberNGrams(); j++) {
  int wordID = bigram.getWordID(j);
  NGramProbability bgProb = bigram.getNGramProbability(j);
  continue;
for (int j = 0; j < bigram.getNumberNGrams(); j++) {
  float smearTerm;
  NGramProbability bgProb = bigram.getNGramProbability(j);
  float logbgbackoff = ngramBackoffTable[2][bgProb.getBackoffID()];
  double bgbackoff = logMath.logToLinear(logbgbackoff);
  int k = bigram.getWordID(j);
  NGramBuffer trigram = loadTrigramBuffer(i, k);
    double bg_numerator = 0;
    double bg_denominator = 0;
    for (int l = 0; l < trigram.getNumberNGrams(); l++) {
      int m = trigram.getWordID(l);
      float logtgprob = ngramProbTable[2][trigram
          .getProbabilityID(l)];
      double tgprob = logMath.logToLinear(logtgprob);
      float logbgprob = getBigramProb(k, m);

/**
 * Clears the various N-gram caches.
 *
 * <p>Sweeps the loaded bigram buffers (dropping those not marked used and
 * resetting the used flag on the others), then replaces the bigram buffer
 * array and the per-order buffer maps with fresh, empty ones. Cache
 * statistics are logged, and the probability cache itself is replaced only
 * when {@code clearCacheAfterUtterance} is set.
 *
 * <p>NOTE(review): the array swept by the first loop is replaced by a new
 * array immediately afterwards, so nulling individual slots appears to
 * have no lasting effect - confirm which of the two behaviours is intended.
 */
private void clearCache() {
  for (int slot = 0; slot < loadedBigramBuffers.length; slot++) {
    NGramBuffer cached = loadedBigramBuffers[slot];
    if (cached == null) {
      continue;
    }
    if (cached.getUsed()) {
      cached.setUsed(false);
    } else {
      loadedBigramBuffers[slot] = null; // free the BigramBuffer
    }
  }
  loadedBigramBuffers = new NGramBuffer[unigrams.length];
  for (int depth = 2; depth <= loader.getMaxDepth(); depth++) {
    loadedNGramBuffers[depth - 1] = new HashMap<WordSequence, NGramBuffer>();
  }
  logger.info("LM Cache Size: " + ngramProbCache.size() + " Hits: "
      + ngramHits + " Misses: " + ngramMisses);
  if (clearCacheAfterUtterance) {
    ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
  }
}

int lastWordId = getWordID(ws.getWord(ws.size() - 1));
nMinus1Buffer = getNGramBuffer(ws.getOldest());
int index = nMinus1Buffer.findNGramIndex(lastWordId);
int firstNMinus1GramEntry = nMinus1Buffer.getFirstNGramEntry();
firstCurrentNGramEntry = getFirstNGramEntry(
    nMinus1Buffer.getNGramProbability(index),
    firstNMinus1GramEntry, orderBuffer);
int firstNextNGramEntry = getFirstNGramEntry(
    nMinus1Buffer.getNGramProbability(index + 1),
    firstNMinus1GramEntry, orderBuffer);
numberNGrams = firstNextNGramEntry - firstCurrentNGramEntry;
      firstCurrentNGramEntry);
} else {
  currentBuffer = new NGramBuffer(buffer, numberNGrams,
      loader.getBigEndian(), is32bits(), orderBuffer,
      firstCurrentNGramEntry);

double ugbackoff = logMath.logToLinear(logugbackoff);
for (int j = 0; j < bigram.getNumberNGrams(); j++) {
  int wordID = bigram.getWordID(j);
  NGramProbability bgProb = bigram.getNGramProbability(j);
  continue;
for (int j = 0; j < bigram.getNumberNGrams(); j++) {
  float smearTerm;
  NGramProbability bgProb = bigram.getNGramProbability(j);
  float logbgbackoff = ngramBackoffTable[2][bgProb.getBackoffID()];
  double bgbackoff = logMath.logToLinear(logbgbackoff);
  int k = bigram.getWordID(j);
  NGramBuffer trigram = loadTrigramBuffer(i, k);
    double bg_numerator = 0;
    double bg_denominator = 0;
    for (int l = 0; l < trigram.getNumberNGrams(); l++) {
      int m = trigram.getWordID(l);
      float logtgprob = ngramProbTable[2][trigram
          .getProbabilityID(l)];
      double tgprob = logMath.logToLinear(logtgprob);
      float logbgprob = getBigramProb(k, m);

/**
 * Returns the word ID of the nth follower, read from the start of that
 * follower's entry.
 *
 * <p>The entry size is derived as {@code buffer.length / numberNGrams};
 * the buffer is positioned at the start of the nth entry and one value is
 * read from there.
 *
 * @param nthFollower starts from 0 to (numberFollowers - 1).
 * @return the word ID
 */
public final int getWordID(int nthFollower) {
  final int bytesPerEntry = buffer.length / numberNGrams;
  setPosition(nthFollower * bytesPerEntry);
  return readBytesAsInt();
}

out.writeInt(bigram.getNumberNGrams());
for (int j = 0; j < bigram.getNumberNGrams(); j++) {
  int k = bigram.getWordID(j);
  Float smearTerm = getSmearTerm(i, k);
  out.writeInt(k);

/**
 * Clears the various N-gram caches.
 *
 * <p>Sweeps the loaded bigram buffers (dropping those not marked used and
 * resetting the used flag on the others), then replaces the bigram buffer
 * array and the per-order buffer maps with fresh, empty ones. Cache
 * statistics are logged, and the probability cache itself is replaced only
 * when {@code clearCacheAfterUtterance} is set.
 *
 * <p>NOTE(review): the array swept by the first loop is replaced by a new
 * array immediately afterwards, so nulling individual slots appears to
 * have no lasting effect - confirm which of the two behaviours is intended.
 */
private void clearCache() {
  for (int slot = 0; slot < loadedBigramBuffers.length; slot++) {
    NGramBuffer cached = loadedBigramBuffers[slot];
    if (cached == null) {
      continue;
    }
    if (cached.getUsed()) {
      cached.setUsed(false);
    } else {
      loadedBigramBuffers[slot] = null; // free the BigramBuffer
    }
  }
  loadedBigramBuffers = new NGramBuffer[unigrams.length];
  for (int depth = 2; depth <= loader.getMaxDepth(); depth++) {
    loadedNGramBuffers[depth - 1] = new HashMap<WordSequence, NGramBuffer>();
  }
  logger.info("LM Cache Size: " + ngramProbCache.size() + " Hits: "
      + ngramHits + " Misses: " + ngramMisses);
  if (clearCacheAfterUtterance) {
    ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
  }
}

  /**
   * Builds the NGramProbability for the nth follower from its raw entry.
   *
   * <p>The buffer is positioned at the start of the nth entry and four
   * consecutive values are read in order: word ID, probability ID,
   * backoff ID and first n-gram entry. Each value is read with
   * {@code readBytesAsInt()}.
   *
   * @param nthFollower which follower
   * @return the NGramProbability of the nth follower
   */
  public NGramProbability getNGramProbability(int nthFollower) {
    setPosition(nthFollower * LargeNGramModel.BYTES_PER_NGRAM * (is32bits ? 4 : 2));

    // Field order within an entry is fixed; the reads must stay in this order.
    final int wordID = readBytesAsInt();
    final int probID = readBytesAsInt();
    final int backoffID = readBytesAsInt();
    final int firstNGram = readBytesAsInt();

    return new NGramProbability(nthFollower, wordID, probID, backoffID, firstNGram);
  }
}

NGramBuffer bigram = getBigramBuffer(i);
if (bigram.getNumberNGrams() != numBigrams) {
  in.close();
  throw new IOException("Bad ngrams for unigram " + i + " Found "
      + numBigrams + " expected " + bigram.getNumberNGrams());
  int k = bigram.getWordID(j);
  putSmearTerm(i, k, in.readFloat());

/**
 * Binary-searches this buffer for the entry whose word ID equals the given
 * one.
 *
 * <p>The search window is {@code [0, getNumberNGrams() - 1)}: the upper
 * bound is exclusive, so the entry at the last index is never probed.
 * Presumably the entries are sorted by ascending word ID - the search
 * relies on it.
 *
 * @param nthWordID the ID of the nth word
 * @return the NGramProbability of the matching entry, or {@code null} if
 *         no probed entry has that word ID
 */
public NGramProbability findNGram(int nthWordID) {
  int low = 0;
  int high = getNumberNGrams() - 1;
  while (low < high) {
    int probe = (low + high) / 2;
    int probeWordID = getWordID(probe);
    if (probeWordID == nthWordID) {
      return getNGramProbability(probe);
    }
    if (probeWordID < nthWordID) {
      low = probe + 1;
    } else {
      high = probe;
    }
  }
  return null;
}

/**
 * Binary-searches this buffer for the entry with the given word ID and
 * returns that entry's probability ID.
 *
 * <p>The search window is {@code [0, getNumberNGrams())}, upper bound
 * exclusive. Presumably the entries are sorted by ascending word ID - the
 * search relies on it.
 *
 * @param nthWordID the ID of the nth word
 * @return the probability ID of the matching entry, or {@code -1} if no
 *         entry has that word ID
 */
public int findProbabilityID(int nthWordID) {
  int low = 0;
  int high = getNumberNGrams();
  while (low < high) {
    int probe = (low + high) / 2;
    int probeWordID = getWordID(probe);
    if (probeWordID == nthWordID) {
      return getProbabilityID(probe);
    }
    if (probeWordID < nthWordID) {
      low = probe + 1;
    } else {
      high = probe;
    }
  }
  return -1;
}

/**
 * Finds the NGram probability of the given NGram, loading its buffer when
 * it is not already cached.
 *
 * <p>The buffer is looked up in {@code loadedNGramBuffers} under the
 * sequence returned by {@code wordSequence.getOldest()}. On a miss it is
 * obtained from {@code getNGramBuffer} and, if non-null, stored in the
 * map. The last word of the sequence is then searched in the buffer.
 *
 * @param wordSequence
 *            the NGram to load
 * @return the NGramProbability of the given NGram, or {@code null} if no
 *         buffer is available or the buffer holds no matching entry
 */
private NGramProbability findNGram(WordSequence wordSequence) {
  final int order = wordSequence.size();
  final WordSequence history = wordSequence.getOldest();

  NGramBuffer followers = loadedNGramBuffers[order - 1].get(history);
  if (followers == null) {
    followers = getNGramBuffer(history);
    if (followers == null) {
      return null;
    }
    // Remember the freshly loaded buffer for later lookups.
    loadedNGramBuffers[order - 1].put(history, followers);
  }

  int lastWordID = getWordID(wordSequence.getWord(order - 1));
  return followers.findNGram(lastWordID);
}

/**
 * Returns the probability ID stored in the entry of the nth follower.
 *
 * <p>The buffer is positioned at the start of the entry plus one field
 * width (4 bytes when {@code is32bits} is set, otherwise 2) so that the
 * leading word ID is skipped, and the next value is read.
 *
 * @param nthFollower which follower
 * @return the probability ID of the nth follower
 */
public int getProbabilityID(int nthFollower) {
  final int fieldWidth = is32bits ? 4 : 2;
  final int entryStart = nthFollower * LargeNGramModel.BYTES_PER_NGRAM * fieldWidth;
  // Step over the word ID that opens every entry.
  setPosition(entryStart + fieldWidth);
  return readBytesAsInt();
}

/**
 * Binary-searches this buffer for the index of the entry with the given
 * word ID.
 *
 * <p>The search window is {@code [0, getNumberNGrams() - 1)}, upper bound
 * exclusive. Note that a miss is not signalled separately: the value
 * returned is always the last index that was probed, whether or not it
 * matched. {@code -1} is returned only when the window is empty and no
 * probe took place.
 *
 * @param nthWordID the ID of the nth word
 * @return the index of the matching entry; otherwise the last index
 *         probed, or {@code -1} if nothing was probed
 */
public int findNGramIndex(int nthWordID) {
  int lastProbed = -1;
  int low = 0;
  int high = getNumberNGrams() - 1;
  while (low < high) {
    lastProbed = (low + high) / 2;
    int probeWordID = getWordID(lastProbed);
    if (probeWordID == nthWordID) {
      break;
    }
    if (probeWordID < nthWordID) {
      low = lastProbed + 1;
    } else {
      high = lastProbed;
    }
  }
  return lastProbed;
}

Javadoc

Implements a buffer that contains NGrams. It assumes that the first two bytes of each n-gram entry are the ID of the n-gram.

Most used methods

<init>
Constructs a NGramBuffer object with the given byte[].
findNGram
Finds the NGram probabilities for the given nth word in a NGram.
findNGramIndex
Finds the NGram index for the given nth word in a NGram
getFirstNGramEntry
Returns the firstNGramEntry
getNGramProbability
Returns the NGramProbability of the nth follower.
getNumberNGrams
Returns the number of n-grams in this buffer.
getProbabilityID
Returns the probability ID of the nth follower.
getUsed
Returns true if this buffer was used in the last utterance.
getWordID
Returns the word ID of the nth follower, assuming that the ID is the first two bytes of the NGram entry.
readBytesAsInt
Reads the next two bytes from the buffer's current position as an integer.
setPosition
Sets the position of the buffer.
setUsed
Sets whether this buffer was used in the last utterance

Popular in Java

Finding current android device location
onCreateOptionsMenu (Activity)
getSupportFragmentManager (FragmentActivity)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
FileOutputStream (java.io)
An output stream that writes bytes to a file. If the output file exists, it can be replaced or appen
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
BigInteger (java.math)
An immutable arbitrary-precision signed integer.FAST CRYPTOGRAPHY This implementation is efficient f
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
Manifest (java.util.jar)
The Manifest class is used to obtain attribute information for a JarFile and its entries.
Scheduler (org.quartz)
This is the main interface of a Quartz Scheduler. A Scheduler maintains a registry of org.quartz.Job
Github Copilot alternatives

How to use NGramBuffer in edu.cmu.sphinx.linguist.language.ngram.large

Best Java code snippets using edu.cmu.sphinx.linguist.language.ngram.large.NGramBuffer (Showing top 20 results out of 315)

How to use
NGramBuffer
in
edu.cmu.sphinx.linguist.language.ngram.large