/**
 * Looks up the probability stored for the bigram (word1, word2).
 *
 * <p>The bigram buffer of {@code word1} is fetched, the entry for
 * {@code word2} is located in it, and that entry's probability ID is used
 * to index row 1 of {@code ngramProbTable}.
 *
 * <p>NOTE(review): the result of {@code findNGram} is dereferenced without
 * a null check; confirm callers only request bigrams that exist.
 *
 * @param word1
 *            the first word of the bigram
 * @param word2
 *            the second word of the bigram
 * @return the table value for the bigram (a log probability, per the
 *         original documentation)
 */
private float getBigramProb(int word1, int word2) {
    NGramBuffer followers = getBigramBuffer(word1);
    NGramProbability entry = followers.findNGram(word2);
    int probabilityId = entry.getProbabilityID();
    return ngramProbTable[1][probabilityId];
}
/**
 * Reports how many bigrams there are, by asking {@code getNumberNGrams}
 * for order 2.
 *
 * @return the number of bigrams
 */
public int getNumberBigrams() {
    final int bigramOrder = 2;
    return getNumberNGrams(bigramOrder);
}
/**
 * Provides the trigram probabilities, i.e. the result of
 * {@code getNGramProbabilities} for order 3.
 *
 * @return all the trigram probabilities
 */
public float[] getTrigramProbabilities() {
    final int trigramOrder = 3;
    return getNGramProbabilities(trigramOrder);
}
/**
 * Builds the NGramProbability describing the nth follower in this buffer.
 *
 * <p>The entry starts at
 * {@code nthFollower * BYTES_PER_NMAXGRAM * fieldWidth}, where the field
 * width is 4 when {@code is32bits()} is true and 2 otherwise. Only the word
 * ID and the probability ID are read; the backoff ID and first-NGram index
 * are passed to the constructor as 0.
 *
 * @param nthFollower which follower
 * @return the NGramProbability of the nth follower
 */
@Override
public NGramProbability getNGramProbability(int nthFollower) {
    final int fieldWidth = is32bits() ? 4 : 2;
    final int entryOffset = nthFollower * LargeNGramModel.BYTES_PER_NMAXGRAM * fieldWidth;
    setPosition(entryOffset);

    // The two fields are read in their stored order: word ID, then probability ID.
    final int word = readBytesAsInt();
    final int probability = readBytesAsInt();

    return new NGramProbability(nthFollower, word, probability, 0, 0);
}
}
/**
 * Builds the NGramProbability describing the nth follower in this buffer.
 *
 * <p>The entry starts at
 * {@code nthFollower * BYTES_PER_NGRAM * fieldWidth}, where the field width
 * is 4 when {@code is32bits} is set and 2 otherwise. Four consecutive
 * fields are read from there: word ID, probability ID, backoff ID and the
 * first-NGram index.
 *
 * @param nthFollower which follower
 * @return the NGramProbability of the nth follower
 */
public NGramProbability getNGramProbability(int nthFollower) {
    final int fieldWidth = is32bits ? 4 : 2;
    final int entryOffset = nthFollower * LargeNGramModel.BYTES_PER_NGRAM * fieldWidth;
    setPosition(entryOffset);

    // Read order must follow the stored field order of the entry.
    final int word = readBytesAsInt();
    final int probability = readBytesAsInt();
    final int backoff = readBytesAsInt();
    final int firstEntry = readBytesAsInt();

    return new NGramProbability(nthFollower, word, probability, backoff, firstEntry);
}
}
/** * Returns the NGramProbability of the nth follower. * * @param nthFollower which follower * @return the NGramProbability of the nth follower */ @Override public int getProbabilityID(int nthFollower) { int nthPosition = 0; nthPosition = nthFollower * LargeNGramModel.BYTES_PER_NMAXGRAM * ((is32bits()) ? 4 : 2); setPosition(nthPosition + ((is32bits()) ? 4 : 2)); // to skip the word ID return readBytesAsInt(); }
/**
 * Computes the index of the first NGram entry belonging to an N-1Gram.
 *
 * <p>The absolute position of the N-1Gram (its first-entry index plus its
 * follower number) is shifted right by the loader's log segment size to
 * select a slot in row {@code n - 1} of the segment table. The value in
 * that slot is added to the N-1Gram's own first-N+1Gram entry value.
 *
 * @param nMinus1Gram
 *            the N-1Gram whose first NGram entry is wanted
 * @param firstNMinus1GramEntry
 *            the index of the first N-1Gram entry of the N-1Gram in
 *            question
 * @param n
 *            the order of the NGram
 * @return the index of the first NGram entry of the given N-1Gram
 */
private int getFirstNGramEntry(NGramProbability nMinus1Gram,
        int firstNMinus1GramEntry, int n) {
    int[] segmentRow = ngramSegmentTable[n - 1];
    int absolutePosition = firstNMinus1GramEntry + nMinus1Gram.getWhichFollower();
    int segment = absolutePosition >> loader.getLogNGramSegmentSize();
    return segmentRow[segment] + nMinus1Gram.getFirstNPlus1GramEntry();
}
/** * Returns the NGramProbability of the nth follower. * * @param nthFollower which follower * @return the NGramProbability of the nth follower */ public int getProbabilityID(int nthFollower) { int nthPosition = 0; nthPosition = nthFollower * LargeNGramModel.BYTES_PER_NGRAM * ((is32bits) ? 4 : 2); setPosition(nthPosition + ((is32bits) ? 4 : 2)); // to skip the word ID return readBytesAsInt(); }
/**
 * Resolves a word to its ID through its unigram entry.
 *
 * @param word
 *            the word whose ID is wanted
 * @return the ID of the word
 * @throws IllegalArgumentException
 *             if {@code getUnigram} returns null for the word
 */
public final int getWordID(Word word) {
    UnigramProbability unigram = getUnigram(word);
    if (unigram != null) {
        return unigram.getWordID();
    }
    throw new IllegalArgumentException("No word ID: " + word);
}
/**
 * Gives the location (or offset) into the file where trigrams start, as
 * reported by {@code getNGramOffset} for order 3.
 *
 * @return the location of the trigrams
 */
public long getTrigramOffset() {
    final int trigramOrder = 3;
    return getNGramOffset(trigramOrder);
}
/**
 * Tells whether the model is a 32-bit one, judged by the loader reporting
 * 4 bytes per field.
 *
 * @return true if the loader's bytes-per-field value is 4, false otherwise
 */
private boolean is32bits() {
    return loader.getBytesPerField() == 4;
}
/**
 * Provides the trigram backoff weights, i.e. the result of
 * {@code getNGramBackoffWeights} for order 3.
 *
 * @return all the trigram backoff weights
 */
public float[] getTrigramBackoffWeights() {
    final int trigramOrder = 3;
    return getNGramBackoffWeights(trigramOrder);
}
/**
 * Provides the trigram segment table, i.e. the result of
 * {@code getNGramSegments} for order 3.
 *
 * @return the trigram segment table
 */
public int[] getTrigramSegments() {
    final int trigramOrder = 3;
    return getNGramSegments(trigramOrder);
}
/**
 * Counts the bigram followers of a word.
 *
 * <p>The count is the distance between the first bigram entry of the next
 * unigram and the first bigram entry of this one. The last unigram in the
 * array has no successor to measure against, so 0 is returned for it.
 *
 * @param wordID
 *            the ID of the word
 * @return the number of bigram followers
 */
private int getNumberBigramFollowers(int wordID) {
    final int lastIndex = unigrams.length - 1;
    if (wordID == lastIndex) {
        return 0;
    }
    int nextStart = unigrams[wordID + 1].getFirstBigramEntry();
    int ownStart = unigrams[wordID].getFirstBigramEntry();
    return nextStart - ownStart;
}
/**
 * Deallocates this object by delegating to the loader's
 * {@code deallocate()}.
 *
 * @throws IOException
 *             if the loader's {@code deallocate()} throws it
 */
public void deallocate() throws IOException {
    loader.deallocate();
}
/**
 * Reads the word ID of the nth follower, on the assumption that the ID is
 * stored at the very start of each NGram entry.
 *
 * <p>The size of one entry is taken to be the buffer length divided by the
 * number of NGrams held in it.
 *
 * @param nthFollower starts from 0 to (numberFollowers - 1).
 * @return the word ID
 */
public final int getWordID(int nthFollower) {
    int bytesPerEntry = buffer.length / numberNGrams;
    setPosition(nthFollower * bytesPerEntry);
    return readBytesAsInt();
}
/**
 * Reports how many unigrams there are, by asking {@code getNumberNGrams}
 * for order 1.
 *
 * @return the number of unigrams
 */
public int getNumberUnigrams() {
    final int unigramOrder = 1;
    return getNumberNGrams(unigramOrder);
}
/**
 * Provides the bigram probabilities, i.e. the result of
 * {@code getNGramProbabilities} for order 2.
 *
 * @return all the bigram probabilities
 */
public float[] getBigramProbabilities() {
    final int bigramOrder = 2;
    return getNGramProbabilities(bigramOrder);
}
/**
 * Gives the location (or offset) into the file where bigrams start, as
 * reported by {@code getNGramOffset} for order 2.
 *
 * @return the location of the bigrams
 */
public long getBigramOffset() {
    final int bigramOrder = 2;
    return getNGramOffset(bigramOrder);
}
/**
 * Reports how many trigrams there are, by asking {@code getNumberNGrams}
 * for order 3.
 *
 * @return the number of trigrams
 */
public int getNumberTrigrams() {
    final int trigramOrder = 3;
    return getNumberNGrams(trigramOrder);
}