/** * Reads weights quantation object from stream * @param order - max order of ngrams for this model * @return quantation object, see {@link NgramTrieQuant} * @throws IOException if reading from stream failed */ public NgramTrieQuant readQuant(int order) throws IOException { int quantTypeInt = Utilities.readLittleEndianInt(inStream); if (quantTypeInt < 0 || quantTypeInt >= NgramTrieQuant.QuantType.values().length) throw new Error("Unknown quantatization type: " + quantTypeInt); NgramTrieQuant.QuantType quantType = NgramTrieQuant.QuantType.values()[quantTypeInt]; NgramTrieQuant quant = new NgramTrieQuant(order, quantType); //reading tables for (int i = 2; i <= order; i++) { quant.setTable(readFloatArr(quant.getProbTableLen()), i, true); if (i < order) quant.setTable(readFloatArr(quant.getBackoffTableLen()), i, false); } return quant; }
/** * Reads encoded probability from provided trie bit array and decodes it into actual value * for specific ngram * @param bitArr - trie bit array * @param memPtr - memory pointer for specific ngram order * @param bitOffset - offset from memPtr that is calculated according to ngram index * @param orderMinusTwo - order of ngram minus two * @return probability of ngram */ public float readProb(NgramTrieBitarr bitArr, int memPtr, int bitOffset, int orderMinusTwo) { switch (quantType) { case NO_QUANT: return bitArr.readNegativeFloat(memPtr, bitOffset); case QUANT_16: int tableIdx = orderMinusTwo * 2; if (tableIdx < tables.length - 1) bitOffset += backoffBits; return binsDecode(tableIdx, bitArr.readInt(memPtr, bitOffset, backoffMask)); //TODO implement different quantization stages default: throw new Error("Unsupported quantization type: " + quantType); } }
// Create the trie from counts (presumably the per-order ngram counts -- confirm)
// and the two size values reported by the quantization object
// (quant.getProbBoSize() and quant.getProbSize()).
trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize());
// Pass the trie's memory (trie.getMem()) to the loader so it can be filled;
// presumably this is the raw storage read from the model stream -- TODO confirm.
loader.readTrieByteArr(trie.getMem());
/**
 * Looks up an ngram of a certain order in the specified range and reads its
 * probability.
 * <p>
 * The lookup is delegated to {@code findNgram}, which is expected to update
 * {@code range} so that it contains the ngram successors afterwards, or to
 * leave it invalid when the ngram is not found. When the lookup yields a
 * negative index, {@code 0.0f} is returned.
 *
 * @param wordId word id to look for
 * @param orderMinusTwo order of ngram minus two
 * @param range range to look in, contains ngram successors after function execution
 * @param quant quantization object to decode compressed probability stored in trie
 * @return probability of ngram, or {@code 0.0f} if the ngram was not found
 */
public float readNgramProb(int wordId, int orderMinusTwo, TrieRange range, NgramTrieQuant quant) {
    final NgramSet ngramSet = getNgram(orderMinusTwo);
    final int ngramIdx = findNgram(ngramSet, wordId, range);
    if (ngramIdx >= 0) {
        return quant.readProb(
                bitArr,
                ngramSet.memPtr,
                ngramSet.getNgramWeightsOffset(ngramIdx),
                orderMinusTwo);
    }
    return 0.0f;
}
/**
 * Looks up an ngram of a certain order in the specified range and reads its
 * backoff.
 * <p>
 * The lookup is delegated to {@code findNgram}, which is expected to update
 * {@code range} so that it contains the ngram successors afterwards, or to
 * leave it invalid when the ngram is not found. When the lookup yields a
 * negative index, {@code 0.0f} is returned.
 *
 * @param wordId word id to look for
 * @param orderMinusTwo order of ngram minus two
 * @param range range to look in, contains ngram successors after function execution
 * @param quant quantization object to decode compressed backoff stored in trie
 * @return backoff of ngram, or {@code 0.0f} if the ngram was not found
 */
public float readNgramBackoff(int wordId, int orderMinusTwo, TrieRange range, NgramTrieQuant quant) {
    final NgramSet ngramSet = getNgram(orderMinusTwo);
    final int ngramIdx = findNgram(ngramSet, wordId, range);
    if (ngramIdx >= 0) {
        return quant.readBackoff(
                bitArr,
                ngramSet.memPtr,
                ngramSet.getNgramWeightsOffset(ngramIdx),
                orderMinusTwo);
    }
    return 0.0f;
}
/**
 * Finds an ngram of a certain order within the given range and returns its
 * decoded probability.
 * <p>
 * {@code findNgram} performs the search; per its contract the range is
 * expected to hold the ngram successors after the call, or to be invalid if
 * nothing was found. A negative search result makes this method return
 * {@code 0.0f} without touching the quantization object.
 *
 * @param wordId word id to look for
 * @param orderMinusTwo order of ngram minus two
 * @param range range to look in, contains ngram successors after function execution
 * @param quant quantization object to decode compressed probability stored in trie
 * @return probability of ngram, or {@code 0.0f} if the ngram was not found
 */
public float readNgramProb(int wordId, int orderMinusTwo, TrieRange range, NgramTrieQuant quant) {
    NgramSet orderSet = getNgram(orderMinusTwo);
    int foundAt = findNgram(orderSet, wordId, range);
    boolean missing = foundAt < 0;
    return missing
            ? 0.0f
            : quant.readProb(bitArr, orderSet.memPtr, orderSet.getNgramWeightsOffset(foundAt), orderMinusTwo);
}
/**
 * Finds an ngram of a certain order within the given range and returns its
 * decoded backoff.
 * <p>
 * {@code findNgram} performs the search; per its contract the range is
 * expected to hold the ngram successors after the call, or to be invalid if
 * nothing was found. A negative search result makes this method return
 * {@code 0.0f} without touching the quantization object.
 *
 * @param wordId word id to look for
 * @param orderMinusTwo order of ngram minus two
 * @param range range to look in, contains ngram successors after function execution
 * @param quant quantization object to decode compressed backoff stored in trie
 * @return backoff of ngram, or {@code 0.0f} if the ngram was not found
 */
public float readNgramBackoff(int wordId, int orderMinusTwo, TrieRange range, NgramTrieQuant quant) {
    NgramSet orderSet = getNgram(orderMinusTwo);
    int foundAt = findNgram(orderSet, wordId, range);
    boolean missing = foundAt < 0;
    return missing
            ? 0.0f
            : quant.readBackoff(bitArr, orderSet.memPtr, orderSet.getNgramWeightsOffset(foundAt), orderMinusTwo);
}
/** * Reads weights quantation object from stream * @param order - max order of ngrams for this model * @return quantation object, see {@link NgramTrieQuant} * @throws IOException if reading from stream failed */ public NgramTrieQuant readQuant(int order) throws IOException { int quantTypeInt = Utilities.readLittleEndianInt(inStream); if (quantTypeInt < 0 || quantTypeInt >= NgramTrieQuant.QuantType.values().length) throw new Error("Unknown quantatization type: " + quantTypeInt); NgramTrieQuant.QuantType quantType = NgramTrieQuant.QuantType.values()[quantTypeInt]; NgramTrieQuant quant = new NgramTrieQuant(order, quantType); //reading tables for (int i = 2; i <= order; i++) { quant.setTable(readFloatArr(quant.getProbTableLen()), i, true); if (i < order) quant.setTable(readFloatArr(quant.getBackoffTableLen()), i, false); } return quant; }
// Instantiate the trie with counts (presumably the per-order ngram counts -- confirm)
// together with the two sizes supplied by the quantization object:
// quant.getProbBoSize() and quant.getProbSize().
trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize());
// Let the loader read into the memory exposed by trie.getMem();
// presumably the trie's backing byte storage -- TODO confirm.
loader.readTrieByteArr(trie.getMem());
/** * Reads encoded backoff from provided trie bit array and decodes it into actual value * for specific ngram * @param bitArr - trie bit array * @param memPtr - memory pointer for specific ngram order * @param bitOffset - offset from memPtr that is calculated according to ngram index * @param orderMinusTwo - order of ngram minus two * @return backoffs of ngram */ public float readBackoff(NgramTrieBitarr bitArr, int memPtr, int bitOffset, int orderMinusTwo) { switch (quantType) { case NO_QUANT: bitOffset += 31; return bitArr.readFloat(memPtr, bitOffset); case QUANT_16: int tableIdx = orderMinusTwo * 2 + 1; return binsDecode(tableIdx, bitArr.readInt(memPtr, bitOffset, probMask)); //TODO implement different quantization stages default: throw new Error("Unsupported quantization type: " + quantType); } }
/** * Reads encoded probability from provided trie bit array and decodes it into actual value * for specific ngram * @param bitArr - trie bit array * @param memPtr - memory pointer for specific ngram order * @param bitOffset - offset from memPtr that is calculated according to ngram index * @param orderMinusTwo - order of ngram minus two * @return probability of ngram */ public float readProb(NgramTrieBitarr bitArr, int memPtr, int bitOffset, int orderMinusTwo) { switch (quantType) { case NO_QUANT: return bitArr.readNegativeFloat(memPtr, bitOffset); case QUANT_16: int tableIdx = orderMinusTwo * 2; if (tableIdx < tables.length - 1) bitOffset += backoffBits; return binsDecode(tableIdx, bitArr.readInt(memPtr, bitOffset, backoffMask)); //TODO implement different quantization stages default: throw new Error("Unsupported quantization type: " + quantType); } }
/** * Reads encoded backoff from provided trie bit array and decodes it into actual value * for specific ngram * @param bitArr - trie bit array * @param memPtr - memory pointer for specific ngram order * @param bitOffset - offset from memPtr that is calculated according to ngram index * @param orderMinusTwo - order of ngram minus two * @return backoffs of ngram */ public float readBackoff(NgramTrieBitarr bitArr, int memPtr, int bitOffset, int orderMinusTwo) { switch (quantType) { case NO_QUANT: bitOffset += 31; return bitArr.readFloat(memPtr, bitOffset); case QUANT_16: int tableIdx = orderMinusTwo * 2 + 1; return binsDecode(tableIdx, bitArr.readInt(memPtr, bitOffset, probMask)); //TODO implement different quantization stages default: throw new Error("Unsupported quantization type: " + quantType); } }