|| location.getProtocol().equals("file")) { try { loader = new BinaryLoader(new File(location.toURI())); } catch (Exception ex) { loader = new BinaryLoader(new File(location.getPath())); loader = new BinaryLoader(location); loader.verifyHeader(); counts = loader.readCounts(); if (maxDepth <= 0 || maxDepth > counts.length) maxDepth = counts.length; if (maxDepth > 1) { quant = loader.readQuant(maxDepth); unigrams = loader.readUnigrams(counts[0]); if (maxDepth > 1) { trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize()); loader.readTrieByteArr(trie.getMem()); words = loader.readWords(counts[0]); buildUnigramIDMap(); ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize); loader.close(); TimerPool.getTimer(this, "Load LM").stop();
public BinaryLoader(URL location) throws IOException { loadModelData(location.openStream()); }
/**
 * Reads the language model order followed by one ngram count per order.
 * The order is obtained from {@code readOrder()}; afterwards exactly that
 * many integers are read from the stream, in sequence.
 * @return array of counts, one entry per ngram order, in the sequence they were read
 * @throws IOException if reading from stream failed
 */
public int[] readCounts() throws IOException {
    final int ngramOrder = readOrder();
    final int[] ngramCounts = new int[ngramOrder];
    int level = 0;
    while (level < ngramCounts.length) {
        ngramCounts[level] = Utilities.readLittleEndianInt(inStream);
        level++;
    }
    return ngramCounts;
}
/**
 * Checks that the stream starts with the expected trie header.
 * A string of {@code TRIE_HEADER.length()} is read from the stream via
 * {@code readString} and compared against {@code TRIE_HEADER}.
 * @throws IOException if reading from stream failed
 * @throws Error if the string read differs from {@code TRIE_HEADER}
 */
public void verifyHeader() throws IOException {
    final String headerFromStream = readString(inStream, TRIE_HEADER.length());
    if (headerFromStream.equals(TRIE_HEADER)) {
        // header matches, nothing else to do
        return;
    }
    throw new Error("Bad binary LM file header: " + headerFromStream);
}
/** * Reads weights quantation object from stream * @param order - max order of ngrams for this model * @return quantation object, see {@link NgramTrieQuant} * @throws IOException if reading from stream failed */ public NgramTrieQuant readQuant(int order) throws IOException { int quantTypeInt = Utilities.readLittleEndianInt(inStream); if (quantTypeInt < 0 || quantTypeInt >= NgramTrieQuant.QuantType.values().length) throw new Error("Unknown quantatization type: " + quantTypeInt); NgramTrieQuant.QuantType quantType = NgramTrieQuant.QuantType.values()[quantTypeInt]; NgramTrieQuant quant = new NgramTrieQuant(order, quantType); //reading tables for (int i = 2; i <= order; i++) { quant.setTable(readFloatArr(quant.getProbTableLen()), i, true); if (i < order) quant.setTable(readFloatArr(quant.getBackoffTableLen()), i, false); } return quant; }
/**
 * Checks that the stream starts with the expected trie header.
 * A string of {@code TRIE_HEADER.length()} is read from the stream via
 * {@code readString} and compared against {@code TRIE_HEADER}.
 * @throws IOException if reading from stream failed
 * @throws Error if the string read differs from {@code TRIE_HEADER}
 */
public void verifyHeader() throws IOException {
    final String headerFromStream = readString(inStream, TRIE_HEADER.length());
    if (headerFromStream.equals(TRIE_HEADER)) {
        // header matches, nothing else to do
        return;
    }
    throw new Error("Bad binary LM file header: " + headerFromStream);
}
/** * Reads weights quantation object from stream * @param order - max order of ngrams for this model * @return quantation object, see {@link NgramTrieQuant} * @throws IOException if reading from stream failed */ public NgramTrieQuant readQuant(int order) throws IOException { int quantTypeInt = Utilities.readLittleEndianInt(inStream); if (quantTypeInt < 0 || quantTypeInt >= NgramTrieQuant.QuantType.values().length) throw new Error("Unknown quantatization type: " + quantTypeInt); NgramTrieQuant.QuantType quantType = NgramTrieQuant.QuantType.values()[quantTypeInt]; NgramTrieQuant quant = new NgramTrieQuant(order, quantType); //reading tables for (int i = 2; i <= order; i++) { quant.setTable(readFloatArr(quant.getProbTableLen()), i, true); if (i < order) quant.setTable(readFloatArr(quant.getBackoffTableLen()), i, false); } return quant; }
|| location.getProtocol().equals("file")) { try { loader = new BinaryLoader(new File(location.toURI())); } catch (Exception ex) { loader = new BinaryLoader(new File(location.getPath())); loader = new BinaryLoader(location); loader.verifyHeader(); counts = loader.readCounts(); if (maxDepth <= 0 || maxDepth > counts.length) maxDepth = counts.length; if (maxDepth > 1) { quant = loader.readQuant(maxDepth); unigrams = loader.readUnigrams(counts[0]); if (maxDepth > 1) { trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize()); loader.readTrieByteArr(trie.getMem()); words = loader.readWords(counts[0]); buildUnigramIDMap(); ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize); loader.close(); TimerPool.getTimer(this, "Load LM").stop();
/**
 * Reads the language model order followed by one ngram count per order.
 * The order is obtained from {@code readOrder()}; afterwards exactly that
 * many integers are read from the stream, in sequence.
 * @return array of counts, one entry per ngram order, in the sequence they were read
 * @throws IOException if reading from stream failed
 */
public int[] readCounts() throws IOException {
    final int ngramOrder = readOrder();
    final int[] ngramCounts = new int[ngramOrder];
    int level = 0;
    while (level < ngramCounts.length) {
        ngramCounts[level] = Utilities.readLittleEndianInt(inStream);
        level++;
    }
    return ngramCounts;
}
public BinaryLoader(URL location) throws IOException { loadModelData(location.openStream()); }