/**
 * Compresses the key storage held for one n-gram order.
 *
 * <p>Orders at or below zero are left completely untouched. For any higher order the
 * uncompressed keys are passed to the array-level {@code compress} overload and the result is
 * stored in the map's {@code compressedKeys} field; afterwards the value container is told to
 * clear its storage for that order and the map's uncompressed keys are cleared.
 *
 * @param ngramOrder index into {@code maps} of the order to compress
 */
private void compress(final int ngramOrder) {
    if (ngramOrder <= 0) {
        return;
    }
    final CompressedMap target = maps[ngramOrder];
    target.compressedKeys = compress(target.getUncompressedKeys(), target.size(), ngramOrder);
    // The cast throws ClassCastException if the value container is not compressible.
    final CompressibleValueContainer<T> compressibleValues = (CompressibleValueContainer<T>) values;
    compressibleValues.clearStorageAfterCompression(ngramOrder);
    target.clearUncompressedKeys();
}
/**
 * Adds the n-gram made of the words {@code ngram[startPos] .. ngram[endPos - 1]} to the map.
 *
 * <p>The n-gram is split into one word and a context: with {@code reverseTrie} set the word is
 * the first one and the context is the remainder; otherwise the word is the last one and the
 * context is everything before it. The map for this order is created on first use, sized from
 * {@code numNgramsForEachOrder}.
 *
 * @param ngram    array holding the word ids
 * @param startPos index of the first word of the n-gram
 * @param endPos   index one past the last word of the n-gram
 * @param val      value handed to the value container for this n-gram
 * @return the offset reported by the map's {@code add}, or {@code -1} when the context offset
 *         lookup yields a negative result
 */
@Override
public long put(final int[] ngram, int startPos, int endPos, final T val) {
    final int ngramOrder = endPos - startPos - 1;

    final int word;
    final long contextOffset;
    if (reverseTrie) {
        word = ngram[startPos];
        contextOffset = getContextOffset(ngram, startPos + 1, endPos);
    } else {
        word = ngram[endPos - 1];
        contextOffset = getContextOffset(ngram, startPos, endPos - 1);
    }
    if (contextOffset < 0) {
        return -1;
    }

    CompressedMap map = maps[ngramOrder];
    if (map == null) {
        // First n-gram of this order: allocate the map and pre-size the value storage.
        map = new CompressedMap();
        maps[ngramOrder] = map;
        final long expectedCount = numNgramsForEachOrder[ngramOrder];
        map.init(expectedCount);
        values.setSizeAtLeast(expectedCount, ngramOrder);
    }

    final long sizeBefore = map.size();
    final long newOffset = map.add(combineToKey(word, contextOffset));
    // The last argument tells the value container whether the add left the map size unchanged.
    final boolean sizeUnchanged = map.size() == sizeBefore;
    values.add(ngram, startPos, endPos, ngramOrder, map.size() - 1, contextOffset, word, val, -1, sizeUnchanged);
    return newOffset;
}
/**
 * Finalizes the storage for an order once all of its n-grams have been added.
 *
 * <p>The map at index {@code justFinishedOrder - 1} is sorted, trimmed and compressed, and the
 * value container is trimmed to the number of keys. Nothing happens when no map exists at that
 * index.
 *
 * @param justFinishedOrder the order that has just been completed; the affected map lives at
 *                          index {@code justFinishedOrder - 1}
 */
@Override
public void handleNgramsFinished(final int justFinishedOrder) {
    final int orderIndex = justFinishedOrder - 1;
    final CompressedMap finishedMap = maps[orderIndex];
    if (finishedMap == null) {
        return;
    }
    final LongArray keys = finishedMap.getUncompressedKeys();
    final long numKeys = keys.size();
    // Sort bounds are inclusive on both ends.
    sort(keys, 0, numKeys - 1, orderIndex);
    finishedMap.trim();
    values.trimAfterNgram(orderIndex, numKeys);
    compress(orderIndex);
}
/**
 * Reports how many n-grams are stored for the given order.
 *
 * <p>Entries of {@code maps} may be {@code null} when no n-gram of that order has been
 * stored; such an order is reported as empty instead of failing with a
 * {@link NullPointerException}.
 *
 * @param ngramOrder index into {@code maps} of the order to query
 * @return the size of the map for that order, or {@code 0} when no map exists for it
 */
@Override
public long getNumNgrams(int ngramOrder) {
    final CompressedMap map = maps[ngramOrder];
    if (map == null) {
        return 0;
    }
    return map.size();
}
/**
 * Adds the n-gram made of the words {@code ngram[startPos] .. ngram[endPos - 1]} to the map.
 *
 * <p>The n-gram is split into one word and a context: with {@code reverseTrie} set the word is
 * the first one and the context is the remainder; otherwise the word is the last one and the
 * context is everything before it. The map for this order is created on first use, sized from
 * {@code numNgramsForEachOrder}.
 *
 * @param ngram    array holding the word ids
 * @param startPos index of the first word of the n-gram
 * @param endPos   index one past the last word of the n-gram
 * @param val      value handed to the value container for this n-gram
 * @return the offset reported by the map's {@code add}, or {@code -1} when either the context
 *         offset lookup yields a negative result or the value container rejects the add
 */
@Override
public long put(final int[] ngram, final int startPos, final int endPos, final T val) {
    final int ngramOrder = endPos - startPos - 1;

    final int word;
    final long contextOffset;
    if (reverseTrie) {
        word = ngram[startPos];
        contextOffset = getContextOffset(ngram, startPos + 1, endPos, null);
    } else {
        word = ngram[endPos - 1];
        contextOffset = getContextOffset(ngram, startPos, endPos - 1, null);
    }
    if (contextOffset < 0) {
        return -1;
    }

    CompressedMap map = maps[ngramOrder];
    if (map == null) {
        // First n-gram of this order: allocate the map and pre-size the value storage.
        map = new CompressedMap();
        maps[ngramOrder] = map;
        final long expectedCount = numNgramsForEachOrder[ngramOrder];
        map.init(expectedCount);
        values.setSizeAtLeast(expectedCount, ngramOrder);
    }

    final long sizeBefore = map.size();
    final long newOffset = map.add(combineToKey(word, contextOffset));
    // The last argument tells the value container whether the add left the map size unchanged.
    final boolean sizeUnchanged = map.size() == sizeBefore;
    final boolean stored = values.add(ngram, startPos, endPos, ngramOrder, map.size() - 1, contextOffset, word, val, -1, sizeUnchanged);
    return stored ? newOffset : -1;
}
/**
 * Finalizes the storage for an order once all of its n-grams have been added.
 *
 * <p>The map at index {@code justFinishedOrder - 1} is sorted, trimmed and compressed, and the
 * value container is trimmed to the number of keys. Entries of {@code maps} may still be
 * {@code null} when no n-gram of that order was ever stored; in that case there is nothing to
 * finalize and the method returns without touching any state, rather than failing with a
 * {@link NullPointerException}.
 *
 * @param justFinishedOrder the order that has just been completed; the affected map lives at
 *                          index {@code justFinishedOrder - 1}
 */
@Override
public void handleNgramsFinished(final int justFinishedOrder) {
    final int orderIndex = justFinishedOrder - 1;
    final CompressedMap finishedMap = maps[orderIndex];
    if (finishedMap == null) {
        return;
    }
    final LongArray currKeys = finishedMap.getUncompressedKeys();
    final long currSize = currKeys.size();
    // Sort bounds are inclusive on both ends.
    sort(currKeys, 0, currSize - 1, orderIndex);
    finishedMap.trim();
    values.trimAfterNgram(orderIndex, currSize);
    compress(orderIndex);
}
/**
 * Reports how many n-grams are stored for the given order.
 *
 * <p>Entries of {@code maps} may be {@code null} when no n-gram of that order has been
 * stored; such an order is reported as empty instead of failing with a
 * {@link NullPointerException}.
 *
 * @param ngramOrder index into {@code maps} of the order to query
 * @return the size of the map for that order, or {@code 0} when no map exists for it
 */
@Override
public long getNumNgrams(final int ngramOrder) {
    final CompressedMap map = maps[ngramOrder];
    if (map == null) {
        return 0;
    }
    return map.size();
}
/**
 * Compresses the key storage held for one n-gram order and releases the uncompressed keys.
 *
 * <p>For orders above zero the uncompressed keys are passed to the array-level
 * {@code compress} overload, the result is stored in the map's {@code compressedKeys} field,
 * and the value container is told to clear its storage for that order. The uncompressed keys
 * are cleared for every order, including order zero, which is not compressed.
 *
 * @param ngramOrder index into {@code maps} of the order to compress
 */
private void compress(final int ngramOrder) {
    final CompressedMap target = maps[ngramOrder];
    if (ngramOrder > 0) {
        target.compressedKeys = compress(target.getUncompressedKeys(), target.size(), ngramOrder);
        // The cast throws ClassCastException if the value container is not compressible.
        final CompressibleValueContainer<T> compressibleValues = (CompressibleValueContainer<T>) values;
        compressibleValues.clearStorageAfterCompression(ngramOrder);
    }
    target.clearUncompressedKeys();
}
/**
 * Looks up an entry either by key or by offset.
 *
 * <p>Order zero is answered directly from the word id without touching {@code compressed}:
 * a non-negative {@code searchKey} means the lookup is by key and the word id is returned;
 * otherwise {@code searchOffset} is taken as the word id and the matching key is returned.
 * Higher orders locate a block with {@code binarySearchBlocks} and then scan it with
 * {@code decompressLinearSearch}.
 *
 * @param compressed   compressed key array for this order; may be {@code null} when the order
 *                     has no compressed keys, in which case the lookup fails with {@code -1}
 * @param searchKey    key to look for, or a negative value to search by offset instead
 * @param ngramOrder   order being searched
 * @param outputVal    receives the value for order-zero hits when non-null; for higher orders
 *                     it is forwarded to {@code decompressLinearSearch}
 * @param searchOffset offset to look for when {@code searchKey} is negative
 * @return for order zero the word id or key as described above; for higher orders the result of
 *         {@code decompressLinearSearch}; {@code -1} when the entry cannot be found
 */
private long decompressSearch(final LongArray compressed, final long searchKey, final int ngramOrder, final T outputVal, final long searchOffset) {
    if (ngramOrder == 0) {
        final boolean lookingForOffset = searchKey >= 0;
        final int word = lookingForOffset ? AbstractNgramMap.wordOf(searchKey) : (int) searchOffset;
        if (word < 0 || word >= maps[0].size()) {
            return -1;
        }
        if (outputVal != null) {
            values.getFromOffset(word, 0, outputVal);
        }
        return lookingForOffset ? word : AbstractNgramMap.combineToKey(word, 0);
    }
    // Without compressed keys there is nothing to search; report a miss instead of dereferencing null.
    if (compressed == null) {
        return -1;
    }
    final long fromIndex = 0;
    final long toIndex = (compressed.size() / compressedBlockSize) - 1;
    final long low = binarySearchBlocks(compressed, compressed.size(), searchKey, fromIndex, toIndex, searchOffset);
    if (low < 0) {
        return -1;
    }
    return decompressLinearSearch(compressed, low, searchKey, ngramOrder, outputVal, searchOffset);
}
/**
 * Looks up an entry either by key or by offset.
 *
 * <p>Order zero is answered directly from the word id without touching {@code compressed}:
 * a non-negative {@code searchKey} means the lookup is by key and the word id is returned;
 * otherwise {@code searchOffset} is taken as the word id and the matching key is returned.
 * Higher orders locate a block with {@code binarySearchBlocks} and then scan it with
 * {@code decompressLinearSearch}.
 *
 * @param compressed   compressed key array for this order; a {@code null} array makes lookups
 *                     for orders above zero return {@code -1}
 * @param searchKey    key to look for, or a negative value to search by offset instead
 * @param ngramOrder   order being searched
 * @param outputVal    receives the value for order-zero hits when non-null; for higher orders
 *                     it is forwarded to {@code decompressLinearSearch}
 * @param searchOffset offset to look for when {@code searchKey} is negative
 * @return for order zero the word id or key as described above; for higher orders the result of
 *         {@code decompressLinearSearch}; {@code -1} when the entry cannot be found
 */
private long decompressSearch(final LongArray compressed, final long searchKey, final int ngramOrder, final T outputVal, final long searchOffset) {
    if (ngramOrder == 0) {
        final boolean searchingByKey = searchKey >= 0;
        final int word;
        if (searchingByKey) {
            word = wordOf(searchKey);
        } else {
            word = (int) searchOffset;
        }
        final boolean inRange = word >= 0 && word < maps[0].size();
        if (!inRange) {
            return -1;
        }
        if (outputVal != null) {
            values.getFromOffset(word, 0, outputVal);
        }
        if (searchingByKey) {
            return word;
        }
        return combineToKey(word, 0);
    }

    if (compressed == null) {
        return -1;
    }
    final long firstBlock = 0;
    final long lastBlock = (compressed.size() / compressedBlockSize) - 1;
    final long blockStart = binarySearchBlocks(compressed, compressed.size(), searchKey, firstBlock, lastBlock, searchOffset);
    if (blockStart < 0) {
        return -1;
    }
    return decompressLinearSearch(compressed, blockStart, searchKey, ngramOrder, outputVal, searchOffset);
}