/**
 * Fetches the key stored at {@code offset} in the hash map for the given
 * order. Elsewhere in this class the key is decoded with {@code wordOf} and
 * {@code contextOffsetOf}, i.e. it carries a word and a context offset.
 *
 * @param offset
 *            position in the map for {@code ngramOrder} whose key is wanted
 * @param ngramOrder
 *            order selecting which map is consulted
 * @return the key the map reports for {@code offset}
 */
private long getKey(final long offset, final int ngramOrder) {
    final HashMap orderMap = getMap(ngramOrder);
    return orderMap.getKey(offset);
}
/**
 * Returns the hash map for the given order, creating it through
 * {@code initMap} when {@code getMap} has none yet.
 *
 * @param ngramOrder
 *            order whose map is wanted; also indexes {@code initCapacities}
 * @return the existing map, or the one just created by {@code initMap}
 */
private HashMap getHashMapForOrder(final int ngramOrder) {
    final HashMap existing = getMap(ngramOrder);
    if (existing != null) {
        return existing;
    }
    // No map yet for this order: size it from the configured initial capacity.
    final long capacity = initCapacities[ngramOrder];
    assert capacity >= 0 : "Bad capacity " + capacity + " for order " + ngramOrder;
    return initMap(capacity, ngramOrder);
}
/**
 * Exposes what the map for the given order returns from {@code keys()}.
 *
 * @param ngramOrder
 *            order whose map is consulted
 * @return the map's {@code keys()} iterable, or an empty list when there is
 *         no map for this order
 */
public Iterable<Long> getNgramOffsetsForOrder(final int ngramOrder) {
    final HashMap orderMap = getMap(ngramOrder);
    if (orderMap != null) {
        return orderMap.keys();
    }
    return Collections.emptyList();
}
/**
 * Reports how many entries the map for the given order holds.
 *
 * @param ngramOrder
 *            order whose map is consulted
 * @return the map's {@code size()}, or 0 when there is no map for this order
 */
@Override
public long getNumNgrams(final int ngramOrder) {
    final HashMap map = getMap(ngramOrder);
    // The other per-order accessors in this class treat a missing map as
    // "no n-grams"; do the same here rather than failing with an NPE.
    return map == null ? 0L : map.size();
}
/**
 * Produces the entries (n-gram plus value) for the given order by
 * transforming each element of the order map's {@code keys()}.
 *
 * For every element, a value object is taken from
 * {@code values.getScratchValue()}, filled by {@code values.getFromOffset},
 * and paired with the n-gram returned by {@code getNgramForOffset}.
 *
 * @param ngramOrder
 *            order whose entries are wanted
 * @return an iterable over the entries, or an empty list when there is no
 *         map for this order
 */
@Override
public Iterable<Entry<T>> getNgramsForOrder(final int ngramOrder) {
    final HashMap orderMap = getMap(ngramOrder);
    if (orderMap == null) {
        return Collections.emptyList();
    }
    // NOTE(review): presumably the transform runs lazily as the iterable is
    // traversed -- confirm against Iterators.Transform.
    return Iterators.able(new Iterators.Transform<Long, Entry<T>>(orderMap.keys().iterator()) {
        @Override
        protected Entry<T> transform(final Long boxedOffset) {
            final long ngramOffset = boxedOffset;
            final T value = values.getScratchValue();
            values.getFromOffset(ngramOffset, ngramOrder, value);
            return new Entry<T>(getNgramForOffset(ngramOffset, ngramOrder), value);
        }
    });
}
/**
 * Sums {@code size()} over the per-order maps, starting at order 0 and
 * stopping at the first order that has no map (or at
 * {@code getMaxNgramOrder()}, whichever comes first).
 *
 * @return the accumulated size
 */
public long getTotalSize() {
    long total = 0L;
    int order = 0;
    while (order < getMaxNgramOrder()) {
        final HashMap orderMap = getMap(order);
        if (orderMap == null) {
            // Orders past the first missing map are not counted.
            return total;
        }
        total += orderMap.size();
        ++order;
    }
    return total;
}
/**
 * Returns the word part ({@code wordOf}) of the key stored at
 * {@code offset} in the map for the given order.
 *
 * @param offset
 *            position in the map for {@code ngramOrder}
 * @param ngramOrder
 *            order selecting which map is consulted
 * @return the word extracted from the key
 */
public int getLastWordForOffset(final long offset, final int ngramOrder) {
    // getKey(long, int) performs the same getMap(order).getKey(offset) lookup.
    return wordOf(getKey(offset, ngramOrder));
}
/**
 * Fills {@code ret} with the words of the n-gram stored at {@code offset}.
 *
 * Starting in the map for {@code ngramOrder}, each key yields one word
 * ({@code wordOf}) and the offset to look up in the next lower order's map
 * ({@code contextOffsetOf}), down to order 0. Indices 0 through
 * {@code ngramOrder} of {@code ret} are written.
 *
 * @param offset
 *            position in the map for {@code ngramOrder}
 * @param ngramOrder
 *            order of the n-gram; {@code ngramOrder + 1} words are written
 * @param ret
 *            destination array, written in place
 * @return {@code ret}
 */
public int[] getNgramForOffset(final long offset, final int ngramOrder, final int[] ret) {
    long currOffset = offset;
    for (int order = ngramOrder; order >= 0; --order) {
        final long key = getMap(order).getKey(currOffset);
        currOffset = contextOffsetOf(key);
        final int word = wordOf(key);
        // When reversed, the word from the highest order goes to index 0;
        // otherwise it goes to index ngramOrder.
        final int slot = reversed ? (ngramOrder - order) : order;
        ret[slot] = word;
    }
    return ret;
}
/**
 * Follows context offsets from the map for {@code ngramOrder} down to the
 * order-0 map and returns the word part of the key found there.
 *
 * @param offset
 *            position in the map for {@code ngramOrder}
 * @param ngramOrder
 *            order at which the walk starts
 * @return {@code wordOf} applied to the key reached at order 0
 */
public int getFirstWordForOffset(final long offset, final int ngramOrder) {
    long currOffset = offset;
    int order = ngramOrder;
    while (true) {
        final long key = getMap(order).getKey(currOffset);
        if (order == 0) {
            return wordOf(key);
        }
        // Step to the context stored one order lower.
        currOffset = contextOffsetOf(key);
        --order;
    }
}
/**
 * Looks up the offset of the n-gram given by {@code ngram[startPos..endPos)}
 * (NOTE(review): {@code endPos} appears to be exclusive, since the order is
 * computed as {@code endPos - startPos - 1} -- confirm with callers).
 *
 * @param ngram
 *            array holding the words
 * @param startPos
 *            start of the span within {@code ngram}
 * @param endPos
 *            end of the span within {@code ngram}
 * @return the value of {@code getOffset} on the order's map, or -1 when the
 *         span contains an out-of-vocabulary word, the order is not below
 *         {@code getMaxNgramOrder()}, the computed key is negative, or
 *         there is no map for the order
 */
private long getOffsetFromRawNgram(final int[] ngram, final int startPos, final int endPos) {
    if (containsOutOfVocab(ngram, startPos, endPos)) {
        return -1;
    }
    final int order = endPos - startPos - 1;
    if (order >= getMaxNgramOrder()) {
        return -1;
    }
    final long lookupKey = getKey(ngram, startPos, endPos);
    if (lookupKey < 0) {
        return -1;
    }
    final HashMap orderMap = getMap(order);
    return (orderMap == null) ? -1 : orderMap.getOffset(lookupKey);
}
/**
 * For each order from 0 up to the first order without a map (or up to
 * {@code getMaxNgramOrder()}), calls {@code values.trimAfterNgram} with that
 * map's capacity and logs its load factor; then calls {@code values.trim()}.
 */
@Override
public void trim() {
    int order = 0;
    while (order < getMaxNgramOrder()) {
        final HashMap orderMap = getMap(order);
        if (orderMap == null) {
            break;
        }
        values.trimAfterNgram(order, orderMap.getCapacity());
        // The log shows the order as a 1-based number.
        Logger.logss("Load factor for " + (order + 1) + ": " + orderMap.getLoadFactor());
        ++order;
    }
    values.trim();
}