/**
 * Convenience constructor: delegates to the three-argument constructor,
 * passing {@code map.getMaxNgramOrder()} as the third argument.
 *
 * @param map
 *            n-gram map to wrap; dereferenced immediately, so it must not be null
 * @param wordIndexer
 *            word indexer forwarded unchanged to the three-argument constructor
 */
public NgramIterableWrapper(final NgramMap<V> map, final WordIndexer<W> wordIndexer) { this(map, wordIndexer, map.getMaxNgramOrder()); }
public ArrayEncodedProbBackoffLm(final int lmOrder, final WordIndexer<W> wordIndexer, final NgramMap<ProbBackoffPair> map, final ConfigOptions opts) { super(lmOrder, wordIndexer, (float) opts.unknownWordLogProb); this.map = map; this.values = (ProbBackoffValueContainer) map.getValues(); useScratchValues = !(map instanceof ContextEncodedNgramMap); numWords = map.getNumNgrams(0); }
@Override public void handleNgramOrderFinished(final int order) { for (final int[] ngram : failures) { if (ngram.length == order) {// && !map.contains(ngram, 0, ngram.length)) { map.put(ngram, 0, ngram.length, null); } } map.handleNgramsFinished(order); }
/**
 * Looks up the value stored for a complete n-gram given as word indices.
 * <p>
 * The words are consumed from the last one to the first; each lookup is
 * handed the offset returned by the previous one. The value is written into
 * the scratch value obtained from the map's value container.
 *
 * @param ngram
 *            word indices of the n-gram
 * @return the scratch value as left by the final lookup, or {@code null} as
 *         soon as a lookup returns a negative offset. For an empty array no
 *         lookup happens and the untouched scratch value is returned.
 */
private V getForArray(int[] ngram) {
    final NgramMap<V> localMap = map;
    final V scratch = map.getValues().getScratchValue();
    long context = 0L;
    int contextOrder = -1;
    for (int pos = ngram.length - 1; pos >= 0; --pos, ++contextOrder) {
        context = localMap.getValueAndOffset(context, contextOrder, ngram[pos], scratch);
        if (context < 0) {
            return null;
        }
    }
    return scratch;
}
/**
 * Returns the raw count stored for the n-gram {@code ngram[startPos, endPos)}.
 * <p>
 * The lookup walks from the word at {@code endPos - 1} back to the word at
 * {@code startPos}, feeding each lookup the offset returned by the previous
 * one.
 *
 * @param ngram
 *            array of word indices
 * @param startPos
 *            index of the first word of the n-gram (inclusive)
 * @param endPos
 *            index one past the last word of the n-gram (exclusive)
 * @return count of the n-gram, or -1 if a lookup along the way fails (also -1
 *         when the range is empty, since no lookup is made)
 */
public long getRawCount(final int[] ngram, final int startPos, final int endPos) {
    final NgramMap<LongRef> localMap = map;
    final LongRef count = new LongRef(-1L);
    long context = 0L;
    int contextOrder = -1;
    for (int pos = endPos - 1; pos >= startPos; --pos) {
        assert (context >= 0);
        context = localMap.getValueAndOffset(context, contextOrder, ngram[pos], count);
        if (context < 0) {
            return -1;
        }
        contextOrder++;
    }
    return count.value;
}
/**
 * Puts one n-gram into the map and, if the map rejects it, either records
 * what is missing or fails.
 * <p>
 * When the put returns a negative value and {@code canFail} is set, every
 * proper prefix (longest first) and then every proper suffix (longest first)
 * of the n-gram that the map does not contain is copied into
 * {@code failures}. When {@code canFail} is not set, a rejection is an error.
 *
 * @param ngram
 *            array of word indices
 * @param startPos
 *            index of the first word of the n-gram (inclusive)
 * @param endPos
 *            index one past the last word of the n-gram (exclusive)
 * @param v
 *            value to store for the n-gram
 * @param words
 *            text of the input line, used only in the error message
 * @throws RuntimeException
 *             if the put is rejected and {@code canFail} is false
 */
@Override
public void call(final int[] ngram, final int startPos, final int endPos, final V v, final String words) {
    final long result = map.put(ngram, startPos, endPos, v);
    if (result >= 0) {
        return;
    }
    if (!canFail) {
        throw new RuntimeException("Failed to add line " + words);
    }
    // Proper prefixes: same start, progressively shorter end.
    for (int prefixEnd = endPos - 1; prefixEnd > startPos; prefixEnd--) {
        final boolean present = map.contains(ngram, startPos, prefixEnd);
        if (!present) {
            failures.add(Arrays.copyOfRange(ngram, startPos, prefixEnd));
        }
    }
    // Proper suffixes: same end, progressively later start.
    for (int suffixStart = startPos + 1; suffixStart < endPos; suffixStart++) {
        final boolean present = map.contains(ngram, suffixStart, endPos);
        if (!present) {
            failures.add(Arrays.copyOfRange(ngram, suffixStart, endPos));
        }
    }
}
/**
 * Forwards the list of n-gram counts to the underlying map unchanged.
 *
 * @param numNGrams
 *            passed straight to {@code map.initWithLengths}; presumably one
 *            entry per n-gram order -- confirm against the caller
 */
@Override public void initWithLengths(final List<Long> numNGrams) { map.initWithLengths(numNGrams); }
/**
 * Finishes this callback by calling {@code trim()} on the underlying map;
 * this variant always trims.
 */
@Override public void cleanup() { map.trim(); }
/**
 * Returns the number of n-grams the underlying map reports for
 * {@code ngramOrder}.
 */
public long size() { return map.getNumNgrams(ngramOrder); } }
/**
 * Iterates over the map's n-grams of order {@code ngramOrder}, presenting each
 * one as a {@link java.util.Map.Entry}.
 * <p>
 * The key is produced on every {@code getKey()} call by converting the map
 * entry's key with the word indexer; the value is the map entry's value.
 * Entries are read-only: {@code setValue} always throws
 * {@link UnsupportedOperationException}.
 *
 * @return an iterator over the entries of the configured order
 */
@Override
public Iterator<Entry<List<W>, V>> iterator() {
    return new Iterators.Transform<NgramMap.Entry<V>, java.util.Map.Entry<List<W>, V>>(map.getNgramsForOrder(ngramOrder).iterator()) {

        @Override
        protected Entry<List<W>, V> transform(final NgramMap.Entry<V> mapEntry) {
            return new java.util.Map.Entry<List<W>, V>() {

                @Override
                public List<W> getKey() {
                    return WordIndexer.StaticMethods.toList(wordIndexer, mapEntry.key);
                }

                @Override
                public V getValue() {
                    return mapEntry.value;
                }

                @Override
                public V setValue(final V ignored) {
                    throw new UnsupportedOperationException("Method not yet implemented");
                }
            };
        }
    };
}
/**
 * Builds a language model on top of an existing n-gram map.
 *
 * @param lmOrder
 *            forwarded to the superclass constructor
 * @param wordIndexer
 *            forwarded to the superclass constructor
 * @param map
 *            n-gram map holding {@link ProbBackoffPair} values; its value
 *            container is cast to {@link ProbBackoffValueContainer}
 * @param opts
 *            configuration; {@code unknownWordLogProb} is narrowed to float and
 *            forwarded to the superclass constructor
 */
public ProbBackoffLm(final int lmOrder, final WordIndexer<W> wordIndexer, final NgramMap<ProbBackoffPair> map, final ConfigOptions opts) { super(lmOrder, wordIndexer, (float) opts.unknownWordLogProb); this.map = map; this.values = (ProbBackoffValueContainer) map.getValues(); }
/**
 * Tells the underlying map that all n-grams of the given order have been
 * delivered.
 *
 * @param order
 *            forwarded unchanged to {@code map.handleNgramsFinished}
 */
@Override public void handleNgramOrderFinished(final int order) { map.handleNgramsFinished(order); }
/**
 * Puts one n-gram into the map; a rejected put is logged as a warning rather
 * than treated as an error.
 * <p>
 * At most ten warnings are logged: {@code warnCount} is incremented after each
 * one and no warning is emitted once it reaches ten. A negative
 * {@code warnCount} also suppresses the warning.
 *
 * @param ngram
 *            array of word indices
 * @param startPos
 *            index of the first word of the n-gram (inclusive)
 * @param endPos
 *            index one past the last word of the n-gram (exclusive)
 * @param v
 *            value to store for the n-gram
 * @param words
 *            text of the input line, used only in the warning message
 */
@Override
public void call(final int[] ngram, final int startPos, final int endPos, final V v, final String words) {
    final int maxWarnings = 10;
    final long add = map.put(ngram, startPos, endPos, v);
    if (add >= 0) {
        return;
    }
    // The counter never exceeds maxWarnings, so no separate reset step is needed.
    if (warnCount >= 0 && warnCount < maxWarnings) {
        Logger.warn("Could not add line " + words
            + "\nThis is probabcly because the prefix or suff of the n-grams was not already in the map. This will be fixed in an upcoming release.");
        warnCount++;
    }
}
// Calls clearStorage() on the current map, replaces it with a new map from
// createNgramMap, then parses the reader's input through a callback that is
// given the recorded failures.
// NOTE(review): presumably this is a second pass after an earlier parse
// collected `failures` -- confirm against the enclosing method.
map.clearStorage(); map = createNgramMap(opts, numNgramsForEachWord, numNgramsForEachOrder, reversed, newValues, compress); lmReader.parse(new NgramMapAddingCallback<V>(map, failures));
/**
 * Scores the n-gram {@code ngram[startPos, endPos)} from stored counts.
 * <p>
 * Words are consumed from the last one backwards, so each step looks up a
 * suffix of the n-gram that is one word longer than the previous one. For
 * every suffix that is found, the score is overwritten with
 * {@code log(count / denominator * pow(alpha, i - startPos))}, where {@code i}
 * is the index of the word just added. The exponent is therefore zero only
 * when the whole range was matched.
 * <p>
 * The denominator is the value container's unigram sum for the one-word
 * suffix. For longer suffixes it is the value read along a second lookup
 * chain that starts one step later, and so covers the current suffix without
 * its final word. The offset returned by that second chain is not checked.
 *
 * @param ngram
 *            array of word indices
 * @param startPos
 *            index of the first word of the n-gram (inclusive)
 * @param endPos
 *            index one past the last word of the n-gram (exclusive)
 * @return the score of the longest suffix found, or {@code oovWordLogProb}
 *         when not even the last word is found or the range is empty
 */
@Override
public float getLogProb(final int[] ngram, final int startPos, final int endPos) {
    final NgramMap<LongRef> localMap = map;
    final LongRef scratch = new LongRef(-1L);
    float logProb = oovWordLogProb;
    long numeratorContext = 0L;
    int numeratorOrder = -1;
    long denominatorContext = 0L;
    int denominatorOrder = -1;
    final int lastPos = endPos - 1;
    for (int i = lastPos; i >= startPos; --i) {
        assert (numeratorContext >= 0);
        numeratorContext = localMap.getValueAndOffset(numeratorContext, numeratorOrder, ngram[i], scratch);
        if (numeratorContext < 0) {
            // Longer suffix not in the map: keep the score of the last one found.
            return logProb;
        }
        final long currCount = scratch.value;
        final long backoffCount;
        if (i == lastPos) {
            backoffCount = ((CountValueContainer) map.getValues()).getUnigramSum();
        } else {
            denominatorContext = localMap.getValueAndOffset(denominatorContext, denominatorOrder, ngram[i], scratch);
            denominatorOrder++;
            backoffCount = scratch.value;
        }
        logProb = (float) Math.log(currCount / ((float) backoffCount) * pow(alpha, i - startPos));
        numeratorOrder++;
    }
    return logProb;
}
localMap.getValueAndOffset(0, -1, ngram[endPos - 2], scratch); backoff = scratch.backoff; } else { backoffContext = localMap.getValueAndOffset(backoffContext, i - 1, ngram[endPos - i - 2], null); backoffContext = localMap.getValueAndOffset(backoffContext, backoffContextOrder, ngram[endPos - i - 2], scratch); if (backoffContext < 0) break; assert i > matchedProbContextOrder;
/**
 * Forwards the list of n-gram counts to the underlying map unchanged.
 *
 * @param numNGrams
 *            passed straight to {@code map.initWithLengths}; presumably one
 *            entry per n-gram order -- confirm against the caller
 */
@Override public void initWithLengths(final List<Long> numNGrams) { map.initWithLengths(numNGrams); }
/**
 * Finishes this callback, trimming the underlying map unless failures were
 * recorded while failing is permitted.
 * <p>
 * The map is trimmed when {@code failures} is empty or {@code canFail} is
 * false, and left untouched otherwise.
 */
@Override
public void cleanup() {
    final boolean hasFailures = !failures.isEmpty();
    if (hasFailures && canFail) {
        return;
    }
    map.trim();
}
/**
 * Returns the number of n-grams the underlying map reports for
 * {@code ngramOrder}.
 */
public long size() { return map.getNumNgrams(ngramOrder); } }
/**
 * Iterates over the map's n-grams of order {@code ngramOrder}, presenting each
 * one as a {@link java.util.Map.Entry}.
 * <p>
 * The key is produced on every {@code getKey()} call by converting the map
 * entry's key with the word indexer; the value is the map entry's value.
 * Entries are read-only: {@code setValue} always throws
 * {@link UnsupportedOperationException}.
 *
 * @return an iterator over the entries of the configured order
 */
@Override
public Iterator<Entry<List<W>, V>> iterator() {
    return new Iterators.Transform<NgramMap.Entry<V>, java.util.Map.Entry<List<W>, V>>(map.getNgramsForOrder(ngramOrder).iterator()) {

        @Override
        protected Entry<List<W>, V> transform(final NgramMap.Entry<V> mapEntry) {
            return new java.util.Map.Entry<List<W>, V>() {

                @Override
                public List<W> getKey() {
                    return WordIndexer.StaticMethods.toList(wordIndexer, mapEntry.key);
                }

                @Override
                public V getValue() {
                    return mapEntry.value;
                }

                @Override
                public V setValue(V ignored) {
                    throw new UnsupportedOperationException("Method not yet implemented");
                }
            };
        }
    };
}