/**
 * Reports the order of the wrapped language model.
 *
 * @return whatever {@code languageModel.getOrder()} returns
 *         (presumably the n-gram order; confirm against the model's contract)
 */
@Override
public int order() {
    final int modelOrder = languageModel.getOrder();
    return modelOrder;
}
/**
 * Reports the name of the wrapped language model.
 *
 * @return the value of {@code lm.getName()}
 */
@Override
public String getName() {
    final String modelName = lm.getName();
    return modelName;
}
/**
 * Lower-cases, in place, those tokens of {@code label} that the language
 * model scores clearly better in lower case.
 *
 * <p>A token is replaced only when it actually differs from its lower-cased
 * form, both unigram scores are finite numbers, and the lower-cased score
 * exceeds the original score by more than 1.5. When no model is available
 * ({@code lm == null}) the list is returned untouched.
 *
 * @param label the tokens to inspect; modified in place, so it must support
 *              {@link ListIterator#set}
 * @return the same {@code label} instance that was passed in
 */
@Override
public List<String> preCase(List<String> label) {
    if (lm == null) {
        return label;
    }
    for (final ListIterator<String> cursor = label.listIterator(); cursor.hasNext(); ) {
        final String original = cursor.next();
        final String lowered = original.toLowerCase();
        if (original.equals(lowered)) {
            // Already lower case: nothing to decide.
            continue;
        }
        final double originalScore = lm.score(Arrays.asList(original));
        final double loweredScore = lm.score(Arrays.asList(lowered));
        final boolean unusable = Double.isNaN(originalScore)
                || Double.isNaN(loweredScore)
                || Double.isInfinite(originalScore)
                || Double.isInfinite(loweredScore);
        if (unusable) {
            // Without two comparable scores the original casing is kept.
            continue;
        }
        if (loweredScore - originalScore > 1.5) {
            cursor.set(lowered);
        }
    }
    return label;
}
/**
 * Sums the model scores of every n-gram window ending at each token
 * position of {@code tokens}.
 *
 * <p>For position {@code pos} the window covers at most
 * {@code model.getOrder()} tokens ending at {@code pos}; all tokens in the
 * window are lower-cased before scoring. A window whose score is negative
 * infinity or NaN contributes {@code MOSES_LM_UNKNOWN_WORD_SCORE} instead.
 *
 * @param model  the language model used for scoring
 * @param tokens the token sequence to score; not modified
 * @return the accumulated score, {@code 0.0} for an empty sequence
 */
public double lmProb(LanguageModel model, List<String> tokens) {
    double total = 0.0;
    int end = 0;
    while (end < tokens.size()) {
        final int begin = Math.max(0, end - model.getOrder() + 1);

        // Build a lower-cased copy of the window so the caller's list is untouched.
        final List<String> window = new ArrayList<String>();
        for (final String token : tokens.subList(begin, end + 1)) {
            window.add(token.toLowerCase());
        }

        final double windowScore = model.score(window);
        final boolean unknown =
                windowScore == Double.NEGATIVE_INFINITY || Double.isNaN(windowScore);
        if (unknown) {
            total += MOSES_LM_UNKNOWN_WORD_SCORE;
        } else {
            total += windowScore;
        }
        end++;
    }
    return total;
}
/**
 * Asks the wrapped model whether {@code sequence} is a relevant prefix.
 *
 * <p>The sequence is first converted to a list of plain strings by taking
 * {@code word()} of every element, in order.
 *
 * @param sequence the token sequence to test
 * @return the result of {@code lm.isRelevantPrefix} on the converted list
 */
@Override
public boolean releventPrefix(Sequence<IString> sequence) {
    final List<String> words = new LinkedList<String>();
    int index = 0;
    while (index < sequence.size()) {
        final IString token = sequence.get(index);
        words.add(token.word());
        index++;
    }
    final boolean relevant = lm.isRelevantPrefix(words);
    return relevant;
}
} // closes the enclosing type, whose header lies outside this excerpt
continue; final int quartile = nGramSource.quartile(Arrays.asList(ngram)); if (quartile == 0 || quartile == 1) { botCount++;
/**
 * Command-line entry point: scores each line of standard input with the
 * language model for the given language and prints one score per line.
 *
 * <p>Each input line is split on single spaces before scoring.
 *
 * @param args exactly one element, passed to {@code Language.get}
 * @throws IllegalArgumentException if the argument count is not one
 * @throws Exception propagated from model loading or reading standard input
 */
public static void main(String[] args) throws Exception {
    final boolean exactlyOneArgument = args.length == 1;
    if (!exactlyOneArgument) {
        throw new IllegalArgumentException("Please specify language");
    }

    final LMFactory factory = new LMFactory();
    final LanguageModel model = factory.getModel(Language.get(args[0]));

    final BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
    for (String line = stdin.readLine(); line != null; line = stdin.readLine()) {
        final String[] fields = line.split(" ");
        System.out.println(model.score(Arrays.asList(fields)));
    }
}
} // closes the enclosing type, whose header lies outside this excerpt
@Override public double[] get(Phrase phrase) { final List<String> ls = new ArrayList<String>(phrase.p.length); for (int i = 0; i < phrase.n; i++) { ls.add(invWordMap.get(phrase.p[i + phrase.l])); } final double[] score = new double[]{languageModel.score(ls)}; if (Double.isInfinite(score[0]) || Double.isNaN(score[0])) { // Fallback case... try the lower-cased form final ListIterator<String> lsi = ls.listIterator(); boolean differs = false; while (lsi.hasNext()) { final String s = lsi.next(); if (s != null) { final String sl = s.toLowerCase(); differs = (!s.equals(sl)) || differs; lsi.set(sl); } } if (differs) { return new double[]{languageModel.score(ls)}; } else { return score; } } else { return score; } }
/**
 * Reports the order of the wrapped language model.
 *
 * @return whatever {@code lm.getOrder()} returns
 *         (presumably the n-gram order; confirm against the model's contract)
 */
@Override
public int order() {
    final int modelOrder = lm.getOrder();
    return modelOrder;
}
/**
 * Scores a token sequence with the wrapped language model.
 *
 * <p>The sequence is converted to a list of plain strings by taking
 * {@code word()} of every element, in order, before delegating.
 *
 * @param sequence the token sequence to score
 * @return the value of {@code lm.score} for the converted list
 */
@Override
public double score(Sequence<IString> sequence) {
    final List<String> words = new LinkedList<String>();
    int index = 0;
    while (index < sequence.size()) {
        final IString token = sequence.get(index);
        words.add(token.word());
        index++;
    }
    return lm.score(words);
}
/**
 * Computes the share of source-side tokens whose unigram score in the
 * source-language model is not infinite.
 *
 * <p>Despite the name, the result is a fraction in {@code [0, 1]}, not a
 * percentage. When no model exists for the source language the method
 * returns {@code 1.0}. An empty token list also returns {@code 1.0}: no
 * token is missing from the model, and this avoids the {@code 0.0 / 0}
 * division that previously produced {@code NaN}.
 *
 * @param translation the translation whose source label is inspected
 * @return the fraction of tokens with a non-infinite unigram score, or
 *         {@code 1.0} when there is no model or there are no tokens
 */
public double percentUnigramsInLM(Translation translation) {
    final LanguageModel nGramSource = getModel(translation.getSourceLabel().getLanguage());
    if (nGramSource == null) {
        return 1.0;
    }

    final List<String> tokens = getTokens(translation.getSourceLabel());
    if (tokens.isEmpty()) {
        // Guard against 0.0 / 0, which evaluates to NaN rather than a usable ratio.
        return 1.0;
    }

    int unknownCount = 0;
    for (final String token : tokens) {
        // Only an infinite score marks a token as unknown; NaN does not.
        if (Double.isInfinite(nGramSource.score(Arrays.asList(token)))) {
            unknownCount++;
        }
    }
    return (double) (tokens.size() - unknownCount) / tokens.size();
}