/**
 * Computes the fraction of source-label tokens that the source-language model scores
 * with a finite value when each token is queried on its own (as a unigram).
 *
 * <p>A token is treated as missing from the model when the score of the single-token
 * sequence is infinite.
 *
 * @param translation the translation whose source label is examined
 * @return the share of tokens with a finite unigram score, in the range [0, 1];
 *         {@code 1.0} when no language model is available for the source language or
 *         when the source label produces no tokens
 */
public double percentUnigramsInLM(Translation translation) {
    final LanguageModel nGramSource = getModel(translation.getSourceLabel().getLanguage());
    if (nGramSource == null) {
        return 1.0;
    }
    final List<String> tokens = getTokens(translation.getSourceLabel());
    if (tokens.isEmpty()) {
        // Without this guard the division below is 0.0 / 0, which evaluates to NaN.
        // Use the same neutral value as the "no model" case.
        return 1.0;
    }
    int known = 0;
    for (String token : tokens) {
        if (!Double.isInfinite(nGramSource.score(Arrays.asList(token)))) {
            known++;
        }
    }
    return (double) known / tokens.size();
}
/**
 * Computes the average number of phrase-table candidates per source token whose
 * feature score reaches a minimum threshold.
 *
 * <p>Each source-label token is looked up individually in the translation source for
 * the (source language, target language) pair. A candidate is counted when the score
 * of its feature at index 2 is at least {@code Math.log(minProb)}.
 *
 * @param translation the translation providing the source label and the language pair
 * @param minProb     the minimum probability; it is converted with {@link Math#log(double)}
 *                    before being compared against the feature score
 * @return the number of qualifying candidates divided by the number of tokens;
 *         {@code 0.0} when no translation source exists for the language pair or when
 *         the source label produces no tokens
 */
public double aveTranslationCount(Translation translation, double minProb) {
    final TranslationSource source = getSource(
            translation.getSourceLabel().getLanguage(),
            translation.getTargetLabel().getLanguage());
    if (source == null) {
        return 0.0;
    }
    final List<String> tokens = getTokens(translation.getSourceLabel());
    if (tokens.isEmpty()) {
        // Without this guard the division below is 0.0 / 0, which evaluates to NaN.
        return 0.0;
    }
    final double logMinProb = Math.log(minProb);
    int transCt = 0;
    for (String token : tokens) {
        final PhraseTable candidates = source.candidates(new ChunkImpl(token));
        for (PhraseTableEntry entry : candidates) {
            // NOTE(review): feature index 2 is presumably a log-probability, since it is
            // compared against log(minProb) -- confirm which feature this index selects.
            if (entry.getFeatures()[2].score >= logMinProb) {
                transCt++;
            }
        }
    }
    return (double) transCt / tokens.size();
}
/**
 * Scores the tokens of the translation's target label with the language model
 * registered for the target label's language.
 *
 * @param translation the translation whose target label is scored
 * @return the value produced by {@code lmProb} for the target tokens, or {@code 0}
 *         when no model is available for the target language
 */
public double targetLMProb(Translation translation) {
    final LanguageModel targetModel = getModel(translation.getTargetLabel().getLanguage());
    return targetModel == null
            ? 0
            : lmProb(targetModel, getTokens(translation.getTargetLabel()));
}
/**
 * Scores the tokens of the translation's source label with the language model
 * registered for the source label's language.
 *
 * @param translation the translation whose source label is scored
 * @return the value produced by {@code lmProb} for the source tokens, or {@code 0}
 *         when no model is available for the source language
 */
public double sourceLMProb(Translation translation) {
    final LanguageModel sourceModel = getModel(translation.getSourceLabel().getLanguage());
    return sourceModel == null
            ? 0
            : lmProb(sourceModel, getTokens(translation.getSourceLabel()));
}
public double[] percentNGramsInTopBotQuartile(Translation translation, int n) { final LanguageModel nGramSource = getModel(translation.getSourceLabel().getLanguage()); if (nGramSource == null) { return new double[]{0.0, 0.0};