@Override
public final float score(BasicStats stats, float tfn) {
  // Inverse-term-frequency style model: the informative content grows with
  // log2(1 + (N + 1) / (F + 0.5)), where N is the number of documents in the
  // collection and F the term's total frequency. The +1 / +0.5 offsets keep
  // the logarithm's argument finite and positive even for tiny F.
  long docCount = stats.getNumberOfDocuments();
  long totalTermFreq = stats.getTotalTermFreq();
  double informativeContent = log2(1 + (docCount + 1) / (totalTermFreq + 0.5));
  return tfn * (float) informativeContent;
}
@Override
public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  // Resolve the delegate similarity for this field once and cache both the
  // delegate and the weight it computes, so simScorer can later dispatch to
  // exactly the same delegate instance.
  final PerFieldSimWeight perFieldWeight = new PerFieldSimWeight();
  perFieldWeight.delegate = get(collectionStats.field());
  perFieldWeight.delegateWeight =
      perFieldWeight.delegate.computeWeight(boost, collectionStats, termStats);
  return perFieldWeight;
}
@Override protected float score(BasicStats stats, float freq, float docLen) { final float expected = (stats.getTotalTermFreq() + 1) * docLen / (stats.getNumberOfFieldTokens() + 1); // if the observed frequency is less than or equal to the expected value, then return zero. if (freq <= expected) return 0; final float measure = independence.score(freq, expected); return stats.getBoost() * (float) log2(measure + 1); }
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  // Ask every wrapped similarity for its weight and bundle the results;
  // array positions stay aligned with the sims that produced them so
  // simScorer can pair them back up later.
  final SimWeight[] perSimWeights = new SimWeight[sims.length];
  for (int sim = 0; sim < perSimWeights.length; sim++) {
    perSimWeights[sim] = sims[sim].computeWeight(boost, collectionStats, termStats);
  }
  return new MultiStats(perSimWeights);
}
@Override
public final float score(BasicStats stats, float tfn) {
  // Bose-Einstein style model: scores via a difference of log-factorial-like
  // terms. NOTE(review): log2 and f appear to be sibling helpers of this
  // class; f presumably evaluates the log-factorial difference used by the
  // Ge approximation — confirm against their definitions.
  // tfn is folded into F unconditionally so that F > tfn always holds,
  // keeping the f(...) arguments in a valid range.
  double F = stats.getTotalTermFreq() + 1 + tfn;
  // approximation only holds true when F << N, so we use N += F
  double N = F + stats.getNumberOfDocuments();
  return (float)(-log2((N - 1) * Math.E)
      + f(N + F - 1, N + F - tfn - 2)
      - f(F, F - tfn));
}
@Override
public SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
  // Unpack the aggregated per-similarity weights and build one scorer per
  // wrapped similarity, keeping array positions aligned with sims.
  final SimWeight[] subWeights = ((MultiStats) stats).subStats;
  final SimScorer[] perSimScorers = new SimScorer[sims.length];
  for (int sim = 0; sim < perSimScorers.length; sim++) {
    perSimScorers[sim] = sims[sim].simScorer(subWeights[sim], context);
  }
  return new MultiSimScorer(perSimScorers);
}
@Override
protected void explain(
    List<Explanation> subs, BasicStats stats, int doc, float freq, float docLen) {
  // Only surface the boost when it actually changes the score.
  if (stats.getBoost() != 1.0f) {
    subs.add(Explanation.match(stats.getBoost(), "boost"));
  }
  // Explain each factor of the formula in the order it is applied: length
  // normalization first, then the lambda parameter, and finally the
  // distribution score computed from those two values.
  final Explanation normExplanation = normalization.explain(stats, freq, docLen);
  final Explanation lambdaExplanation = lambda.explain(stats);
  subs.add(normExplanation);
  subs.add(lambdaExplanation);
  subs.add(distribution.explain(stats, normExplanation.getValue(), lambdaExplanation.getValue()));
}
/**
 * The name of IB methods follow the pattern
 * {@code IB <distribution> <lambda><normalization>}. The name of the
 * distribution is the same as in the original paper; for the names of lambda
 * parameters, refer to the javadoc of the {@link Lambda} classes.
 */
@Override
public String toString() {
  // Explicit toString() calls are kept so a null component still fails fast
  // with an NPE instead of silently printing "null".
  return new StringBuilder("IB ")
      .append(distribution.toString())
      .append('-')
      .append(lambda.toString())
      .append(normalization.toString())
      .toString();
}
/**
 * Computes the collection probability of the current term in addition to the
 * usual statistics.
 */
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  // Let the superclass populate the shared statistics first ...
  super.fillBasicStats(stats, collectionStats, termStats);
  // ... then augment them with the term's collection probability as computed
  // by the configured collection model.
  final LMStats languageModelStats = (LMStats) stats;
  languageModelStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
@Override
public final float score(BasicStats stats, float tfn) {
  // Inverse-document-frequency style model: log2((N + 1) / (n + 0.5)),
  // where N is the number of documents and n the term's document frequency.
  // The offsets avoid a zero denominator and a zero logarithm argument.
  long docCount = stats.getNumberOfDocuments();
  long docFreq = stats.getDocFreq();
  return tfn * (float) log2((docCount + 1) / (docFreq + 0.5));
}
@Override
public final float tfn(BasicStats stats, float tf, float len) {
  // Length normalization: scale tf by how much shorter (or longer) this
  // document is than the field average; c controls the correction strength.
  // The intermediate is held as double, which only widens the value the
  // original float expression produced.
  final double lengthRatio = 1 + c * stats.getAvgFieldLength() / len;
  return (float) (tf * log2(lengthRatio));
}
@Override
public final float score(BasicStats stats, float tfn) {
  // Ratio-of-Bernoulli-processes after effect: (F + 1) / (n * (tfn + 1)),
  // where F is the total term frequency and n the document frequency, both
  // shifted by one to keep the denominator nonzero.
  long shiftedTotalFreq = stats.getTotalTermFreq() + 1;
  long shiftedDocFreq = stats.getDocFreq() + 1;
  return (shiftedTotalFreq + 1) / (shiftedDocFreq * (tfn + 1));
}
@Override public float score(int doc, float freq) throws IOException { // We have to supply something in case norms are omitted return SimilarityBase.this.score(stats, freq, getLengthValue(doc)); }
@Override
public Explanation explain(int doc, Explanation freq) throws IOException {
  // Delegate to the outer similarity's explain, resolving the document
  // length the same way score(int, float) does.
  final float docLen = getLengthValue(doc);
  return SimilarityBase.this.explain(stats, doc, freq, docLen);
}
@Override public final float score(BasicStats stats, float tfn) { // just like in BE, approximation only holds true when F << N, so we use lambda = F / (N + F) double F = stats.getTotalTermFreq() + 1; double N = stats.getNumberOfDocuments(); double lambda = F / (N + F); // -log(1 / (lambda + 1)) -> log(lambda + 1) return (float)(log2(lambda + 1) + tfn * log2((1 + lambda) / lambda)); }
@Override
public final float score(BasicStats stats, float tfn) {
  // Divergence-style model: D below is the KL divergence between the
  // observed within-document rate phi and the collection-wide prior p,
  // scaled by F, plus a log normalization term.
  // we have to ensure phi is always < 1 for tiny TTF values, otherwise nphi can go negative,
  // resulting in NaN. cleanest way is to unconditionally always add tfn to totalTermFreq
  // to create a 'normalized' F.
  double F = stats.getTotalTermFreq() + 1 + tfn;
  double phi = (double) tfn / F;
  double nphi = 1 - phi;
  // Prior probability of the term in a document, smoothed by +1.
  double p = 1.0 / (stats.getNumberOfDocuments() + 1);
  double D = phi * log2(phi / p) + nphi * log2(nphi / (1 - p));
  return (float) (D * F + 0.5 * log2(1 + 2 * Math.PI * tfn * nphi));
}
@Override
public final float score(BasicStats stats, float tfn) {
  // Poisson-style model: lambda is the term's mean frequency per document;
  // the score is the information content of observing tfn occurrences,
  // using a Stirling-style correction (the 1/(12*tfn) and 0.5*log2(2*pi*tfn)
  // terms) for the factorial.
  float lambda = (float) (stats.getTotalTermFreq() + 1) / (stats.getNumberOfDocuments() + 1);
  double ratioTerm = tfn * log2(tfn / lambda);
  double stirlingTerm = (lambda + 1 / (12 * tfn) - tfn) * LOG2_E;
  double tailTerm = 0.5 * log2(2 * Math.PI * tfn);
  // Same left-to-right summation order as the original single expression.
  return (float) (ratioTerm + stirlingTerm + tailTerm);
}