/** * Constructs an instance of FieldORIterablePosting. * @param ips * @throws IOException */ public FieldORIterablePosting(IterablePosting[] ips) throws IOException { super(ips); fieldCount = ((FieldPosting)ips[0]).getFieldFrequencies().length; fieldFreqs = new int[fieldCount]; fieldLens = new int[fieldCount]; }
/**
 * Processes the first posting: delegates to the superclass, then copies the
 * first {@code fieldCount} field frequencies and field lengths of the given
 * posting into {@code fieldFreqs} and {@code fieldLens}.
 *
 * @param _p the posting to read from; it is cast to {@link FieldPosting}
 */
@Override
protected void firstPosting(Posting _p) {
    super.firstPosting(_p);
    final FieldPosting fieldView = (FieldPosting) _p;

    final int[] sourceFrequencies = fieldView.getFieldFrequencies();
    System.arraycopy(sourceFrequencies, 0, fieldFreqs, 0, fieldCount);

    final int[] sourceLengths = fieldView.getFieldLengths();
    System.arraycopy(sourceLengths, 0, fieldLens, 0, fieldCount);
}
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = new double[fieldCount]; for(int i=0;i<fieldCount;i++) { if (tff[i] != 0.0d) normFieldFreqs[i] = fieldWeights[i] * fieldNormalisations[i].normalise(tff[i], fieldLengths[i], fieldGlobalFrequencies[i]); } final double tf = StaTools.sum(normFieldFreqs); //System.err.println("tf=" + tf); if (tf == 0.0d) return 0.0d; return basicModel.score(tf, super.documentFrequency, super.termFrequency, super.keyFrequency, p.getDocumentLength()); }
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] lf = p.getFieldLengths(); //System.err.println("tff=" + ArrayUtils.join(tff, ",")); //System.err.println("lf=" + ArrayUtils.join(lf, ",")); assert lf != null : "No fields lengths from posting "; assert tff.length == lf.length : "Mismatch between lengths of field length and frequencies"; int tf = 0, l = 0; for(int fieldId : activeFieldIds) { tf += tff[fieldId]; l += lf[fieldId]; } if (tf == 0) return 0; assert l > 0 : "Frequency but no length for docid " + p.getId(); //System.err.println("tf=" + tf + " l="+l); final double rtr = basicModel.score(tf, l); if (Double.isNaN(rtr)) System.err.println("BPosting " + p.getId() + " had NaN : tf=" + tf + " l="+l+ " tf=" + org.terrier.utility.ArrayUtils.join(tff, ",") + " lf=" + org.terrier.utility.ArrayUtils.join(lf, ",")); return rtr; }
/**
 * Returns the field lengths reported by the current posting.
 *
 * @return the array obtained from {@code current}, after casting it to
 *         {@link FieldPosting}
 */
@Override
public int[] getFieldLengths() {
    final FieldPosting delegate = (FieldPosting) current;
    return delegate.getFieldLengths();
}
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
/**
 * Forwards the given field lengths to the current posting.
 *
 * @param newLengths the array passed on to {@code current}, after casting it to
 *                   {@link FieldPosting}
 */
@Override
public void setFieldLengths(int[] newLengths) {
    final FieldPosting delegate = (FieldPosting) current;
    delegate.setFieldLengths(newLengths);
}
}
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] lf = p.getFieldLengths(); //System.err.println("tff=" + ArrayUtils.join(tff, ",")); //System.err.println("lf=" + ArrayUtils.join(lf, ",")); assert lf != null : "No fields lengths from posting "; assert tff.length == lf.length : "Mismatch between lengths of field length and frequencies"; int tf = 0, l = 0; for(int fieldId : activeFieldIds) { tf += tff[fieldId]; l += lf[fieldId]; } if (tf == 0) return 0; assert l > 0 : "Frequency but no length for docid " + p.getId(); //System.err.println("tf=" + tf + " l="+l); final double rtr = basicModel.score(tf, l); if (Double.isNaN(rtr)) System.err.println("BPosting " + p.getId() + " had NaN : tf=" + tf + " l="+l+ " tf=" + org.terrier.utility.ArrayUtils.join(tff, ",") + " lf=" + org.terrier.utility.ArrayUtils.join(lf, ",")); return rtr; }
/**
 * Returns the length of this posting's field, read from the parent posting.
 *
 * @return the entry at index {@code fieldId} of the field-length array returned
 *         by {@code fieldParent}
 */
@Override
public int getDocumentLength() {
    final int[] lengthsOfAllFields = fieldParent.getFieldLengths();
    return lengthsOfAllFields[fieldId];
}
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
/**
 * Forwards the given field lengths to the current posting.
 *
 * @param newLengths the array passed on to {@code current}, after casting it to
 *                   {@link FieldPosting}
 */
@Override
public void setFieldLengths(int[] newLengths) {
    final FieldPosting delegate = (FieldPosting) current;
    delegate.setFieldLengths(newLengths);
}
}
/** * Constructs an instance of FieldORIterablePosting. * @param ips * @throws IOException */ public FieldORIterablePosting(IterablePosting[] ips) throws IOException { super(ips); fieldCount = ((FieldPosting)ips[0]).getFieldFrequencies().length; fieldFreqs = new int[fieldCount]; fieldLens = new int[fieldCount]; }
/**
 * Processes the first posting: delegates to the superclass, then copies the
 * first {@code fieldCount} field frequencies and field lengths of the given
 * posting into {@code fieldFreqs} and {@code fieldLens}.
 *
 * @param _p the posting to read from; it is cast to {@link FieldPosting}
 */
@Override
protected void firstPosting(Posting _p) {
    super.firstPosting(_p);
    final FieldPosting fieldView = (FieldPosting) _p;

    final int[] sourceFrequencies = fieldView.getFieldFrequencies();
    System.arraycopy(sourceFrequencies, 0, fieldFreqs, 0, fieldCount);

    final int[] sourceLengths = fieldView.getFieldLengths();
    System.arraycopy(sourceLengths, 0, fieldLens, 0, fieldCount);
}
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = new double[fieldCount]; for(int i=0;i<fieldCount;i++) { if (tff[i] != 0.0d) normFieldFreqs[i] = fieldWeights[i] * fieldNormalisations[i].normalise(tff[i], fieldLengths[i], fieldGlobalFrequencies[i]); } final double tf = StaTools.sum(normFieldFreqs); //System.err.println("tf=" + tf); if (tf == 0.0d) return 0.0d; return basicModel.score(tf, super.documentFrequency, super.termFrequency, super.keyFrequency, p.getDocumentLength()); }
/**
 * Returns the field lengths reported by the current posting.
 *
 * @return the array obtained from {@code current}, after casting it to
 *         {@link FieldPosting}
 */
@Override
public int[] getFieldLengths() {
    final FieldPosting delegate = (FieldPosting) current;
    return delegate.getFieldLengths();
}
/**
 * Forwards the given field lengths to the current posting.
 *
 * @param newLengths the array passed on to {@code current}, after casting it to
 *                   {@link FieldPosting}
 */
@Override
public void setFieldLengths(int[] newLengths) {
    final FieldPosting delegate = (FieldPosting) current;
    delegate.setFieldLengths(newLengths);
}
/**
 * Returns the field frequencies reported by the current posting.
 *
 * @return the array obtained from {@code current}, after casting it to
 *         {@link FieldPosting}
 */
@Override
public int[] getFieldFrequencies() {
    final FieldPosting delegate = (FieldPosting) current;
    return delegate.getFieldFrequencies();
}
// Local set-up: q starts at 1.0, tf_q is seeded from the inherited termFrequency,
// and the per-field frequency and length arrays are read from the posting fp.
// NOTE(review): the enclosing method is not visible here - confirm how q and tf_q are used afterwards.
double q = 1.0d; double tf_q = super.termFrequency; final int[] tff = fp.getFieldFrequencies(); final int[] fieldLengths = fp.getFieldLengths();
/**
 * Returns the field lengths reported by the current posting.
 *
 * @return the array obtained from {@code current}, after casting it to
 *         {@link FieldPosting}
 */
@Override
public int[] getFieldLengths() {
    final FieldPosting delegate = (FieldPosting) current;
    return delegate.getFieldLengths();
}
/**
 * Forwards the given field lengths to the current posting.
 *
 * @param newLengths the array passed on to {@code current}, after casting it to
 *                   {@link FieldPosting}
 */
@Override
public void setFieldLengths(int[] newLengths) {
    final FieldPosting delegate = (FieldPosting) current;
    delegate.setFieldLengths(newLengths);
}