/**
 * Captures the document length and the frequency reported by the given
 * posting into {@code doclen} and {@code frequency}.
 *
 * @param p posting to read the document length and frequency from
 */
protected void firstPosting(Posting p) {
    doclen = p.getDocumentLength();
    frequency = p.getFrequency();
}
/**
 * Captures the document length and the frequency reported by the given
 * posting into {@code doclen} and {@code frequency}.
 *
 * @param p posting to read the document length and frequency from
 */
protected void firstPosting(Posting p) {
    doclen = p.getDocumentLength();
    frequency = p.getFrequency();
}
/**
 * Scores a posting by forwarding its frequency and document length to the
 * two-argument {@code score} method.
 *
 * @param p posting to score
 * @return whatever the two-argument {@code score} call returns for this
 *         posting's frequency and document length
 */
public double score(Posting p) {
    return score(p.getFrequency(), p.getDocumentLength());
}
/**
 * Scores a posting by forwarding its frequency and document length to the
 * two-argument {@code score} method.
 *
 * @param p posting to score
 * @return whatever the two-argument {@code score} call returns for this
 *         posting's frequency and document length
 */
public double score(Posting p) {
    return score(p.getFrequency(), p.getDocumentLength());
}
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
@Test public void testSingleDocumentIndexMatching() throws Exception { ResultSet rs = super._testSingleDocumentIndexMatching(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); }
/**
 * Runs the inherited two-documents/two-terms scenario and verifies the
 * postings retained per ranked document: the first ranked document
 * (docid 1, length 8) has a posting for both terms, while the second
 * (docid 0, length 9) has a posting for the first term only and
 * {@code null} for the other.
 */
@Test
public void testTwoDocumentsTwoTerms() throws Exception {
    final ResultSet rs = super._testTwoDocumentsTwoTerms();
    assertTrue(rs instanceof FatCandidateResultSet);
    final FatCandidateResultSet fatResults = (FatCandidateResultSet) rs;

    // first ranked document: both terms matched
    final Posting[] rank0 = fatResults.getPostings()[0];
    assertEquals(2, rank0.length);
    assertEquals(1, rank0[0].getId());
    assertEquals(1, rank0[0].getFrequency());
    assertEquals(8, rank0[0].getDocumentLength());
    assertEquals(1, rank0[1].getId());
    assertEquals(1, rank0[1].getFrequency());
    assertEquals(8, rank0[1].getDocumentLength());

    // second ranked document: only the first term matched
    final Posting[] rank1 = fatResults.getPostings()[1];
    assertEquals(2, rank1.length);
    assertEquals(0, rank1[0].getId());
    assertEquals(1, rank1[0].getFrequency());
    assertEquals(9, rank1[0].getDocumentLength());
    assertNull(rank1[1]);
}
@Test public void testSingleDocumentIndexMatchingFields() throws Exception { ResultSet rs = _testSingleDocumentIndexMatchingFields(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); FieldPosting p = (FieldPosting)postings[0]; assertEquals(1, p.getFieldFrequencies()[0]); assertEquals(0, p.getFieldFrequencies()[1]); assertEquals(2, p.getFieldLengths()[0]); assertEquals(7, p.getFieldLengths()[1]); }