/**
 * Initialises the running state from the first posting seen.
 * Stores the posting's document length in {@code doclen} and its
 * frequency in {@code frequency}, overwriting any previous values.
 *
 * @param p the first posting
 */
protected void firstPosting(Posting p) {
    final int lengthOfDocument = p.getDocumentLength();
    doclen = lengthOfDocument;

    final int tf = p.getFrequency();
    frequency = tf;
}
/**
 * Runs the inherited two-documents/two-terms scenario and checks the postings
 * retained in the fat result set: both postings are present for the first
 * ranked document, while the second ranked document has only its first posting
 * and a null second entry.
 */
@Test
public void testTwoDocumentsTwoTerms() throws Exception {
    final ResultSet rs = super._testTwoDocumentsTwoTerms();
    assertTrue(rs instanceof FatCandidateResultSet);
    final FatCandidateResultSet fatResults = (FatCandidateResultSet) rs;

    // first ranked document: both query terms have a posting
    final Posting[] firstRanked = fatResults.getPostings()[0];
    assertEquals(2, firstRanked.length);
    assertEquals(1, firstRanked[0].getId());
    assertEquals(1, firstRanked[0].getFrequency());
    assertEquals(8, firstRanked[0].getDocumentLength());
    assertEquals(1, firstRanked[1].getId());
    assertEquals(1, firstRanked[1].getFrequency());
    assertEquals(8, firstRanked[1].getDocumentLength());

    // second ranked document: only the first term has a posting
    final Posting[] secondRanked = fatResults.getPostings()[1];
    assertEquals(2, secondRanked.length);
    assertEquals(0, secondRanked[0].getId());
    assertEquals(1, secondRanked[0].getFrequency());
    assertEquals(9, secondRanked[0].getDocumentLength());
    assertNull(secondRanked[1]);
}
public static void comparePostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
/**
 * Accumulates a further posting into the running state by adding its
 * frequency to {@code frequency}.
 *
 * @param p the posting to add
 */
protected void addPosting(Posting p) {
    final int tf = p.getFrequency();
    frequency += tf;
}
// Overwrite p's id with the value termcodeHashmap returns for the current id of
// `postings`, then append p to postingList.
// NOTE(review): presumably this remaps an id to its term code; confirm against the enclosing method.
p.setId(termcodeHashmap.get(postings.getId())); postingList.add(p);
@Test public void testSingleDocumentIndexMatching() throws Exception { ResultSet rs = super._testSingleDocumentIndexMatching(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); }
public static void compareBlockPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((BlockPosting) p).getPositions(), ((BlockPosting) p).getPositions()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
/**
 * Accumulates a further posting into the running state by adding its
 * frequency to {@code frequency}.
 *
 * @param p the posting to add
 */
protected void addPosting(Posting p) {
    final int tf = p.getFrequency();
    frequency += tf;
}
@Test public void testSingleDocumentIndexMatchingFields() throws Exception { ResultSet rs = _testSingleDocumentIndexMatchingFields(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); FieldPosting p = (FieldPosting)postings[0]; assertEquals(1, p.getFieldFrequencies()[0]); assertEquals(0, p.getFieldFrequencies()[1]); assertEquals(2, p.getFieldLengths()[0]); assertEquals(7, p.getFieldLengths()[1]); }
public static void compareFieldPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((FieldPosting) p).getFieldFrequencies(), ((FieldPosting) p).getFieldFrequencies()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
/**
 * Initialises the running state from the first posting seen.
 * Stores the posting's document length in {@code doclen} and its
 * frequency in {@code frequency}, overwriting any previous values.
 *
 * @param p the first posting
 */
protected void firstPosting(Posting p) {
    final int lengthOfDocument = p.getDocumentLength();
    doclen = lengthOfDocument;

    final int tf = p.getFrequency();
    frequency = tf;
}
/**
 * Hook method for writing out the remainder of the posting.
 * This implementation writes the posting's frequency to {@code output}
 * using {@code writeUnary}.
 *
 * @param p the posting whose non-docid part is to be written
 * @throws IOException if writing to {@code output} fails
 */
protected void writePostingNotDocid(Posting p) throws IOException {
    final int tf = p.getFrequency();
    output.writeUnary(tf);
}
public static void compareBlockFieldPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((FieldPosting) p).getFieldFrequencies(), ((FieldPosting) p).getFieldFrequencies()); assertArrayEquals(((BlockPosting) p).getPositions(), ((BlockPosting) p).getPositions()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
/**
 * Returns the score for a posting by delegating to the two-argument
 * {@code score} overload with the posting's frequency and document length.
 *
 * @param p the posting to score
 * @return the score computed from the posting's frequency and document length
 */
public double score(Posting p) {
    final int tf = p.getFrequency();
    final int lengthOfDocument = p.getDocumentLength();
    return this.score(tf, lengthOfDocument);
}