/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
/**
 * Writes every posting supplied by the iterator to the output, encoding each
 * docid as a gamma-coded gap from the preceding docid. The gap of the first
 * posting is taken relative to {@code previousId}, which lets the caller
 * adjust the first delta.
 *
 * @param iterator an Iterator of Posting objects
 * @param previousId id of the previous posting in this stream
 * @return a pointer holding the output offset at which writing started and
 *         the number of postings written
 * @throws IOException if writing to the output fails
 */
@Override
public BitIndexPointer writePostings(Iterator<Posting> iterator, int previousId) throws IOException {
    // Capture the start position before anything is written.
    final BitIndexPointer start = new SimpleBitIndexPointer();
    start.setOffset(output.getByteOffset(), output.getBitOffset());

    int written = 0;
    for (; iterator.hasNext(); written++) {
        final Posting current = iterator.next();
        output.writeGamma(current.getId() - previousId);
        // Remember this docid both for the next gap and as the last docid written.
        previousId = current.getId();
        lastDocid = previousId;
        writePostingNotDocid(current);
    }

    start.setNumberOfEntries(written);
    return start;
}
public static void comparePostingsDocids(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); System.err.println(outputPostings.getId()); if (outputPostings.getId() == 2 ) { System.err.println("at 2"); } } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
public static void comparePostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
public static void compareBlockPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((BlockPosting) p).getPositions(), ((BlockPosting) p).getPositions()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
public static void compareFieldPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((FieldPosting) p).getFieldFrequencies(), ((FieldPosting) p).getFieldFrequencies()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
public static void compareBlockFieldPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((FieldPosting) p).getFieldFrequencies(), ((FieldPosting) p).getFieldFrequencies()); assertArrayEquals(((BlockPosting) p).getPositions(), ((BlockPosting) p).getPositions()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
@Test public void testSingleDocumentIndexMatching() throws Exception { ResultSet rs = super._testSingleDocumentIndexMatching(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); }
/**
 * Runs the inherited two-documents/two-terms scenario and checks the
 * postings retained for both ranked documents: the first has a posting for
 * each term, the second has a posting for the first term only.
 */
@Test
public void testTwoDocumentsTwoTerms() throws Exception {
    final ResultSet rs = super._testTwoDocumentsTwoTerms();
    assertTrue(rs instanceof FatCandidateResultSet);
    final FatCandidateResultSet fatResults = (FatCandidateResultSet) rs;

    // first ranked document: both terms have a posting, with identical statistics
    final Posting[] firstRanked = fatResults.getPostings()[0];
    assertEquals(2, firstRanked.length);
    for (final Posting termPosting : firstRanked) {
        assertEquals(1, termPosting.getId());
        assertEquals(1, termPosting.getFrequency());
        assertEquals(8, termPosting.getDocumentLength());
    }

    // second ranked document: only the first term has a posting
    final Posting[] secondRanked = fatResults.getPostings()[1];
    assertEquals(2, secondRanked.length);
    final Posting matched = secondRanked[0];
    assertEquals(0, matched.getId());
    assertEquals(1, matched.getFrequency());
    assertEquals(9, matched.getDocumentLength());
    assertNull(secondRanked[1]);
}
@Test public void testSingleDocumentIndexMatchingFields() throws Exception { ResultSet rs = _testSingleDocumentIndexMatchingFields(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); FieldPosting p = (FieldPosting)postings[0]; assertEquals(1, p.getFieldFrequencies()[0]); assertEquals(0, p.getFieldFrequencies()[1]); assertEquals(2, p.getFieldLengths()[0]); assertEquals(7, p.getFieldLengths()[1]); }