/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
@Test public void testNoTimes_window2() { int[] x = new int[]{8,10,14,15}; int[] y = new int[]{1,4,6,12,17}; assertEquals(0, Distance.noTimes(new int[][]{x,y}, 2, 20)); x = new int[]{8}; y = new int[]{9}; //0:0-1 1:1-2 2:2-3 3:3-4 4:4-5 5:5-6 6:6-7 7:7-8 8:8-9 9:9-10 //10:10-11 11:11-12 12:12-13 13:13-14 14:14-15 15:15-16 16:16-17 17:18-18 18:18-19 //only 8-9 is a match assertEquals(1, Distance.noTimes(new int[][]{x,y}, 2, 20)); x = new int[]{8,10,14,15}; //8-9 and 9-10 are matches assertEquals(2, Distance.noTimes(new int[][]{x,y}, 2, 20)); y = new int[]{7}; //8-7 is a match assertEquals(1, Distance.noTimes(new int[][]{x,y}, 2, 20)); y = new int[]{7,9}; //(7,8), (8,9), (9,10) are matches assertEquals(3, Distance.noTimes(new int[][]{x,y}, 2, 20)); }
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
@Test public void testNoTimes_2terms_window2() { for(int i=0;i<1000000;i++) { int[] x = new int[]{8,10,14,15}; int[] y = new int[]{1,4,6,12,17}; assertEquals(0, Distance.noTimes(x,y, 2, 20)); x = new int[]{8}; y = new int[]{9}; //0:0-1 1:1-2 2:2-3 3:3-4 4:4-5 5:5-6 6:6-7 7:7-8 8:8-9 9:9-10 //10:10-11 11:11-12 12:12-13 13:13-14 14:14-15 15:15-16 16:16-17 17:18-18 18:18-19 //only 8-9 is a match assertEquals(1, Distance.noTimes(x,y, 2, 20)); x = new int[]{8,10,14,15}; //8-9 and 9-10 are matches assertEquals(2, Distance.noTimes(x,y, 2, 20)); y = new int[]{7}; //8-7 is a match assertEquals(1, Distance.noTimes(x,y, 2, 20)); y = new int[]{7,9}; //(7,8), (8,9), (9,10) are matches assertEquals(3, Distance.noTimes(x,y, 2, 20)); } }