/**
 * Returns the positions reported by the {@code current} posting.
 *
 * @return whatever {@code getPositions()} yields on {@code current}, after
 *         casting it to {@link BlockPosting}
 */
@Override
public int[] getPositions() {
    final BlockPosting currentBlockPosting = (BlockPosting) current;
    return currentBlockPosting.getPositions();
}
}
/**
 * Returns the positions reported by the {@code current} posting.
 *
 * @return whatever {@code getPositions()} yields on {@code current}, after
 *         casting it to {@link BlockPosting}
 */
@Override
public int[] getPositions() {
    final BlockPosting currentBlockPosting = (BlockPosting) current;
    return currentBlockPosting.getPositions();
}
}
/**
 * Adds a posting through the superclass, then appends that posting's
 * positions to the {@code positions} accumulator.
 *
 * @param p posting to add; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void addPosting(Posting p) {
    super.addPosting(p);
    final int[] extraPositions = ((BlockPosting) p).getPositions();
    positions.addAll(extraPositions);
}
/**
 * Adds a posting through the superclass, then appends that posting's
 * positions to the {@code positions} accumulator.
 *
 * @param p posting to add; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void addPosting(Posting p) {
    super.addPosting(p);
    final int[] extraPositions = ((BlockPosting) p).getPositions();
    positions.addAll(extraPositions);
}
/**
 * Adds a posting through the superclass, then appends that posting's
 * positions to the {@code positions} accumulator.
 *
 * @param p posting to add; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void addPosting(Posting p) {
    super.addPosting(p);
    final int[] extraPositions = ((BlockPosting) p).getPositions();
    positions.addAll(extraPositions);
}
/**
 * Adds a posting through the superclass, then appends that posting's
 * positions to the {@code positions} accumulator.
 *
 * @param p posting to add; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void addPosting(Posting p) {
    super.addPosting(p);
    final int[] extraPositions = ((BlockPosting) p).getPositions();
    positions.addAll(extraPositions);
}
for(int i=0;i<termCount;i++) pos[i] = ((BlockPosting)ips[i]).getPositions(); if (pos[i].length < posSmall)
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
/** * how likely is it that these two postings have so many near-occurrences, * given the length of this document */ protected double scoreFDSD(boolean SD, int i, final Posting ip1, int j, final Posting ip2, final double _avgDocLen) { final int[] blocks1 = ((BlockPosting) ip1).getPositions(); final int[] blocks2 = ((BlockPosting) ip2).getPositions(); int docLength = ip1.getDocumentLength(); final int matchingNGrams = SD ? Distance.noTimesSameOrder(blocks1, blocks2, ngramLength, docLength) : Distance.noTimes(blocks1, blocks2, ngramLength, docLength); //System.err.println(this.getClass().getSimpleName() + " matchingNGrams="+matchingNGrams); final double s = scoreFDSD(matchingNGrams, docLength); if (Double.isNaN(s)) { System.err.println(this.getClass().getSimpleName() + " returned NaN for document " + ip1.getId() + " "+i+","+j+" pf="+matchingNGrams + " l="+ docLength); } return s; }
String getContentsBlocks(IterablePosting ip, Lexicon<String> lex) throws Exception { //this assumes block.size is 1. StringBuilder rtr = new StringBuilder(); int termid; TIntObjectHashMap<String> pos2term = new TIntObjectHashMap<String>(); BlockPosting bp = (BlockPosting) ip; while( (termid = ip.next()) != IterablePosting.END_OF_LIST){ String term = lex.getLexiconEntry(termid).getKey(); int[] positions = bp.getPositions(); for(int pos : positions) { pos2term.put(pos, term); } } int[] positions = pos2term.keys(); Arrays.sort(positions); for(int pos : positions){ rtr.append(pos2term.get(pos)); rtr.append(' '); } return rtr.toString(); }
String getContentsBlocks(IterablePosting ip, Lexicon<String> lex) throws Exception { //this assumes block.size is 1. StringBuilder rtr = new StringBuilder(); int termid; TIntObjectHashMap<String> pos2term = new TIntObjectHashMap<String>(); BlockPosting bp = (BlockPosting) ip; while( (termid = ip.next()) != IterablePosting.END_OF_LIST){ String term = lex.getLexiconEntry(termid).getKey(); int[] positions = bp.getPositions(); for(int pos : positions) { pos2term.put(pos, term); } } int[] positions = pos2term.keys(); Arrays.sort(positions); for(int pos : positions){ rtr.append(pos2term.get(pos)); rtr.append(' '); } return rtr.toString(); }
@Override protected void writePostingNotDocid(Posting _p) throws IOException { super.writePostingNotDocid(_p); final BlockPosting p = (BlockPosting)_p; final int positions[] = p.getPositions(); final int l = positions.length; //System.err.println("posting has " + l + "blocks"); output.writeUnary(l+1); if (l == 0) return; output.writeGamma(positions[0]+1); for(int i=1;i<l;i++) { output.writeGamma(positions[i] - positions[i-1]); } }
@Override protected void writePostingNotDocid(Posting _p) throws IOException { BlockPosting p = (BlockPosting)_p; output.writeUnary(p.getFrequency()); final int positions[] = p.getPositions(); final int l = positions.length; output.writeUnary(l+1); if (l== 0) return; //System.err.println("posting has " + l + "blocks"); output.writeGamma(positions[0]+1); for(int i=1;i<l;i++) { output.writeGamma(positions[i] - positions[i-1]); } }
/**
 * Handles the first posting: delegates to the superclass, empties the
 * {@code positions} accumulator, and seeds it with this posting's positions.
 *
 * If the accumulator held more than 30 entries before being emptied,
 * {@code compact()} is also called on it.
 * NOTE(review): {@code compact()} presumably releases spare capacity; confirm
 * against the collection type.
 *
 * @param p first posting; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void firstPosting(Posting p) {
    super.firstPosting(p);

    // Decide before clearing: the size is zero afterwards.
    final boolean wasLarge = positions.size() > 30;
    positions.clear();
    if (wasLarge) {
        positions.compact();
    }

    positions.addAll(((BlockPosting) p).getPositions());
}
/**
 * Handles the first posting: delegates to the superclass, empties the
 * {@code positions} accumulator, and seeds it with this posting's positions.
 *
 * If the accumulator held more than 30 entries before being emptied,
 * {@code compact()} is also called on it.
 * NOTE(review): {@code compact()} presumably releases spare capacity; confirm
 * against the collection type.
 *
 * @param p first posting; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void firstPosting(Posting p) {
    super.firstPosting(p);

    // Decide before clearing: the size is zero afterwards.
    final boolean wasLarge = positions.size() > 30;
    positions.clear();
    if (wasLarge) {
        positions.compact();
    }

    positions.addAll(((BlockPosting) p).getPositions());
}
/**
 * Handles the first posting: delegates to the superclass, empties the
 * {@code positions} accumulator, and seeds it with this posting's positions.
 *
 * If the accumulator held more than 30 entries before being emptied,
 * {@code compact()} is also called on it.
 * NOTE(review): {@code compact()} presumably releases spare capacity; confirm
 * against the collection type.
 *
 * @param p first posting; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void firstPosting(Posting p) {
    super.firstPosting(p);

    // Decide before clearing: the size is zero afterwards.
    final boolean wasLarge = positions.size() > 30;
    positions.clear();
    if (wasLarge) {
        positions.compact();
    }

    positions.addAll(((BlockPosting) p).getPositions());
}
/**
 * Handles the first posting: delegates to the superclass, empties the
 * {@code positions} accumulator, and seeds it with this posting's positions.
 *
 * If the accumulator held more than 30 entries before being emptied,
 * {@code compact()} is also called on it.
 * NOTE(review): {@code compact()} presumably releases spare capacity; confirm
 * against the collection type.
 *
 * @param p first posting; it is cast to {@link BlockPosting} to read its
 *          positions
 */
@Override
protected void firstPosting(Posting p) {
    super.firstPosting(p);

    // Decide before clearing: the size is zero afterwards.
    final boolean wasLarge = positions.size() > 30;
    positions.clear();
    if (wasLarge) {
        positions.compact();
    }

    positions.addAll(((BlockPosting) p).getPositions());
}
public static void compareBlockPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((BlockPosting) p).getPositions(), ((BlockPosting) p).getPositions()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
public static void compareBlockFieldPostings(List<Posting> inputPostings, IterablePosting outputPostings) throws Exception { for(Posting p : inputPostings) { assertEquals(p.getId(), outputPostings.next()); assertEquals(p.getId(), outputPostings.getId()); assertEquals(p.getFrequency(), outputPostings.getFrequency()); assertArrayEquals(((FieldPosting) p).getFieldFrequencies(), ((FieldPosting) p).getFieldFrequencies()); assertArrayEquals(((BlockPosting) p).getPositions(), ((BlockPosting) p).getPositions()); } assertTrue(outputPostings.next() == IterablePosting.EOL); assertTrue(outputPostings.getId() == IterablePosting.EOL); //TR-519 }
/**
 * Walks every posting list of the index's "inverted" structure and checks
 * the number of positions stored for each posting.
 *
 * For each posting, the number of positions must not exceed the posting's
 * frequency and must not exceed {@code maxBlocks}. Null posting lists
 * returned by the stream are skipped.
 *
 * @param index index whose "inverted" structure input stream is read
 * @throws Exception if reading or closing the stream fails
 */
protected void testInverted(Index index) throws Exception {
    final PostingIndexInputStream piis =
            (PostingIndexInputStream) index.getIndexStructureInputStream("inverted");
    try {
        while (piis.hasNext()) {
            final IterablePosting ip = piis.getNextPostings();
            if (ip == null) {
                continue;
            }
            final BlockPosting bp = (BlockPosting) ip;
            while (ip.next() != IterablePosting.EOL) {
                final int tf = bp.getFrequency();
                final int[] blocks = bp.getPositions();
                assertTrue("blocks.length="+blocks.length + " tf="+tf, blocks.length <= tf);
                assertTrue("blocks.length="+blocks.length + " tf="+tf + ", blocks longer than max "+ maxBlocks,
                        blocks.length <= maxBlocks);
            }
        }
    } finally {
        // Close the stream even when an assertion or a read fails.
        piis.close();
    }
}
}