/**
 * Looks up a document entry by delegating to {@code parent}, holding
 * {@code parent}'s monitor for the duration of the delegated call.
 *
 * @param docid the document id, passed through unchanged to the parent
 * @return whatever the parent returns for this docid
 * @throws IOException if the parent lookup fails
 */
public DocumentIndexEntry getDocumentEntry(int docid) throws IOException {
    final DocumentIndexEntry entry;
    synchronized (parent) {
        entry = parent.getDocumentEntry(docid);
    }
    return entry;
}
/**
 * Thread-guarded lookup: the request is forwarded to {@code parent} while
 * this thread owns {@code parent}'s lock.
 *
 * @param docid the document id forwarded to the parent
 * @return the parent's result for this docid
 * @throws IOException propagated from the parent lookup
 */
public DocumentIndexEntry getDocumentEntry(int docid) throws IOException {
    final DocumentIndexEntry found;
    synchronized (parent) {
        found = parent.getDocumentEntry(docid);
    }
    return found;
}
/** {@inheritDoc} */ public DocumentIndexEntry getDocumentEntry(int docid) throws IOException { int offset = 0, i = 0; //System.err.println("MultiDoc: Getting docid="+docid); for (DocumentIndex doc : docs) { if (docid < (offsets[i] + offset)) { //System.err.println("Found docid="+docid+", is local docid "+(docid-offset)+" in shard "+i+" and has length "+d.getDocumentLength()+" ("+d.pointerToString()+")"); return new MultiDocumentEntry(doc.getDocumentEntry(docid - offset),i); } offset += offsets[i++]; } return null; }
/**
 * Returns the field lengths of the document identified by {@code id}.
 * <p>
 * When {@code asFieldDocumentIndex} is available it is queried directly;
 * otherwise the document's entry is fetched from {@code documentIndex} and
 * asked for its field lengths.
 *
 * @return the field lengths, or an empty array if the lookup failed with an
 *         {@link IOException} (the failure is logged)
 * @throws UnsupportedOperationException if {@code fieldsCount} is not positive
 */
@Override
public int[] getFieldLengths() {
    if (fieldsCount <= 0) {
        throw new UnsupportedOperationException();
    }
    try {
        if (asFieldDocumentIndex == null) {
            // No direct accessor: go through the document's own entry.
            return ((FieldDocumentIndexEntry) documentIndex.getDocumentEntry(id)).getFieldLengths();
        }
        return asFieldDocumentIndex.getFieldLengths(id);
    } catch (IOException ioe) {
        logger.error("Problem looking for doclength for document "+ id, ioe);
        return new int[0];
    }
}
/**
 * Looks up the field lengths for the document {@code id}.
 * <p>
 * Prefers {@code asFieldDocumentIndex} when it is set; falls back to casting
 * the entry obtained from {@code documentIndex}.
 *
 * @return the field lengths; an empty array when an {@link IOException}
 *         occurred (which is logged rather than propagated)
 * @throws UnsupportedOperationException if {@code fieldsCount} is zero or negative
 */
@Override
public int[] getFieldLengths() {
    if (fieldsCount <= 0) {
        throw new UnsupportedOperationException();
    }
    try {
        if (asFieldDocumentIndex == null) {
            final FieldDocumentIndexEntry entry =
                (FieldDocumentIndexEntry) documentIndex.getDocumentEntry(id);
            return entry.getFieldLengths();
        }
        return asFieldDocumentIndex.getFieldLengths(id);
    } catch (IOException ioe) {
        logger.error("Problem looking for doclength for document "+ id, ioe);
        return new int[0];
    }
}
/**
 * Returns the field lengths of the document identified by {@code id}.
 * <p>
 * When {@code asFieldDocumentIndex} is available it is queried directly;
 * otherwise the document's entry is fetched from {@code documentIndex} and
 * asked for its field lengths.
 *
 * @return the field lengths, or an empty array if the lookup failed with an
 *         {@link IOException} (the failure is logged)
 * @throws UnsupportedOperationException if {@code fieldsCount} is not positive
 */
@Override
public int[] getFieldLengths() {
    if (fieldsCount <= 0) {
        throw new UnsupportedOperationException();
    }
    try {
        if (asFieldDocumentIndex == null) {
            // No direct accessor: go through the document's own entry.
            return ((FieldDocumentIndexEntry) documentIndex.getDocumentEntry(id)).getFieldLengths();
        }
        return asFieldDocumentIndex.getFieldLengths(id);
    } catch (IOException ioe) {
        logger.error("Problem looking for doclength for document "+ id, ioe);
        return new int[0];
    }
}
/** {@inheritDoc} */ public int[] getFieldLengths() { if (doiIsFieldDocumentIndex) { try{ return fdoi.getFieldLengths(id); } catch (IOException ioe) { System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } } else { FieldDocumentIndexEntry fdie = null; try{ fdie = ((FieldDocumentIndexEntry)doi.getDocumentEntry(id)); } catch (IOException ioe) { //TODO log? System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } return fdie.getFieldLengths(); } }
/**
 * Looks up the field lengths for the document {@code id}.
 * <p>
 * Prefers {@code asFieldDocumentIndex} when it is set; falls back to casting
 * the entry obtained from {@code documentIndex}.
 *
 * @return the field lengths; an empty array when an {@link IOException}
 *         occurred (which is logged rather than propagated)
 * @throws UnsupportedOperationException if {@code fieldsCount} is zero or negative
 */
@Override
public int[] getFieldLengths() {
    if (fieldsCount <= 0) {
        throw new UnsupportedOperationException();
    }
    try {
        if (asFieldDocumentIndex == null) {
            final FieldDocumentIndexEntry entry =
                (FieldDocumentIndexEntry) documentIndex.getDocumentEntry(id);
            return entry.getFieldLengths();
        }
        return asFieldDocumentIndex.getFieldLengths(id);
    } catch (IOException ioe) {
        logger.error("Problem looking for doclength for document "+ id, ioe);
        return new int[0];
    }
}
/** {@inheritDoc} */ public int[] getFieldLengths() { if (doiIsFieldDocumentIndex) { try{ return fdoi.getFieldLengths(id); } catch (IOException ioe) { System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } } else { FieldDocumentIndexEntry fdie = null; try{ fdie = ((FieldDocumentIndexEntry)doi.getDocumentEntry(id)); } catch (IOException ioe) { //TODO log? System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } return fdie.getFieldLengths(); } }
/** {@inheritDoc} */ public int[] getFieldLengths() { if (doiIsFieldDocumentIndex) { try{ return fdoi.getFieldLengths(id); } catch (IOException ioe) { System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } } else { FieldDocumentIndexEntry fdie = null; try{ fdie = ((FieldDocumentIndexEntry)doi.getDocumentEntry(id)); } catch (IOException ioe) { //TODO log? System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } return fdie.getFieldLengths(); } }
/** {@inheritDoc} */ public int[] getFieldLengths() { if (doiIsFieldDocumentIndex) { try{ return fdoi.getFieldLengths(id); } catch (IOException ioe) { System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } } else { FieldDocumentIndexEntry fdie = null; try{ fdie = ((FieldDocumentIndexEntry)doi.getDocumentEntry(id)); } catch (IOException ioe) { //TODO log? System.err.println("Problem looking for doclength for document "+ id); ioe.printStackTrace(); return new int[0]; } return fdie.getFieldLengths(); } }
/**
 * Retrieves the term information of a document, addressed by its docid.
 * <p>
 * The docid is resolved to its document index entry, which is then handed to
 * the entry-based {@code getTerms} overload. According to the original
 * contract of this method, the result holds five parallel arrays: term ids,
 * frequencies, field scores, block frequencies and block ids.
 *
 * @param docid the id of the document whose terms are wanted
 * @return the arrays produced by the entry-based overload, or {@code null}
 *         if the docid has no entry or its entry reports zero entries
 * @throws IOException if the document index or the overload fails to read
 */
public int[][] getTerms(int docid) throws IOException {
    final DocumentIndexEntry entry = docIndex.getDocumentEntry(docid);
    if (entry == null || entry.getNumberOfEntries() == 0) {
        return null;
    }
    return getTerms(entry);
}
/**
 * Returns the term data for the document with the given docid.
 * <p>
 * Resolves the docid through {@code docIndex} and delegates to the
 * entry-based {@code getTerms} overload. The original documentation describes
 * the result as five parallel arrays (term ids, frequencies, field scores,
 * block frequencies, block ids).
 *
 * @param docid the id of the document whose terms we are looking for
 * @return the delegated result, or {@code null} when no entry exists for the
 *         docid or the entry has no entries
 * @throws IOException if reading the underlying structures fails
 */
public int[][] getTerms(int docid) throws IOException {
    final DocumentIndexEntry docEntry = docIndex.getDocumentEntry(docid);
    final boolean nothingToRead = docEntry == null || docEntry.getNumberOfEntries() == 0;
    return nothingToRead ? null : getTerms(docEntry);
}
/** * Returns a two dimensional array containing the * term ids and the term frequencies for * the given document. * @return int[][] the two dimensional [n][3] array * containing the term ids, frequencies and field scores. If * the given document identifier is not found in the document * index, then the method returns null. If fields are not used, * then the dimension of the returned array are [n][2]. * @param docid the document identifier of the document which terms * we retrieve. */ public int[][] getTerms(int docid) throws IOException { DocumentIndexEntry de = docIndex.getDocumentEntry(docid); if (de == null) return null; if (de.getNumberOfEntries() == 0) return null; return getTerms(de); } /**
/** * Returns a two dimensional array containing the * term ids and the term frequencies for * the given document. * @return int[][] the two dimensional [n][3] array * containing the term ids, frequencies and field scores. If * the given document identifier is not found in the document * index, then the method returns null. If fields are not used, * then the dimension of the returned array are [n][2]. * @param docid the document identifier of the document which terms * we retrieve. */ public int[][] getTerms(int docid) throws IOException { DocumentIndexEntry de = docIndex.getDocumentEntry(docid); if (de == null) return null; if (de.getNumberOfEntries() == 0) return null; return getTerms(de); } /**
/**
 * Prints the direct-index postings of a single document.
 * <p>
 * The pointer of the document is written to standard error; the postings are
 * written to standard output as {@code docid (termid, frequency) ...} on one
 * line. A document whose posting list is unavailable is printed with no
 * postings.
 *
 * @param args {@code args[0]} is the docid to display
 * @throws NumberFormatException if {@code args[0]} is not an integer
 * @throws Exception if the index structures cannot be read
 */
public static void main (String args[]) throws Exception
{
    if (args == null || args.length < 1) {
        // Previously this surfaced as an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: <docid>");
        return;
    }
    final int docid = Integer.parseInt(args[0]);

    IndexOnDisk index = Index.createIndex();
    if (index == null)
    {
        System.err.println("Couldn't load index: " + Index.getLastIndexLoadError());
        return;
    }
    try {
        @SuppressWarnings("unchecked") // the direct index is addressed by Pointer
        PostingIndex<Pointer> direct = (PostingIndex<Pointer>) index.getDirectIndex();
        DocumentIndex doc = index.getDocumentIndex();
        DocumentIndexEntry die = doc.getDocumentEntry(docid);
        if (die == null) {
            // The document index may not know this docid; avoid dereferencing null.
            System.err.println("docid " + args[0] + " not found in document index");
            return;
        }
        System.err.println("docid" + args[0] + " pointer = "+ die.toString());
        IterablePosting pi = direct.getPostings(die);
        System.out.print(args[0] + " ");
        if (pi != null) {
            try {
                while (pi.next() != IterablePosting.EOL)
                {
                    System.out.print("(" + pi.getId() + ", " + pi.getFrequency() + ") ");
                }
            } finally {
                pi.close();
            }
        }
        System.out.println();
    } finally {
        // Release the index even when reading the postings fails.
        index.close();
    }
}
/**
 * Adds a feedback document, identified by docid, from the index.
 * <p>
 * Every posting of the document in the direct index is passed to
 * {@code insertTerm}. The document's length and the feedback document count
 * are only updated once the document's postings have been located, so a
 * docid that cannot be found leaves all accumulated statistics untouched.
 *
 * @param docid the id of the feedback document
 * @param rank  not used by this implementation
 * @param score not used by this implementation
 * @throws IOException if the document index or direct index cannot be read
 */
public void insertDocument(int docid, int rank, double score) throws IOException
{
    // Resolve the postings first: a missing document must not skew the totals.
    final BitIndexPointer pointer = (BitIndexPointer) documentIndex.getDocumentEntry(docid);
    final IterablePosting ip = (pointer == null) ? null : directIndex.getPostings(pointer);
    if (ip == null)
    {
        logger.warn("document id "+docid+" not found");
        return;
    }
    totalDocumentLength += documentIndex.getDocumentLength(docid);
    try {
        while (ip.next() != IterablePosting.EOL)
        {
            this.insertTerm(ip.getId(), ip.getFrequency());
        }
    } finally {
        ip.close();
    }
    feedbackDocumentCount++;
}
/**
 * Adds a feedback document, identified by docid, from the index.
 * <p>
 * Every posting of the document in the direct index is passed to
 * {@code insertTerm}. The document's length and the feedback document count
 * are only updated once the document's postings have been located, so a
 * docid that cannot be found leaves all accumulated statistics untouched.
 *
 * @param docid the id of the feedback document
 * @param rank  not used by this implementation
 * @param score not used by this implementation
 * @throws IOException if the document index or direct index cannot be read
 */
public void insertDocument(int docid, int rank, double score) throws IOException
{
    // Resolve the postings first: a missing document must not skew the totals.
    final BitIndexPointer pointer = (BitIndexPointer) documentIndex.getDocumentEntry(docid);
    final IterablePosting ip = (pointer == null) ? null : directIndex.getPostings(pointer);
    if (ip == null)
    {
        logger.warn("document id "+docid+" not found");
        return;
    }
    totalDocumentLength += documentIndex.getDocumentLength(docid);
    try {
        while (ip.next() != IterablePosting.EOL)
        {
            this.insertTerm(ip.getId(), ip.getFrequency());
        }
    } finally {
        ip.close();
    }
    feedbackDocumentCount++;
}
/**
 * Writes a two-file collection specification, runs batch indexing with the
 * {@code -b} and {@code -p} options, and checks that the resulting index has
 * 22 documents and that the direct-index postings of document 0 are
 * {@code BlockPosting}s.
 */
@Test
public void testParallel() throws Exception
{
    final PrintWriter specWriter = new PrintWriter(Files.writeFileWriter(ApplicationSetup.COLLECTION_SPEC));
    final String workingDir = System.getProperty("user.dir");
    specWriter.println(workingDir + "/../../share/tests/shakespeare/shakespeare-merchant.trec.1");
    specWriter.println(workingDir + "/../../share/tests/shakespeare/shakespeare-merchant.trec.2");
    specWriter.close();

    final String[] indexingArgs = {"batchindexing", "-b", "-p"};
    CLITool.main(indexingArgs);

    Index index = Index.createIndex();
    assertNotNull(index);
    assertEquals(22, index.getCollectionStatistics().getNumberOfDocuments());

    final Object firstDocPostings =
        index.getDirectIndex().getPostings(index.getDocumentIndex().getDocumentEntry(0));
    assertTrue(firstDocPostings instanceof BlockPosting);
}
/**
 * Writes a two-file collection specification, runs batch indexing with the
 * {@code -b} option, and checks that the resulting index has 22 documents
 * and that the direct-index postings of document 0 are {@code BlockPosting}s.
 */
@Test
public void test() throws Exception
{
    final PrintWriter specWriter = new PrintWriter(Files.writeFileWriter(ApplicationSetup.COLLECTION_SPEC));
    final String workingDir = System.getProperty("user.dir");
    specWriter.println(workingDir + "/../../share/tests/shakespeare/shakespeare-merchant.trec.1");
    specWriter.println(workingDir + "/../../share/tests/shakespeare/shakespeare-merchant.trec.2");
    specWriter.close();

    final String[] indexingArgs = {"batchindexing", "-b"};
    CLITool.main(indexingArgs);

    Index index = Index.createIndex();
    assertNotNull(index);
    assertEquals(22, index.getCollectionStatistics().getNumberOfDocuments());

    final Object firstDocPostings =
        index.getDirectIndex().getPostings(index.getDocumentIndex().getDocumentEntry(0));
    assertTrue(firstDocPostings instanceof BlockPosting);
}