/**
 * Loads the index stored at the default location.
 * <p>
 * Convenience overload: it reads {@link ApplicationSetup#TERRIER_INDEX_PATH} and
 * {@link ApplicationSetup#TERRIER_INDEX_PREFIX} and hands both to the
 * two-argument {@code createIndex(String, String)} factory method.
 *
 * @return whatever {@code createIndex(String, String)} returns for the default
 *         path and prefix
 */
public static IndexOnDisk createIndex() {
    final String defaultPath = ApplicationSetup.TERRIER_INDEX_PATH;
    final String defaultPrefix = ApplicationSetup.TERRIER_INDEX_PREFIX;
    return createIndex(defaultPath, defaultPrefix);
}
/**
 * Remembers the given index and caches the structures obtained from it.
 * <p>
 * The index itself is stored as the last configured index; its document
 * index, inverted index, lexicon, collection statistics, direct index and
 * meta index are each fetched once, in that order, and kept in fields.
 *
 * @param index the index whose structures should be used from now on
 */
@Override
public void configureIndex(Index index) {
    this.lastIndex = index;
    // Fetch order is kept as-is in case any getter loads its structure lazily.
    this.documentIndex = index.getDocumentIndex();
    this.invertedIndex = index.getInvertedIndex();
    this.lexicon = index.getLexicon();
    this.collStats = index.getCollectionStatistics();
    this.directIndex = index.getDirectIndex();
    this.metaIndex = index.getMetaIndex();
}
/** Obtain the query expansion model for QE to use.
/**
 * Tells the index about a new input stream index structure it provides.
 * <p>
 * The stream variant is registered through {@code addIndexStructure} under
 * the name {@code structureName + "-inputstream"}; the remaining arguments
 * are passed through unchanged.
 *
 * @param structureName base name of the structure (without the
 *                      {@code -inputstream} suffix)
 * @param className     class name forwarded to {@code addIndexStructure}
 * @param paramTypes    parameter types forwarded to {@code addIndexStructure}
 * @param paramValues   parameter values forwarded to {@code addIndexStructure}
 */
public void addIndexStructureInputStream(String structureName, String className, String paramTypes, String paramValues) {
    final String streamStructureName = structureName + "-inputstream";
    addIndexStructure(streamStructureName, className, paramTypes, paramValues);
}
/**
 * Creates a posting list manager for the given index and statistics.
 * <p>
 * Stores the index and the supplied statistics, and caches the index's
 * lexicon and inverted index for later use.
 *
 * @param _index the index whose lexicon and inverted index are used
 * @param cs     the collection statistics to keep alongside the index
 * @throws IOException declared by the signature
 */
@SuppressWarnings("unchecked")
protected PostingListManager(Index _index, CollectionStatistics cs) throws IOException {
    this.index = _index;
    this.lexicon = _index.getLexicon();
    // Unchecked: the inverted index is narrowed to a Pointer-keyed posting index.
    this.invertedIndex = (PostingIndex<Pointer>) _index.getInvertedIndex();
    this.collectionStatistics = cs;
}
/**
 * Builds a scoring matching on top of a parent matching.
 * <p>
 * All arguments are forwarded to the superclass constructor. Afterwards, if
 * this object's {@code index} field is non-null, the lexicon, inverted index
 * and collection statistics are read from it and cached.
 *
 * @param _index  index forwarded to the superclass
 * @param _parent parent matching forwarded to the superclass
 * @param _wm     weighting model forwarded to the superclass
 * @param _filter predicate forwarded to the superclass
 */
public ScoringMatching(Index _index, Matching _parent, WeightingModel _wm, Predicate<Pair<String,Set<String>>> _filter) {
    super(_index, _parent, _wm, _filter);
    if (this.index == null) {
        // No index available: leave the cached structures unset.
        return;
    }
    this.lexicon = this.index.getLexicon();
    this.invertedIndex = this.index.getInvertedIndex();
    this.cs = this.index.getCollectionStatistics();
}
@Override @SuppressWarnings("unchecked") public int run(String[] args) throws Exception { Index.setIndexLoadingProfileAsRetrieval(false); if (args.length < 2) final Index index = Index.createIndex(); if (index == null) System.err.println("Index not found: " + Index.getLastIndexLoadError()); return 2; PostingIndexInputStream bpiis = (PostingIndexInputStream) index.getIndexStructureInputStream(structureName); bpiis.print(); bpiis.close(); Lexicon<String> lex = index.getLexicon(); PostingIndex<?> inv = (PostingIndex<?>) index.getInvertedIndex(); LexiconEntry le = lex.getLexiconEntry(args[1]); IterablePosting ip = inv.getPostings(le); Lexicon<String> lex = index.getLexicon(); PostingIndex<Pointer> inv = (PostingIndex<Pointer>) index.getInvertedIndex(); LexiconEntry le = lex.getLexiconEntry(args[1]); IterablePosting ip = inv.getPostings(le); List<BitIndexPointer> pointerList = (List<BitIndexPointer>) index.getIndexStructure(args[2]); PostingIndex<?> bpi = (PostingIndex<?>) index.getIndexStructure(structureName); Iterator<? extends Writable> in = (Iterator<? extends Writable>) index.getIndexStructureInputStream(structureName);
trecQuerying.close(); } else if (printdocid) { Index.setIndexLoadingProfileAsRetrieval(false); Index i = Index.createIndex(); if (i == null) logger.error("No such index : "+ Index.getLastIndexLoadError()); } else { IndexUtil.printDocumentIndex(i, "document"); i.close(); Index.setIndexLoadingProfileAsRetrieval(false); Index i = Index.createIndex(); if (i == null) logger.error("No such index : "+ Index.getLastIndexLoadError()); i.close(); Index.setIndexLoadingProfileAsRetrieval(false); Index i = Index.createIndex(); if (i == null) logger.error("No such index : "+ Index.getLastIndexLoadError()); else if (! i.hasIndexStructureInputStream("lexicon")) Index.setIndexLoadingProfileAsRetrieval(false); Index i = Index.createIndex();
return 1; PostingIndex<?> di = index.hasIndexStructure("direct") ? index.getDirectIndex() : null; DocumentIndex doi = index.getDocumentIndex(); MetaIndex meta = index.getMetaIndex(); boolean printmeta = line.hasOption("meta"); boolean blocks = false; System.out.println("Contents: "); if (blocks) System.out.println(getContentsBlocks(ip, index.getLexicon())); else System.out.println(getContentsNoBlocksFreq(ip, index.getLexicon())); } else { System.err.println("No direct index data structure"); index.close(); return 0;
public static Index makeConcurrentForRetrieval(Index index) { if (index.hasIndexStructure("document") && ! index.getDocumentIndex().getClass().isAnnotationPresent(ConcurrentReadable.class) ) DocumentIndex oldDoi = index.getDocumentIndex(); if (oldDoi instanceof FieldDocumentIndex) newDoi = new ConcurrentFieldDocumentIndex((FieldDocumentIndex)oldDoi); if (index.hasIndexStructure("inverted") && ! index.getInvertedIndex().getClass().isAnnotationPresent(ConcurrentReadable.class) ) PostingIndex<?> inv = index.getInvertedIndex(); if (inv instanceof BitPostingIndex) if (index.hasIndexStructure("lexicon") && ! index.getLexicon().getClass().isAnnotationPresent(ConcurrentReadable.class) ) Lexicon<String> oldLex = index.getLexicon(); Lexicon<String> newLex = new ConcurrentLexicon(oldLex); IndexUtil.forceStructure(index, "lexicon", newLex); if (index.hasIndexStructure("meta") && ! index.getMetaIndex().getClass().isAnnotationPresent(ConcurrentReadable.class) ) MetaIndex oldmeta = index.getMetaIndex(); MetaIndex newmeta = new ConcurrentMetaIndex(oldmeta); IndexUtil.forceStructure(index, "meta", newmeta);
assertEquals(2, index.getIntIndexProperty("index.inverted.fields.count", -1)); assertEquals(2, index.getIntIndexProperty("index.direct.fields.count", -1)); assertTrue("Constructor for lexicon-value type is incorrect", index.getIndexProperty("index.lexicon-valuefactory.parameter_values", "").length() >0); assertEquals("TITLE,SPEAKER", index.getIndexProperty("index.inverted.fields.names", null)); assertEquals("TITLE,SPEAKER", index.getIndexProperty("index.direct.fields.names", null)); System.err.println("Field tokens=" + index.getIntIndexProperty("num.field.0.Tokens", -1) + "," + index.getIntIndexProperty("num.field.1.Tokens", -1)); assertEquals(123, index.getIntIndexProperty("num.field.0.Tokens", -1)); assertEquals(611, index.getIntIndexProperty("num.field.1.Tokens", -1)); assertEquals(2, index.getCollectionStatistics().getNumberOfFields()); assertEquals(123, index.getCollectionStatistics().getFieldTokens()[0]); assertEquals(611, index.getCollectionStatistics().getFieldTokens()[1]); bpiis = (PostingIndexInputStream) index.getIndexStructureInputStream("direct"); ip = bpiis.next(); assertTrue(ip instanceof FieldPosting); bpiis.close(); bpiis = (PostingIndexInputStream) index.getIndexStructureInputStream("inverted"); ip = bpiis.next(); assertTrue(ip instanceof FieldPosting); bpi = (PostingIndex<Pointer>) index.getInvertedIndex(); ip = bpi.getPostings(index.getLexicon().getLexiconEntry(0).getValue()); assertTrue(ip instanceof FieldPosting); bpi = (PostingIndex<Pointer>) index.getDirectIndex(); ip = bpi.getPostings(index.getDocumentIndex().getDocumentEntry(0)); assertTrue(ip instanceof FieldPosting);
/**
 * Runs {@code batchindexing -b -p} over the two Merchant of Venice test
 * files and checks the resulting index.
 * <p>
 * The collection specification is rewritten to list the two input files, the
 * indexer is invoked through {@link CLITool}, and the default index is then
 * opened to verify that it holds 22 documents and that the direct index
 * yields {@link BlockPosting} postings for the first document.
 *
 * @throws Exception if indexing or reading the index fails
 */
@Test
public void testParallel() throws Exception {
    final String testDataDir = System.getProperty("user.dir") + "/../../share/tests/shakespeare/";
    // try-with-resources: the spec file is closed even if writing fails part-way.
    try (PrintWriter p = new PrintWriter(Files.writeFileWriter(ApplicationSetup.COLLECTION_SPEC))) {
        p.println(testDataDir + "shakespeare-merchant.trec.1");
        p.println(testDataDir + "shakespeare-merchant.trec.2");
    }

    CLITool.main(new String[]{"batchindexing", "-b", "-p"});

    Index index = Index.createIndex();
    assertNotNull(index);
    try {
        assertEquals(22, index.getCollectionStatistics().getNumberOfDocuments());
        assertTrue(
            index.getDirectIndex().getPostings(index.getDocumentIndex().getDocumentEntry(0))
                instanceof BlockPosting);
    } finally {
        // Release the index even when an assertion fails, so the test does not leak it.
        index.close();
    }
}
Index index = Index.createIndex(); assertNotNull("Failed to get an index", index); final String[] expectedStructures = new String[]{ assertTrue("Index has no "+ structureName + " structure", index.hasIndexStructure(structureName)); for (String structureName : expectedStructuresInputStream ) assertTrue("Index has no "+ structureName + " inputstream structure", index.hasIndexStructure(structureName)); assertTrue(index.getProperties().containsKey("termpipelines")); checkInvertedIndexStream(index, DOCUMENT_LENGTHS, DOCUMENT_UNIQUE_TERMS); checkDirectIndex(index, index.getCollectionStatistics().getNumberOfUniqueTerms(), index.getCollectionStatistics().getNumberOfUniqueTerms(), DOCUMENT_LENGTHS, DOCUMENT_UNIQUE_TERMS, true); assertTrue("LexiconEntry is not of type FieldLexiconEntry", ((FixedSizeWriteableFactory<LexiconEntry>)index.getIndexStructure("lexicon-valuefactory")).newInstance() instanceof FieldLexiconEntry); assertTrue("DocumentIndexEntry is not of type FieldDocumentIndexEntry", ((FixedSizeWriteableFactory<DocumentIndexEntry>)index.getIndexStructure("document-factory")).newInstance() instanceof FieldDocumentIndexEntry); LexiconEntry le = ((FixedSizeWriteableFactory<LexiconEntry>)index.getIndexStructure("lexicon-valuefactory")).newInstance(); assertTrue("LexiconEntry is not of type BasicLexiconEntry", (le instanceof BasicLexiconEntry) || (le instanceof Tr4BasicLexiconEntry) ); index.close(); super.checkIndex();
protected void doIndexing(String... trec_terrier_args) throws Exception { String path = ApplicationSetup.TERRIER_INDEX_PATH; String prefix = ApplicationSetup.TERRIER_INDEX_PREFIX; TrecTerrier.main(joinSets(trec_terrier_args, indexingOptions)); //check that application setup hasnt changed unexpectedly assertEquals(path, ApplicationSetup.TERRIER_INDEX_PATH); assertEquals(prefix, ApplicationSetup.TERRIER_INDEX_PREFIX); //check that indexing actually created an index assertTrue("Index does not exist at ["+ApplicationSetup.TERRIER_INDEX_PATH+","+ApplicationSetup.TERRIER_INDEX_PREFIX+"]", Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)); IndexOnDisk i = Index.createIndex(); assertNotNull(Index.getLastIndexLoadError(), i); assertEquals(ApplicationSetup.TERRIER_VERSION,i.getIndexProperty("index.terrier.version", "")); assertTrue("Index does not have an inverted structure", i.hasIndexStructure("inverted")); assertTrue("Index does not have an lexicon structure", i.hasIndexStructure("lexicon")); assertTrue("Index does not have an document structure", i.hasIndexStructure("document")); assertTrue("Index does not have an meta structure", i.hasIndexStructure("meta")); addDirectStructure(i); i.close(); finishIndexing(); }
new String[]{"doc1"}, new String[]{"The <TITLE>quick brown</TITLE> fox jumps over the lazy dog"}); System.err.println("_testSingleDocumentIndexMatchingFields: " + index.toString()); assertNotNull(index); assertEquals(1, index.getCollectionStatistics().getNumberOfDocuments()); assertEquals(2, index.getCollectionStatistics().getNumberOfFields()); assertEquals(2, index.getCollectionStatistics().getFieldTokens()[0]); assertEquals(7, index.getCollectionStatistics().getFieldTokens()[1]); assertEquals(9, index.getDocumentIndex().getDocumentLength(0)); Matching matching = makeMatching(index); assertNotNull(matching); assertEquals( index.getCollectionStatistics().getNumberOfDocuments(), fatIndex.getCollectionStatistics().getNumberOfDocuments()); assertEquals(index.getCollectionStatistics().getNumberOfFields(), fatIndex.getCollectionStatistics().getNumberOfFields()); assertEquals(index.getCollectionStatistics().getFieldTokens()[0], fatIndex.getCollectionStatistics().getFieldTokens()[0]); assertEquals(index.getCollectionStatistics().getFieldTokens()[1], fatIndex.getCollectionStatistics().getFieldTokens()[1]); assertNotNull(fatIndex.getLexicon()); LexiconEntry le = fatIndex.getLexicon().getLexiconEntry("quick"); assertNotNull(le); assertEquals(1, le.getFrequency()); PostingIndex<?> inv = fatIndex.getInvertedIndex(); assertNotNull(inv); IterablePosting ip = inv.getPostings(le);
@Override public void checkIndex(BatchEndToEndTest test, Index index) throws Exception { //no check correct type of all structures PostingIndexInputStream bpiis; IterablePosting ip; PostingIndex<Pointer> bpi; //check stream structures bpiis = (PostingIndexInputStream) index.getIndexStructureInputStream("direct"); ip = bpiis.next(); assertTrue(ip instanceof BlockPosting); bpiis.close(); bpiis = (PostingIndexInputStream) index.getIndexStructureInputStream("inverted"); ip = bpiis.next(); assertTrue(ip instanceof BlockPosting); bpiis.close(); //check random structures bpi = (PostingIndex<Pointer>) index.getInvertedIndex(); ip = bpi.getPostings(index.getLexicon().getLexiconEntry(0).getValue()); assertTrue(ip instanceof BlockPosting); bpi = (PostingIndex<Pointer>) index.getDirectIndex(); ip = bpi.getPostings(index.getDocumentIndex().getDocumentEntry(0)); assertTrue(ip instanceof BlockPosting); } }
/** {@inheritDoc} */
public MetaIndex getMetaIndex() {
    // Both arrays are sized by the total number of underlying indices.
    final int indexCount = indices.size();
    final MetaIndex[] metas = new MetaIndex[indexCount];
    final int[] offsets = new int[indexCount];

    // Only the indices chosen by the selective matching policy fill a slot.
    // NOTE(review): if the policy selects fewer indices than exist, the
    // trailing slots stay null/0 - confirm MultiMeta tolerates that.
    int slot = 0;
    for (Index selected : selectiveMatchingPolicy.getSelectedIndices(indices)) {
        metas[slot] = selected.getMetaIndex();
        offsets[slot] = selected.getCollectionStatistics().getNumberOfDocuments();
        slot++;
    }
    return new MultiMeta(metas, offsets);
}
/**
 * Updates the index's properties for this structure.
 * <p>
 * Registers the structure and its input stream variant with the index, then
 * records the field count, field names, whether blocks are present and the
 * maximum number of blocks under {@code index.<structureName>.*} properties.
 *
 * @param index               the index whose properties are updated
 * @param pointerSourceStream value placed in the input stream's parameter
 *                            values, in the position matching the
 *                            {@code java.util.Iterator} parameter type
 */
public void writeIndexProperties(Index index, String pointerSourceStream) {
    // Random-access structure.
    final String structureClassName = this.getStructureClass().getName();
    final String structureParamValues = "index,structureName," + this.getPostingIteratorClass().getName();
    index.addIndexStructure(
        this.structureName,
        structureClassName,
        "org.terrier.structures.IndexOnDisk,java.lang.String,java.lang.Class",
        structureParamValues);

    // Input stream structure.
    final String streamClassName = this.getStructureInputStreamClass().getName();
    final String streamParamValues =
        "index,structureName," + pointerSourceStream + "," + this.getPostingIteratorClass().getName();
    index.addIndexStructureInputStream(
        this.structureName,
        streamClassName,
        "org.terrier.structures.IndexOnDisk,java.lang.String,java.util.Iterator,java.lang.Class",
        streamParamValues);

    // Per-structure properties all share the "index.<structureName>." prefix.
    final String propertyPrefix = "index." + this.structureName + ".";
    index.setIndexProperty(propertyPrefix + "fields.count", String.valueOf(this.fieldCount));
    index.setIndexProperty(propertyPrefix + "fields.names", ArrayUtils.join(this.fieldNames, ","));
    index.setIndexProperty(propertyPrefix + "blocks", String.valueOf(this.hasBlocks));
    index.setIndexProperty(propertyPrefix + "blocks.max", String.valueOf(this.maxBlocks));
}
}
/**
 * {@inheritDoc}
 * <p>
 * This implementation keeps only the meta index of the given index.
 */
public void setIndex(Index index) {
    this.metaIndex = index.getMetaIndex();
}
/**
/**
 * Prints the collection statistics of the default index to standard output.
 * <p>
 * Opens the index identified by {@link ApplicationSetup#TERRIER_INDEX_PATH}
 * and {@link ApplicationSetup#TERRIER_INDEX_PREFIX}, prints the number of
 * documents, unique terms, tokens and pointers, and closes the index again.
 *
 * @param args command-line arguments (not used)
 * @return 0 when the statistics were printed, 2 when the index could not be
 *         opened
 */
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) {
    Index.setIndexLoadingProfileAsRetrieval(false);
    final Index i = IndexFactory.of(IndexRef.of(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX));
    if (i == null) {
        // Report a missing index the same way the other index-opening tools do,
        // instead of failing with a NullPointerException below.
        System.err.println("Index not found: " + Index.getLastIndexLoadError());
        return 2;
    }
    try {
        System.out.println("Collection statistics:");
        System.out.println("number of indexed documents: " + i.getCollectionStatistics().getNumberOfDocuments());
        System.out.println("size of vocabulary: " + i.getCollectionStatistics().getNumberOfUniqueTerms());
        System.out.println("number of tokens: " + i.getCollectionStatistics().getNumberOfTokens());
        System.out.println("number of pointers: " + i.getCollectionStatistics().getNumberOfPointers());
    } finally {
        try {
            i.close();
        } catch (IOException ignored) {
            // Best effort: the index was only read, so a failure to close it
            // does not affect the statistics already printed.
        }
    }
    return 0;
}
@SuppressWarnings("unchecked") public void checkDocumentLengths(Index index, int[] lengths, int[] document_unique_terms) throws Exception { int docid = -1; if (index.hasIndexStructureInputStream("document")) Iterator<DocumentIndexEntry> iDie = (Iterator<DocumentIndexEntry>) index.getIndexStructureInputStream("document"); assertNotNull("Failed to get a document inputstream", iDie); while(iDie.hasNext()) DocumentIndex di = index.getDocumentIndex(); assertNotNull("Failed to get a document index", di); final int numberOfDocuments = index.getCollectionStatistics().getNumberOfDocuments(); for(docid =0; docid < numberOfDocuments; docid++)