/** * Construct a <code>TermDocIdSetIterator</code>. * @param term Term * @param reader * IndexReader. */ public TermDocIdSetIterator(Term term, AtomicReader reader) throws IOException { docsEnum = reader.termDocsEnum(term); }
/**
 * Returns an iterator over the documents containing {@code term}.
 *
 * @return the {@code DocsEnum} obtained from {@code reader} for {@code term},
 *         or the iterator of the shared {@code EmptyDocIdSet} instance when
 *         the reader returns {@code null} for that term
 * @throws IOException if {@code reader.termDocsEnum(term)} fails
 */
@Override
public DocIdSetIterator iterator() throws IOException {
  final DocsEnum termDocs = reader.termDocsEnum(term);
  if (termDocs != null) {
    return termDocs;
  }
  // Nothing to enumerate: fall back to the empty set's iterator.
  return EmptyDocIdSet.getInstance().iterator();
}
// The two closers below end enclosing scopes opened before this snippet.
};
}
/**
 * Adds to {@code documents} the id of every document in which {@code word}
 * occurs in the field {@code fieldName}.
 *
 * <p>Each entry of the {@code reader} array is queried in turn; the ids it
 * reports are shifted by the {@code docBase} of the matching entry in
 * {@code contexts} before being added.
 *
 * <p>An {@code IOException} is logged and not rethrown, so {@code documents}
 * may then contain only the ids collected before the failure.
 *
 * @param word      the word to look up
 * @param documents the set that receives the matching document ids
 */
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
  final Term query = new Term(fieldName, word);
  try {
    for (int idx = 0; idx < reader.length; idx++) {
      final DocsEnum postings = reader[idx].termDocsEnum(query);
      final int docBase = contexts[idx].docBase;
      if (postings == null) {
        // This reader has no postings for the word.
        continue;
      }
      while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
        documents.add(docBase + postings.docID());
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
/**
 * Appends to {@code documents} the id of every document in which
 * {@code word} occurs in the field {@code fieldName}.
 *
 * <p>Each entry of the {@code reader} array is queried in turn; the ids it
 * reports are shifted by the {@code docBase} of the matching entry in
 * {@code contexts} before being appended.
 *
 * <p>An {@code IOException} is logged and not rethrown, so {@code documents}
 * may then contain only the ids collected before the failure.
 *
 * @param word      the word to look up
 * @param documents the list that receives the matching document ids
 */
@Override
public void getDocumentsWithWord(String word, IntArrayList documents) {
  final Term query = new Term(fieldName, word);
  try {
    for (int idx = 0; idx < reader.length; idx++) {
      final DocsEnum postings = reader[idx].termDocsEnum(query);
      final int docBase = contexts[idx].docBase;
      if (postings == null) {
        // This reader has no postings for the word.
        continue;
      }
      while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
        documents.add(postings.docID() + docBase);
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
// Closes the enclosing class, which is opened before this snippet.
}
@Test public void testSkipDataCheckIndex() throws IOException { // The Lucene CheckIndex was catching a problem with how skip data level // were computed on this configuration. this.setPostingsFormat(new Siren10VIntPostingsFormat(256)); final MockSirenDocument[] docs = new MockSirenDocument[1000]; for (int i = 0; i < 1000; i++) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))); } this.addDocuments(docs); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); }
@Test public void testSkipDataCheckIndex() throws IOException { // The Lucene CheckIndex was catching a problem with how skip data level // were computed on this configuration. this.setPostingsFormat(new Siren10VIntPostingsFormat(256)); final MockSirenDocument[] docs = new MockSirenDocument[1000]; for (int i = 0; i < 1000; i++) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))); } this.addDocuments(docs); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); }
@Test public void testSimpleSkipNode() throws IOException { this.addDocuments( doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))), doc(token("aaa", node(1,0)), token("bbb", node(1,0,1,0))), doc(token("aaa", node(5,3,6,3)), token("bbb", node(5,3,6,3,7))) ); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); assertEquals(-1, e.doc()); assertEquals(0, e.nodeFreqInDoc()); // skip to 2 using linear scan. Node should be also be skipped. assertTrue(e.skipTo(2)); assertEquals(2, e.doc()); assertEquals(1, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(5,3,6,3), e.node()); assertFalse(e.nextNode()); assertFalse(e.nextDocument()); }
@Test public void testSimpleSkipNode() throws IOException { this.addDocuments( doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))), doc(token("aaa", node(1,0)), token("bbb", node(1,0,1,0))), doc(token("aaa", node(5,3,6,3)), token("bbb", node(5,3,6,3,7))) ); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); assertEquals(-1, e.doc()); assertEquals(0, e.nodeFreqInDoc()); // skip to 2 using linear scan. Node should be also be skipped. assertTrue(e.skipTo(2)); assertEquals(2, e.doc()); assertEquals(1, e.nodeFreqInDoc()); assertTrue(e.nextNode()); assertEquals(node(5,3,6,3), e.node()); assertFalse(e.nextNode()); assertFalse(e.nextDocument()); }
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
/**
 * Indexes three documents and walks the postings of "aaa" with
 * {@code nextDocument()}, checking the document id and node frequency
 * reported at every step and the exhausted state at the end.
 */
@Test
public void testSimpleNextDocument() throws IOException {
  this.addDocuments(
    doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))),
    doc(token("aaa", node(1,0)), token("bbb", node(1,0,1,0))),
    doc(token("aaa", node(5,3,6,3)), token("bbb", node(5,3,6,3,7)))
  );

  final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
  final Term aaa = new Term(DEFAULT_TEST_FIELD, "aaa");
  final DocsEnum termDocs = atomicReader.termDocsEnum(aaa);
  // The instanceof assertion guards the cast on the next line.
  assertTrue(termDocs instanceof Siren10DocsEnum);
  final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
  final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

  // State before the enum has been advanced.
  assertEquals(-1, postings.doc());
  assertEquals(0, postings.nodeFreqInDoc());

  // Expected number of "aaa" nodes per document, indexed by document id:
  // document 0 has two "aaa" tokens, documents 1 and 2 have one each.
  final int[] expectedNodeFreq = { 2, 1, 1 };
  for (int docId = 0; docId < expectedNodeFreq.length; docId++) {
    assertTrue(postings.nextDocument());
    assertEquals(docId, postings.doc());
    assertEquals(expectedNodeFreq[docId], postings.nodeFreqInDoc());
  }

  // Past the last document the enum reports the sentinel id.
  assertFalse(postings.nextDocument());
  assertEquals(DocsAndNodesIterator.NO_MORE_DOC, postings.doc());
}
@Test public void testSkipDoc() throws IOException { final MockSirenDocument[] docs = new MockSirenDocument[2048]; for (int i = 0; i < 2048; i += 4) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))); docs[i + 1] = doc(token("aaa", node(1,0)), token("bbb", node(1,0,1,0))); docs[i + 2] = doc(token("aaa", node(5,3,6,3)), token("bbb", node(5,3,6,3,7))); docs[i + 3] = doc(token("bbb", node(2,0)), token("aaa", node(5,3,6))); } this.addDocuments(docs); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); // first skip in skiplist is at 512 assertTrue(e.skipTo(502)); assertEquals(502, e.doc()); assertEquals(1, e.nodeFreqInDoc()); // must have used the second skip assertTrue(e.skipTo(1624)); assertEquals(1624, e.doc()); assertEquals(2, e.nodeFreqInDoc()); // no other skip, must have used the linear scan assertTrue(e.skipTo(2000)); assertEquals(2000, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertFalse(e.skipTo(256323)); }
/**
 * Indexes three documents and walks the postings of "aaa" with
 * {@code nextDocument()}, checking the document id and node frequency
 * reported at every step and the exhausted state at the end.
 */
@Test
public void testSimpleNextDocument() throws IOException {
  this.addDocuments(
    doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))),
    doc(token("aaa", node(1,0)), token("bbb", node(1,0,1,0))),
    doc(token("aaa", node(5,3,6,3)), token("bbb", node(5,3,6,3,7)))
  );

  final AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
  final Term aaa = new Term(DEFAULT_TEST_FIELD, "aaa");
  final DocsEnum termDocs = atomicReader.termDocsEnum(aaa);
  // The instanceof assertion guards the cast on the next line.
  assertTrue(termDocs instanceof Siren10DocsEnum);
  final Siren10DocsEnum sirenDocs = (Siren10DocsEnum) termDocs;
  final Siren10DocsNodesAndPositionsEnum postings = sirenDocs.getDocsNodesAndPositionsEnum();

  // State before the enum has been advanced.
  assertEquals(-1, postings.doc());
  assertEquals(0, postings.nodeFreqInDoc());

  // Expected number of "aaa" nodes per document, indexed by document id:
  // document 0 has two "aaa" tokens, documents 1 and 2 have one each.
  final int[] expectedNodeFreq = { 2, 1, 1 };
  for (int docId = 0; docId < expectedNodeFreq.length; docId++) {
    assertTrue(postings.nextDocument());
    assertEquals(docId, postings.doc());
    assertEquals(expectedNodeFreq[docId], postings.nodeFreqInDoc());
  }

  // Past the last document the enum reports the sentinel id.
  assertFalse(postings.nextDocument());
  assertEquals(DocsAndNodesIterator.NO_MORE_DOC, postings.doc());
}
@Test public void testSkipDoc() throws IOException { final MockSirenDocument[] docs = new MockSirenDocument[2048]; for (int i = 0; i < 2048; i += 4) { docs[i] = doc(token("aaa", node(1)), token("bbb", node(1,0)), token("aaa", node(2))); docs[i + 1] = doc(token("aaa", node(1,0)), token("bbb", node(1,0,1,0))); docs[i + 2] = doc(token("aaa", node(5,3,6,3)), token("bbb", node(5,3,6,3,7))); docs[i + 3] = doc(token("bbb", node(2,0)), token("aaa", node(5,3,6))); } this.addDocuments(docs); final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader); final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum(); // first skip in skiplist is at 512 assertTrue(e.skipTo(502)); assertEquals(502, e.doc()); assertEquals(1, e.nodeFreqInDoc()); // must have used the second skip assertTrue(e.skipTo(1624)); assertEquals(1624, e.doc()); assertEquals(2, e.nodeFreqInDoc()); // no other skip, must have used the linear scan assertTrue(e.skipTo(2000)); assertEquals(2000, e.doc()); assertEquals(2, e.nodeFreqInDoc()); assertFalse(e.skipTo(256323)); }
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();
// Fetch the postings of term "aaa" in DEFAULT_TEST_FIELD from aReader. The
// instanceof assertion fails on null or on any other DocsEnum type, so it
// guards the cast that exposes the Siren10DocsNodesAndPositionsEnum as e.
final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa"))); assertTrue(docsEnum instanceof Siren10DocsEnum); final Siren10DocsNodesAndPositionsEnum e = ((Siren10DocsEnum) docsEnum).getDocsNodesAndPositionsEnum();