final TermsEnum[] termsEnums = new TermsEnum[subReaders.size()];
final DocsEnum[] docsEnums = new DocsEnum[subReaders.size()];
for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
  termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator(null);
}

final BytesRef id = new BytesRef(ids[idx]);
int base = 0;
for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
  final AtomicReader sub = subReaders.get(subIDX).reader();
  final TermsEnum termsEnum = termsEnums[subIDX];
  if (termsEnum.seekExact(id, false)) {
    final DocsEnum docs = docsEnums[subIDX] = termsEnum.docs(sub.getLiveDocs(), docsEnums[subIDX], 0);
    if (docs != null) {
      final int docID = docs.nextDoc();
      // base + docID is the matching document's id in the top-level reader
    }
  }
  base += sub.maxDoc();
}
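The fragment above comes from test scaffolding, so `subReaders`, `ids`, `idx`, and `base` are defined elsewhere. A self-contained sketch of the same per-segment primary-key lookup, assuming an "id" field and a hypothetical `IdLookup.findDocId` helper:

import java.io.IOException;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;

public final class IdLookup {
  // Returns the top-level docID for the given primary key, or -1 if absent.
  static int findDocId(IndexReader reader, String idValue) throws IOException {
    final BytesRef id = new BytesRef(idValue);
    for (AtomicReaderContext context : reader.getContext().leaves()) {
      final AtomicReader sub = context.reader();
      final Terms terms = sub.terms("id");
      if (terms == null) {
        continue; // this segment has no "id" field
      }
      final TermsEnum termsEnum = terms.iterator(null);
      if (termsEnum.seekExact(id, false)) {
        final DocsEnum docs = termsEnum.docs(sub.getLiveDocs(), null, 0);
        final int docID = docs.nextDoc();
        if (docID != DocsEnum.NO_MORE_DOCS) {
          return context.docBase + docID; // segment-local id -> global id
        }
      }
    }
    return -1;
  }
}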
private TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
  final TermState state = termStates.get(context.ord);
  if (state == null) { // term is not present in that reader
    assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
    return null;
  }
  final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
  termsEnum.seekExact(term.bytes(), state);
  return termsEnum;
}
private synchronized int[] getAllowedLuceneIds(AtomicReaderContext context) throws IOException {
  AtomicReader reader = context.reader();
  if (allowedLuceneIds.containsKey(reader)) {
    return allowedLuceneIds.get(reader);
  }
  LOG.debug("building WpId filter for " + wpIds.length + " ids with hash " + Arrays.hashCode(wpIds));
  TIntSet wpIdSet = new TIntHashSet(wpIds);
  TIntSet luceneIdSet = new TIntHashSet();
  Set<String> fields = new HashSet<String>(Arrays.asList(LuceneOptions.LOCAL_ID_FIELD_NAME));
  Bits liveDocs = reader.getLiveDocs();
  // iterate docids up to maxDoc(); numDocs() would undercount the id space
  // when the segment contains deletions
  for (int i = 0; i < reader.maxDoc(); i++) {
    if (liveDocs != null && !liveDocs.get(i)) {
      continue; // skip deleted documents
    }
    Document d = reader.document(i, fields);
    int wpId = Integer.valueOf(d.get(LuceneOptions.LOCAL_ID_FIELD_NAME));
    if (wpIdSet.contains(wpId)) {
      luceneIdSet.add(i);
    }
  }
  int[] luceneIds = luceneIdSet.toArray();
  LOG.debug("WpId filter matched " + luceneIds.length + " ids.");
  allowedLuceneIds.put(reader, luceneIds);
  return luceneIds;
}
@Override
public int advance(int target) throws IOException {
  Bits liveDocs = reader.getLiveDocs();
  for (int i = target; i < reader.maxDoc(); ++i) {
    if (liveDocs != null && !liveDocs.get(i)) {
      continue;
    }
    doc = i;
    return doc;
  }
  return NO_MORE_DOCS;
}
private void init(AtomicReader reader) throws IOException {
  int maxDoc = reader.maxDoc();
  _uidArray = new long[maxDoc];
  NumericDocValues uidValues = reader
      .getNumericDocValues(AbstractZoieIndexable.DOCUMENT_ID_PAYLOAD_FIELD);
  Bits liveDocs = reader.getLiveDocs();
  for (int i = 0; i < maxDoc; ++i) {
    if (liveDocs != null && !liveDocs.get(i)) {
      _uidArray[i] = ZoieSegmentReader.DELETED_UID;
      continue;
    }
    _uidArray[i] = uidValues.get(i);
  }
}
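The last two snippets rely on the same `getLiveDocs()` convention: a null `Bits` means the segment has no deletions, so every id is live. A minimal sketch of that pattern in isolation (the `LiveDocsWalker` class and `DocVisitor` interface are illustrative assumptions):

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.util.Bits;

public final class LiveDocsWalker {
  // Visits every non-deleted document in the segment.
  static void forEachLiveDoc(AtomicReader reader, DocVisitor visitor) throws IOException {
    final Bits liveDocs = reader.getLiveDocs();
    final int maxDoc = reader.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
      if (liveDocs != null && !liveDocs.get(i)) {
        continue; // deleted
      }
      visitor.visit(i);
    }
  }

  interface DocVisitor {
    void visit(int docID) throws IOException;
  }
}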
@Test
public void testSkipDataCheckIndex() throws IOException {
  // The Lucene CheckIndex was catching a problem with how skip data levels
  // were computed on this configuration.
  this.setPostingsFormat(new Siren10VIntPostingsFormat(256));
  final MockSirenDocument[] docs = new MockSirenDocument[1000];
  for (int i = 0; i < 1000; i++) {
    docs[i] = doc(token("aaa", node(1)), token("bbb", node(1, 0)), token("aaa", node(2)));
  }
  this.addDocuments(docs);
  final AtomicReader aReader = SlowCompositeReaderWrapper.wrap(reader);
  final DocsEnum docsEnum = aReader.termDocsEnum(new Term(DEFAULT_TEST_FIELD, new BytesRef("aaa")));
  assertTrue(docsEnum instanceof Siren10DocsEnum);
}
public void load(String fieldName, AtomicReader reader, TermListFactory<T> listFactory) throws IOException {
  String field = fieldName.intern();
  int maxDoc = reader.maxDoc();
  freqList.add(0);
  int totalFreq = 0;
  Terms terms = reader.terms(field);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
      // tail of an exception message whose beginning is elided in the snippet:
      //   + "documents in field \"" + field + "\", but it's impossible to sort on "
      //   + "tokenized fields");
      String strText = text.utf8ToString();
      list.add(strText);
      Term term = new Term(field, strText);
      DocsEnum docsEnum = reader.termDocsEnum(term);
      int minID = -1;
      int maxID = -1;
      // ...
    }
  }
  this.freqs[0] = reader.numDocs() - totalFreq;
}
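The per-term walk over `docsEnum` is elided above; in facet-cache loaders of this shape it typically records the first and last docids for the term plus its document frequency. A hedged sketch, with `walkTerm` as an assumed helper name:

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;

// Hedged sketch of the elided per-term walk: record the first and last
// docids seen for the term and return its document frequency.
static int walkTerm(DocsEnum docsEnum) throws IOException {
  int minID = -1;
  int maxID = -1;
  int df = 0;
  int docID;
  while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
    if (minID < 0) {
      minID = docID;
    }
    maxID = docID;
    df++;
  }
  // minID and maxID would feed the per-term ranges; df the frequency table
  return df;
}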
@Override
public void getDocumentsWithWordAsSet(String word, IntOpenHashSet documents) {
  DocsEnum docs = null;
  Term term = new Term(fieldName, word);
  try {
    int baseDocId;
    for (int i = 0; i < reader.length; i++) {
      docs = reader[i].termDocsEnum(term);
      baseDocId = contexts[i].docBase;
      if (docs != null) {
        while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
          documents.add(baseDocId + docs.docID());
        }
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
protected void requestDocumentsWithWord(String word,
    IntObjectOpenHashMap<IntArrayList[]> positionsInDocs,
    IntIntOpenHashMap docLengths, int wordId, int numberOfWords) {
  DocsAndPositionsEnum docPosEnum = null;
  Term term = new Term(fieldName, word);
  int localDocId, globalDocId, baseDocId;
  IntArrayList[] positions;
  try {
    for (int i = 0; i < reader.length; i++) {
      docPosEnum = reader[i].termPositionsEnum(term);
      baseDocId = contexts[i].docBase;
      if (docPosEnum != null) {
        while ((localDocId = docPosEnum.nextDoc()) != DocsAndPositionsEnum.NO_MORE_DOCS) {
          globalDocId = baseDocId + localDocId;
          docLengths.put(globalDocId,
              reader[i].document(localDocId).getField(docLengthFieldName)
                  .numericValue().intValue());
          // ... (the collection of term positions is elided in the snippet)
        }
      }
    }
  } catch (IOException e) {
    LOGGER.error("Error while requesting documents for word \"" + word + "\".", e);
  }
}
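This method and `getDocumentsWithWordAsSet` above both assume parallel `reader` and `contexts` arrays. One plausible way to populate them from a top-level reader, via `getContext().leaves()`, wrapped in an assumed `LeafArrays` holder:

import java.util.List;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;

final class LeafArrays {
  final AtomicReader[] reader;          // one AtomicReader per segment
  final AtomicReaderContext[] contexts; // parallel contexts, carrying docBase

  LeafArrays(IndexReader topReader) {
    List<AtomicReaderContext> leaves = topReader.getContext().leaves();
    reader = new AtomicReader[leaves.size()];
    contexts = new AtomicReaderContext[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
      contexts[i] = leaves.get(i);
      reader[i] = contexts[i].reader();
    }
  }
}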
private void advanceScorer() throws IOException {
  while (nextReader < leaves.size()) {
    leaf = leaves.get(nextReader++);
    scorer = weight.scorer(leaf, ordered, false, leaf.reader().getLiveDocs());
    // NB: arg 3 (topScorer) was 'true' prior to the 4.1 upgrade, but incorrectly, I think.
    if (scorer != null) {
      return;
    }
  }
  scorer = null;
}
builder.startObject()
    .field("shardId", shardId)
    .field("numDeletions", reader.numDeletedDocs());
builder.startArray("docs");
FieldInfos fieldInfos = reader.getFieldInfos();
Bits live = MultiFields.getLiveDocs(reader);
for (int docNum = 0; docNum < reader.maxDoc(); docNum++) {
  if (live != null && live.get(docNum)) {
    continue; // not deleted
  }
  Document doc = reader.document(docNum);
  // ... iterate a TermsEnum `te` over the doc's terms; for each term:
  //       continue; // no data for this term in this doc
  String text = te.term().utf8ToString();
  List<Integer> positions = new ArrayList<>();
  List<Integer> starts = new ArrayList<>();
  // ...
}
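The setup of the `TermsEnum` `te` is elided above. If the index stores term vectors, one plausible per-document source is `getTermVector` (a hedged sketch; `fieldName` is an assumption):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

// Hedged sketch: derive `te` from the document's stored term vector.
static TermsEnum termsForDoc(IndexReader reader, int docNum, String fieldName)
    throws IOException {
  Terms vector = reader.getTermVector(docNum, fieldName);
  return vector == null ? null : vector.iterator(null);
}

Each term's text then comes from `te.term().utf8ToString()`, with positions and offsets from `te.docsAndPositions(null, null)` when they were indexed.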
protected int getNegativeValueCount(AtomicReader reader, String field) throws IOException {
  int ret = 0;
  Terms terms = reader.terms(field);
  if (terms == null) {
    return ret;
  }
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef text;
  while ((text = termsEnum.next()) != null) {
    if (!text.utf8ToString().startsWith("-")) {
      break;
    }
    ret++;
  }
  return ret;
}
@Override
public void collect(int doc) throws IOException {
  idValues.setDocument(doc);
  if (idValues.count() > 0) {
    assert idValues.count() == 1;
    BytesRef id = idValues.valueAt(0);
    fieldsVisitor.reset();
    reader.document(doc, fieldsVisitor);
    try {
      // id is only used for logging; if we fail, we log the id in the catch statement
      final QueryAndSource queryAndSource = percolator.parsePercolatorDocument(null, fieldsVisitor.source());
      queries.put(id.utf8ToString(), queryAndSource);
    } catch (Exception e) {
      logger.warn("failed to add query [{}]", e, id.utf8ToString());
    }
  } else {
    logger.error("failed to load query since field [{}] not present", ID_FIELD);
  }
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
  AtomicReader reader = context.reader();
  Fields fields = reader.fields();
  Terms terms = fields.terms(VISIBILITY_FIELD_NAME);
  if (terms == null) {
    return null;
  } else {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    TermsEnum iterator = terms.iterator(null);
    BytesRef bytesRef;
    while ((bytesRef = iterator.next()) != null) {
      makeVisible(iterator, bitSet, acceptDocs, isVisible(visibilityEvaluator, bytesRef));
    }
    return BitsFilteredDocIdSet.wrap(bitSet, acceptDocs);
  }
}
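`makeVisible` itself is not shown in the snippet. A plausible reconstruction, assuming it simply sets the bit for every document carrying the current term when the visibility check passed:

import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;

// Hypothetical reconstruction of the helper used above: marks every
// document carrying the current term as visible.
static void makeVisible(TermsEnum iterator, OpenBitSet bitSet, Bits acceptDocs,
    boolean visible) throws IOException {
  if (!visible) {
    return; // the visibility expression rejected this term
  }
  DocsEnum docs = iterator.docs(acceptDocs, null, DocsEnum.FLAG_NONE);
  int docID;
  while ((docID = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
    bitSet.set(docID);
  }
}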
ImageHashScorer(Weight weight, BitSet bitSet, AtomicReaderContext context, Bits liveDocs) {
  super(weight, luceneFieldName, lireFeature, context.reader(), ImageHashLimitQuery.this.getBoost());
  this.bitSet = bitSet;
  this.liveDocs = liveDocs;
  maxDoc = context.reader().maxDoc();
  docBase = context.docBase;
}
ReaderData(IndexReader reader) throws IOException {
  this.reader = reader;
  long minUID = Long.MAX_VALUE;
  long maxUID = Long.MIN_VALUE;
  uidMap = new Long2IntRBTreeMap();
  uidMap.defaultReturnValue(-1);
  int maxDoc = reader.maxDoc();
  if (maxDoc == 0) {
    _minUID = Long.MIN_VALUE;
    _maxUID = Long.MIN_VALUE;
    return;
  }
  List<AtomicReaderContext> leaves = reader.getContext().leaves();
  for (AtomicReaderContext context : leaves) {
    AtomicReader atomicReader = context.reader();
    NumericDocValues uidValues = atomicReader
        .getNumericDocValues(AbstractZoieIndexable.DOCUMENT_ID_PAYLOAD_FIELD);
    Bits liveDocs = atomicReader.getLiveDocs();
    for (int i = 0; i < atomicReader.maxDoc(); ++i) {
      if (liveDocs == null || liveDocs.get(i)) {
        long uid = uidValues.get(i);
        if (uid < minUID) minUID = uid;
        if (uid > maxUID) maxUID = uid;
        // store the top-level docid (segment base + local id) so that ids
        // from different segments cannot collide in the single shared map
        uidMap.put(uid, context.docBase + i);
      }
    }
  }
  _minUID = minUID;
  _maxUID = maxUID;
}
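A hedged usage sketch for the structures built above; this hypothetical helper would sit alongside the constructor and relies on the map's configured default of -1:

// Hypothetical lookup built on the fields initialized above: rejects
// out-of-range UIDs cheaply before consulting the tree map.
int docidForUid(long uid) {
  if (uid < _minUID || uid > _maxUID) {
    return -1; // cannot be present
  }
  return uidMap.get(uid); // -1 (the configured default) when absent
}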