Refine search
/**
 * Walks every document of {@code postingsEnum} and counts how many term
 * occurrences land on a position recorded by the query's scorer.
 *
 * A document contributes only when its global id ({@code docBase + docId})
 * is present in {@code data.documentIds}; each occurrence whose position is
 * contained in the scorer's position set adds one to the returned weight.
 */
private int getPhraseScore(final ComplexQueryData data, final int docBase, final PostingsEnum postingsEnum) throws IOException {
    int matchedOccurrences = 0;
    for (int docId = postingsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = postingsEnum.nextDoc()) {
        if (!data.documentIds.has(docBase + docId)) {
            continue; // document not part of the query's candidate set
        }
        final IntsHolder allowedPositions = data.scorer.getPositions(docBase + docId);
        if (allowedPositions == null) {
            continue; // no recorded positions for this document
        }
        final int occurrences = postingsEnum.freq();
        for (int occ = 0; occ < occurrences; occ++) {
            if (allowedPositions.has(postingsEnum.nextPosition())) {
                matchedOccurrences++;
            }
        }
    }
    return matchedOccurrences;
}
/**
 * Snapshots the lead phrase position's ordinal, absolute position and
 * start/end offsets — but only when lead-match capture is enabled.
 */
private void captureLead(PhrasePositions pp) throws IOException {
    if (!captureLeadMatch) {
        return; // capture disabled; nothing to record
    }
    leadOrd = pp.ord;
    // absolute position = in-document position plus the phrase-term offset
    leadPosition = pp.position + pp.offset;
    leadOffset = pp.postings.startOffset();
    leadEndOffset = pp.postings.endOffset();
}
// Fragment (mid-method): builds a sparse term/document matrix S (SVDLIBJ-style)
// from the postings of `contentsField`, weighting each cell by global * local
// term weight.
// NOTE(review): snippet is truncated — braces do not balance, and `docsEnum`
// and `termList` are obtained/declared outside the visible code. Code is kept
// byte-identical; only comments added.
SMat S;
Terms terms = this.luceneUtils.getTermsForField(contentsField);
TermsEnum termsEnumForCount = terms.iterator();
// First pass: count filtered terms and non-zero (term, doc) entries to size S.
int numTerms = 0, nonZeroVals = 0;
BytesRef bytes;
while ((bytes = termsEnumForCount.next()) != null) {
    Term term = new Term(contentsField, bytes);
    if (this.luceneUtils.termFilter(term)) numTerms++;
    // presumably docsEnum is (re)positioned per term before this loop — TODO confirm
    while (docsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
        ++nonZeroVals;
        // Second pass: fill in row indices and weighted values, one column per term.
        TermsEnum termsEnum = terms.iterator();
        int termCounter = 0;
        int firstNonZero = 0; // Index of first non-zero entry (document) of each column (term).
        while((bytes = termsEnum.next()) != null) {
            Term term = new Term(contentsField, bytes);
            termList[termCounter] = term.text();
            while (docsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                S.rowind[firstNonZero] = docsEnum.docID(); // set row index to document number
                float value = luceneUtils.getGlobalTermWeight(term); //global weight
                value = value * (float) luceneUtils.getLocalTermWeight(docsEnum.freq()); // multiply by local weight
// Fragment (CheckIndex-style verification of one term's postings): accumulates
// total term frequency while validating freq bounds, positions/offsets/payloads,
// and advance() behaviour.
// NOTE(review): heavily truncated — several if/for bodies are cut off before
// their closing braces, so the nesting shown here is an artifact of extraction.
// Code kept byte-identical; only comments added.
long totalTermFreq = 0;
while(true) {
    final int doc = postings.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        break;
    // freq is -1 when the field does not record frequencies
    int freq = -1;
    if (hasFreqs) {
        freq = postings.freq();
        // a recorded frequency must be strictly positive
        if (freq <= 0) {
            throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
    // when Terms.hasFreqs() is false every posting must report freq == 1
    if (postings.freq() != 1) {
        throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
    if (hasPositions) {
        // consume every position plus its optional payload and offsets
        for(int j=0;j<freq;j++) {
            final int pos = postings.nextPosition();
            BytesRef payload = postings.getPayload();
            int startOffset = postings.startOffset();
            int endOffset = postings.endOffset();
    // second phase: exercise advance() by skipping ahead and re-reading freq
    final int docID = postings.advance(skipDocID);
    if (docID == DocIdSetIterator.NO_MORE_DOCS) {
        break;
    final int freq = postings.freq();
// Fragment (debug dump of per-field term statistics and postings).
// NOTE(review): truncated — the surrounding print statement and several
// closing braces are outside the visible code.
// BUGFIX: the "Sum total term freq" label previously called getSumDocFreq()
// (copy-paste from the line above); it now calls getSumTotalTermFreq().
" Doc count: " + terms.getDocCount() + "\n" +
" Sum doc freq: " + terms.getSumDocFreq() + "\n" +
" Sum total term freq: " + terms.getSumTotalTermFreq() + "\n" +
" TERM '" + termsEnum.term().utf8ToString() + "':\n" +
" Doc freq: " + termsEnum.docFreq() + "\n" +
// Walk each document of the current term, printing id, freq and positions.
while (docPosEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    " Doc id: " + docPosEnum.docID() + "\n" +
    " Freq: " + docPosEnum.freq() + "\n");
    for (int i = 0; i < docPosEnum.freq(); i++) {
        int position = docPosEnum.nextPosition();
        int start = docPosEnum.startOffset();
        if (start >= 0) {
            // BUGFIX: `end` previously re-read startOffset(); offsets were
            // always printed as "start-start". Read endOffset() instead.
            int end = docPosEnum.endOffset();
            System.out.println(" " + position + " (offsets: " + start + "-" + end + ")");
        } else {
// Fallback path: field without positions — dump doc ids and freqs only.
while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    System.out.println(
        " Doc id: " + postingsEnum.docID() + "\n" +
        " Freq: " + postingsEnum.freq() + "\n");
// Fragment: rebuilds the taxonomy ordinal map by walking the FULL facet field
// of each leaf reader; every category term has exactly one posting, mapping
// (global doc id -> category ordinal).
// NOTE(review): truncated — the loop bodies are not closed in the visible
// snippet, and `docs`, `ordinalMap` and `base` are declared outside it.
for (final LeafReaderContext ctx : r.leaves()) {
    final LeafReader ar = ctx.reader();
    final Terms terms = ar.terms(Consts.FULL);
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
        // decode the stored path back into a FacetLabel and (re)add it
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
        final int ordinal = addCategory(cp);
        docs = te.postings(docs, PostingsEnum.NONE);
        // first (and only) posting gives the segment-local doc id; add base
        // to make it index-global
        ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
base += ar.maxDoc(); // no deletions, so we're ok
// Fragment (term-vectors consumer): replays one field's terms and their
// positions/offsets/payloads into a writer via startTerm(...).
// NOTE(review): truncated — the two `while(termsEnum.next() != null)` loops
// appear back-to-back because intervening code was cut; `numTerms`,
// `termCount` and `docsAndPositionsEnum` come from outside the snippet.
final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
final boolean hasPayloads = terms.hasPayloads();
// payloads are only meaningful when positions are recorded
assert !hasPayloads || hasPositions;
termsEnum = terms.iterator();
while(termsEnum.next() != null) {
    numTerms++;
while(termsEnum.next() != null) {
    termCount++;
    // in a single-doc term vector, totalTermFreq is the in-document freq
    final int freq = (int) termsEnum.totalTermFreq();
    startTerm(termsEnum.term(), freq);
    assert docsAndPositionsEnum != null;
    // a term vector enumerates exactly one document
    final int docID = docsAndPositionsEnum.nextDoc();
    assert docID != DocIdSetIterator.NO_MORE_DOCS;
    assert docsAndPositionsEnum.freq() == freq;
    final int pos = docsAndPositionsEnum.nextPosition();
    final int startOffset = docsAndPositionsEnum.startOffset();
    final int endOffset = docsAndPositionsEnum.endOffset();
    final BytesRef payload = docsAndPositionsEnum.getPayload();
// Fragment (IndexWriter-style buffered-delete application): for each delete
// term, walks its postings and deletes docs below the term's recorded limit.
// NOTE(review): truncated — braces do not balance, and `lastField`,
// `termsEnum`, `fields`, `segDeletes` and `state` come from outside the
// snippet. Code kept byte-identical; only comments added.
PostingsEnum postingsEnum = null;
for(Term deleteTerm : deleteTerms) {
    // terms are sorted by field; only fetch a new TermsEnum on field change
    if (deleteTerm.field().equals(lastField) == false) {
        lastField = deleteTerm.field();
        Terms terms = fields.terms(lastField);
        if (terms != null) {
            termsEnum = terms.iterator();
        } else {
            termsEnum = null;
    if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
        postingsEnum = termsEnum.postings(postingsEnum, 0);
        // only documents indexed before the delete was buffered may be deleted
        int delDocLimit = segDeletes.get(deleteTerm);
        assert delDocLimit < PostingsEnum.NO_MORE_DOCS;
        while (true) {
            int doc = postingsEnum.nextDoc();
            if (doc < delDocLimit) {
                if (state.liveDocs == null) {
/**
 * Explains this feature query's score for {@code doc}: reports a no-match
 * explanation when the field, the feature term, or the doc's posting is
 * absent, and otherwise delegates to the scoring function with the posting's
 * frequency (which encodes the feature value).
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
    final String desc = "weight(" + getQuery() + " in " + doc + ") [" + function + "]";
    final Terms terms = context.reader().terms(fieldName);
    if (terms == null) {
        return Explanation.noMatch(desc + ". Field " + fieldName + " doesn't exist.");
    }
    final TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(new BytesRef(featureName))) {
        return Explanation.noMatch(desc + ". Feature " + featureName + " doesn't exist.");
    }
    final PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
    if (postings.advance(doc) != doc) {
        return Explanation.noMatch(desc + ". Feature " + featureName + " isn't set.");
    }
    return function.explain(fieldName, featureName, boost, doc, postings.freq());
}
// Fragment (doc-values field updates): resolves an update term's postings and
// collects affected docs into a DocValuesFieldUpdates holder.
// NOTE(review): truncated — braces do not balance; as printed, `termsEnum` can
// be assigned null and then dereferenced by seekExact, but the real guard is
// presumably in the elided code — TODO confirm against the full source.
// `termsEnum`, `postingsEnum`, `holder`, `acceptDocs`, `doc` and `limit` are
// declared outside the visible snippet.
BytesRef term = new BytesRef();
term.bytes = new byte[16];
BytesRef scratch = new BytesRef();
scratch.bytes = new byte[16];
Terms terms = segState.reader.terms(termField);
if (terms != null) {
    termsEnum = terms.iterator();
} else {
    termsEnum = null;
if (termsEnum.seekExact(term)) {
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    DocValuesFieldUpdates dvUpdates = holder.get(updateField);
    if (dvUpdates == null) {
        // path without an existing updates container
        while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (acceptDocs == null || acceptDocs.get(doc)) {
        // path with an updates container: stop at the update's doc limit
        while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (doc >= limit) {
                break; // no more docs that can be updated for this term
// Fragment (CheckIndex term-vector verification): cross-checks one term's
// vector data (freq, positions, offsets, payloads) against the inverted
// postings for the same term/doc, throwing on any mismatch.
// NOTE(review): truncated — the throw statements' closing braces and the
// surrounding condition checks (e.g. the `payload == null` guard that the
// "has no payload but postings does" message implies) are cut off. Code kept
// byte-identical; only comments added.
final int advanceDoc = postingsDocs.advance(j);
if (advanceDoc != j) {
    throw new RuntimeException("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
final int doc = postings.nextDoc();
final int tf = postings.freq();
// vector freq must agree with the postings freq when freqs are recorded
if (postingsHasFreq && postingsDocs.freq() != tf) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs.freq());
int pos = postings.nextPosition();
if (postingsTerms.hasPositions()) {
    int postingsPos = postingsDocs.nextPosition();
// offsets from the vector must match offsets from the postings
final int startOffset = postings.startOffset();
final int endOffset = postings.endOffset();
int postingsStartOffset = postingsDocs.startOffset();
int postingsEndOffset = postingsDocs.endOffset();
if (startOffset != postingsStartOffset) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
// payload presence must agree between vector and postings
BytesRef payload = postings.getPayload();
if (postingsDocs.getPayload() != null) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsDocs.getPayload());
if (postingsDocs.getPayload() == null) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
/**
 * Feeds {@code consumer} every doc id whose id-term does NOT belong to this
 * shard, i.e. the documents that should be removed when splitting.
 */
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard, LeafReader leafReader, IntConsumer consumer) throws IOException {
    final Terms terms = leafReader.terms(idField);
    final TermsEnum iterator = terms.iterator();
    PostingsEnum postingsEnum = null;
    for (BytesRef idTerm = iterator.next(); idTerm != null; idTerm = iterator.next()) {
        if (includeInShard.test(idTerm)) {
            continue; // term belongs to this shard; keep its docs
        }
        // reuse the postings enum across terms to avoid reallocation
        postingsEnum = iterator.postings(postingsEnum);
        for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
            consumer.accept(doc);
        }
    }
}
/**
 * Explains the join score for {@code doc}: scans the collected join terms in
 * order and, for the first one whose postings contain the document, returns a
 * match explanation with that term's stored score; otherwise reports no match.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
    final Terms terms = context.reader().terms(field);
    if (terms != null) {
        final TermsEnum segmentTermsEnum = terms.iterator();
        final BytesRef spare = new BytesRef();
        PostingsEnum postingsEnum = null;
        final int numTerms = TermsIncludingScoreQuery.this.terms.size();
        for (int i = 0; i < numTerms; i++) {
            // skip join terms absent from this segment
            if (!segmentTermsEnum.seekExact(TermsIncludingScoreQuery.this.terms.get(ords[i], spare))) {
                continue;
            }
            postingsEnum = segmentTermsEnum.postings(postingsEnum, PostingsEnum.NONE);
            if (postingsEnum.advance(doc) == doc) {
                final float score = TermsIncludingScoreQuery.this.scores[ords[i]];
                return Explanation.match(score, "Score based on join value " + segmentTermsEnum.term().utf8ToString());
            }
        }
    }
    return Explanation.noMatch("Not a match");
}
// Fragment (parent/child filter): builds a DocIdSet of live parent documents
// by looking up each parent id as a UID term in the _uid field.
// NOTE(review): truncated — the loop over `parentIds`, the declarations of
// `i`, `docsEnum`, `parentTypeBr`, `expectedCardinality` and `result`, and
// most closing braces are outside the visible snippet. Code kept
// byte-identical; only comments added.
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
    Terms terms = context.reader().terms(UidFieldMapper.NAME);
    if (terms == null) {
        return null;
    TermsEnum termsEnum = terms.iterator();
    BytesRefBuilder uidSpare = new BytesRefBuilder();
    BytesRef idSpare = new BytesRef();
    // the passed-in acceptDocs is replaced by the segment's live docs
    acceptDocs = context.reader().getLiveDocs();
    parentIds.get(i, idSpare);
    BytesRef uid = Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
    if (termsEnum.seekExact(uid)) {
        docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
        // advance to the first live doc for this uid
        int docId;
        for (docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
            if (acceptDocs == null || acceptDocs.get(docId)) {
                break;
        // dense result expected -> FixedBitSet; otherwise a sparse structure
        if (expectedCardinality >= (context.reader().maxDoc() >>> 10)) {
            result = new FixedBitSet(context.reader().maxDoc());
        } else {
            // a uid term must match exactly one document
            assert docsEnum.advance(docId + 1) == DocIdSetIterator.NO_MORE_DOCS : "DocId " + docId + " should have been the last one but docId " + docsEnum.docID() + " exists.";
// Fragment: collects, per term of `field` in one leaf, the list of positions
// in (presumably) a single document.
// NOTE(review): truncated — `termBytes`, `positions` and `postings` are
// declared outside the snippet; `positions` is presumably re-created per term
// before being put into the map — TODO confirm. advance(0) positions on the
// first doc >= 0, which assumes the term occurs in the doc of interest.
Map<String, List<Integer>> termToPositions = new HashMap<>();
Terms t = leaf.terms(field);
TermsEnum tenum = t.iterator();
while ((termBytes = tenum.next()) != null) {
    termToPositions.put(termBytes.utf8ToString(), positions);
    postings = tenum.postings(postings);
    postings.advance(0);
    // read freq positions for the current document
    for (int i = 0; i < postings.freq(); i++) {
        positions.add(postings.nextPosition());
public void termPostingsList(String field, String termText) throws IOException { /* Note this method only iterates through the termpostings of the first segement in the index i.e. reader.leaves().get(0).reader(); To go through all term postings list for a term, you need to iterate over both the segements, and the leafreaders. */ LeafReader leafReader = reader.leaves().get(0).reader(); Terms terms = leafReader.terms(field); TermsEnum te = terms.iterator(); te.seekCeil(new BytesRef(termText)); PostingsEnum postings = te.postings(null); int doc; while ((doc = postings.nextDoc()) != PostingsEnum.NO_MORE_DOCS) { System.out.println(doc); // you can also iterate positions for each doc int position; int numPositions = postings.freq(); for (int i = 0; i < numPositions; i++) { int pos = postings.nextPosition(); if (pos > 0){ //Only prints out the positions if they are indexed System.out.println(pos); } } } }
// Logs every position of every term in the term vector of document `id` for
// `field`; does nothing when no term vector is stored.
Terms vector = indexReader.getTermVector(id, field);
if (vector != null) {
    TermsEnum vectorEnum = vector.iterator();
    for (BytesRef text = vectorEnum.next(); text != null; text = vectorEnum.next()) {
        String term = text.utf8ToString();
        PostingsEnum postings = vectorEnum.postings(null, PostingsEnum.POSITIONS);
        // a term vector enumerates a single document, but loop defensively
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            for (int remaining = postings.freq(); remaining > 0; remaining--) {
                logger.info("Position: {}", postings.nextPosition());
            }
        }
    }
}
// Fragment: looks up an id term in each sub-reader, reusing one PostingsEnum
// per segment, and resolves the matching segment-local doc id.
// NOTE(review): truncated — `termsEnums`, `ids`, `idx`, `subReader` and the
// closing braces are outside the visible snippet; `base` presumably converts
// segment-local doc ids to global ones — TODO confirm. Code kept
// byte-identical; only comments added.
final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
    termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
final BytesRef id = new BytesRef(ids[idx]);
final TermsEnum termsEnum = termsEnums[subIDX];
if (termsEnum.seekExact(id)) {
    // reuse the cached enum for this segment to avoid reallocation
    final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
    final int docID = docs.nextDoc();
    Bits liveDocs = subReader.getLiveDocs();
base += subReader.maxDoc();