Refine search
/**
 * Walks every document of {@code postingsEnum} and counts how many term
 * occurrences land on a position recorded by the query's scorer.
 *
 * A document contributes only when its global id ({@code docBase + docId})
 * is present in {@code data.documentIds}; each occurrence whose position is
 * contained in the scorer's position set adds one to the returned weight.
 */
private int getPhraseScore(final ComplexQueryData data, final int docBase, final PostingsEnum postingsEnum) throws IOException {
    int matchedOccurrences = 0;
    for (int docId = postingsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = postingsEnum.nextDoc()) {
        if (!data.documentIds.has(docBase + docId)) {
            continue; // document not part of the query's candidate set
        }
        final IntsHolder allowedPositions = data.scorer.getPositions(docBase + docId);
        if (allowedPositions == null) {
            continue; // no recorded positions for this document
        }
        final int occurrences = postingsEnum.freq();
        for (int occ = 0; occ < occurrences; occ++) {
            if (allowedPositions.has(postingsEnum.nextPosition())) {
                matchedOccurrences++;
            }
        }
    }
    return matchedOccurrences;
}
/**
 * Snapshots the lead phrase position's ordinal, absolute position and
 * start/end offsets — but only when lead-match capture is enabled.
 */
private void captureLead(PhrasePositions pp) throws IOException {
    if (!captureLeadMatch) {
        return; // capture disabled; nothing to record
    }
    leadOrd = pp.ord;
    // absolute position = in-document position plus the phrase-term offset
    leadPosition = pp.position + pp.offset;
    leadOffset = pp.postings.startOffset();
    leadEndOffset = pp.postings.endOffset();
}
// Fragment (mid-method): builds a sparse term/document matrix S (SVDLIBJ-style)
// from the postings of `contentsField`, weighting each cell by global * local
// term weight.
// NOTE(review): snippet is truncated — braces do not balance, and `docsEnum`
// and `termList` are obtained/declared outside the visible code. Code is kept
// byte-identical; only comments added.
SMat S;
Terms terms = this.luceneUtils.getTermsForField(contentsField);
TermsEnum termsEnumForCount = terms.iterator();
// First pass: count filtered terms and non-zero (term, doc) entries to size S.
int numTerms = 0, nonZeroVals = 0;
BytesRef bytes;
while ((bytes = termsEnumForCount.next()) != null) {
    Term term = new Term(contentsField, bytes);
    if (this.luceneUtils.termFilter(term)) numTerms++;
    // presumably docsEnum is (re)positioned per term before this loop — TODO confirm
    while (docsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
        ++nonZeroVals;
        // Second pass: fill in row indices and weighted values, one column per term.
        TermsEnum termsEnum = terms.iterator();
        int termCounter = 0;
        int firstNonZero = 0; // Index of first non-zero entry (document) of each column (term).
        while((bytes = termsEnum.next()) != null) {
            Term term = new Term(contentsField, bytes);
            termList[termCounter] = term.text();
            while (docsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                S.rowind[firstNonZero] = docsEnum.docID(); // set row index to document number
                float value = luceneUtils.getGlobalTermWeight(term); //global weight
                value = value * (float) luceneUtils.getLocalTermWeight(docsEnum.freq()); // multiply by local weight
// Fragment (CheckIndex-style verification of one term's postings): accumulates
// total term frequency while validating freq bounds, positions/offsets/payloads,
// and advance() behaviour.
// NOTE(review): heavily truncated — several if/for bodies are cut off before
// their closing braces, so the nesting shown here is an artifact of extraction.
// Code kept byte-identical; only comments added.
long totalTermFreq = 0;
while(true) {
    final int doc = postings.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        break;
    // freq is -1 when the field does not record frequencies
    int freq = -1;
    if (hasFreqs) {
        freq = postings.freq();
        // a recorded frequency must be strictly positive
        if (freq <= 0) {
            throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
    // when Terms.hasFreqs() is false every posting must report freq == 1
    if (postings.freq() != 1) {
        throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false");
    if (hasPositions) {
        // consume every position plus its optional payload and offsets
        for(int j=0;j<freq;j++) {
            final int pos = postings.nextPosition();
            BytesRef payload = postings.getPayload();
            int startOffset = postings.startOffset();
            int endOffset = postings.endOffset();
    // second phase: exercise advance() by skipping ahead and re-reading freq
    final int docID = postings.advance(skipDocID);
    if (docID == DocIdSetIterator.NO_MORE_DOCS) {
        break;
    final int freq = postings.freq();
// Fragment (debug dump of per-field term statistics and postings).
// NOTE(review): truncated — the surrounding print statement and several
// closing braces are outside the visible code.
// BUGFIX: the "Sum total term freq" label previously called getSumDocFreq()
// (copy-paste from the line above); it now calls getSumTotalTermFreq().
" Doc count: " + terms.getDocCount() + "\n" +
" Sum doc freq: " + terms.getSumDocFreq() + "\n" +
" Sum total term freq: " + terms.getSumTotalTermFreq() + "\n" +
" TERM '" + termsEnum.term().utf8ToString() + "':\n" +
" Doc freq: " + termsEnum.docFreq() + "\n" +
// Walk each document of the current term, printing id, freq and positions.
while (docPosEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    " Doc id: " + docPosEnum.docID() + "\n" +
    " Freq: " + docPosEnum.freq() + "\n");
    for (int i = 0; i < docPosEnum.freq(); i++) {
        int position = docPosEnum.nextPosition();
        int start = docPosEnum.startOffset();
        if (start >= 0) {
            // BUGFIX: `end` previously re-read startOffset(); offsets were
            // always printed as "start-start". Read endOffset() instead.
            int end = docPosEnum.endOffset();
            System.out.println(" " + position + " (offsets: " + start + "-" + end + ")");
        } else {
// Fallback path: field without positions — dump doc ids and freqs only.
while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    System.out.println(
        " Doc id: " + postingsEnum.docID() + "\n" +
        " Freq: " + postingsEnum.freq() + "\n");
// Fragment: rebuilds the taxonomy ordinal map by walking the FULL facet field
// of each leaf reader; every category term has exactly one posting, mapping
// (global doc id -> category ordinal).
// NOTE(review): truncated — the loop bodies are not closed in the visible
// snippet, and `docs`, `ordinalMap` and `base` are declared outside it.
for (final LeafReaderContext ctx : r.leaves()) {
    final LeafReader ar = ctx.reader();
    final Terms terms = ar.terms(Consts.FULL);
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
        // decode the stored path back into a FacetLabel and (re)add it
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
        final int ordinal = addCategory(cp);
        docs = te.postings(docs, PostingsEnum.NONE);
        // first (and only) posting gives the segment-local doc id; add base
        // to make it index-global
        ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
base += ar.maxDoc(); // no deletions, so we're ok
// Fragment (term-vectors consumer): replays one field's terms and their
// positions/offsets/payloads into a writer via startTerm(...).
// NOTE(review): truncated — the two `while(termsEnum.next() != null)` loops
// appear back-to-back because intervening code was cut; `numTerms`,
// `termCount` and `docsAndPositionsEnum` come from outside the snippet.
final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
final boolean hasPayloads = terms.hasPayloads();
// payloads are only meaningful when positions are recorded
assert !hasPayloads || hasPositions;
termsEnum = terms.iterator();
while(termsEnum.next() != null) {
    numTerms++;
while(termsEnum.next() != null) {
    termCount++;
    // in a single-doc term vector, totalTermFreq is the in-document freq
    final int freq = (int) termsEnum.totalTermFreq();
    startTerm(termsEnum.term(), freq);
    assert docsAndPositionsEnum != null;
    // a term vector enumerates exactly one document
    final int docID = docsAndPositionsEnum.nextDoc();
    assert docID != DocIdSetIterator.NO_MORE_DOCS;
    assert docsAndPositionsEnum.freq() == freq;
    final int pos = docsAndPositionsEnum.nextPosition();
    final int startOffset = docsAndPositionsEnum.startOffset();
    final int endOffset = docsAndPositionsEnum.endOffset();
    final BytesRef payload = docsAndPositionsEnum.getPayload();
// Fragment (IndexWriter-style buffered-delete application): for each delete
// term, walks its postings and deletes docs below the term's recorded limit.
// NOTE(review): truncated — braces do not balance, and `lastField`,
// `termsEnum`, `fields`, `segDeletes` and `state` come from outside the
// snippet. Code kept byte-identical; only comments added.
PostingsEnum postingsEnum = null;
for(Term deleteTerm : deleteTerms) {
    // terms are sorted by field; only fetch a new TermsEnum on field change
    if (deleteTerm.field().equals(lastField) == false) {
        lastField = deleteTerm.field();
        Terms terms = fields.terms(lastField);
        if (terms != null) {
            termsEnum = terms.iterator();
        } else {
            termsEnum = null;
    if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
        postingsEnum = termsEnum.postings(postingsEnum, 0);
        // only documents indexed before the delete was buffered may be deleted
        int delDocLimit = segDeletes.get(deleteTerm);
        assert delDocLimit < PostingsEnum.NO_MORE_DOCS;
        while (true) {
            int doc = postingsEnum.nextDoc();
            if (doc < delDocLimit) {
                if (state.liveDocs == null) {
/**
 * Explains this feature query's score for {@code doc}: reports a no-match
 * explanation when the field, the feature term, or the doc's posting is
 * absent, and otherwise delegates to the scoring function with the posting's
 * frequency (which encodes the feature value).
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
    final String desc = "weight(" + getQuery() + " in " + doc + ") [" + function + "]";
    final Terms terms = context.reader().terms(fieldName);
    if (terms == null) {
        return Explanation.noMatch(desc + ". Field " + fieldName + " doesn't exist.");
    }
    final TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(new BytesRef(featureName))) {
        return Explanation.noMatch(desc + ". Feature " + featureName + " doesn't exist.");
    }
    final PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
    if (postings.advance(doc) != doc) {
        return Explanation.noMatch(desc + ". Feature " + featureName + " isn't set.");
    }
    return function.explain(fieldName, featureName, boost, doc, postings.freq());
}
// Fragment (doc-values field updates): resolves an update term's postings and
// collects affected docs into a DocValuesFieldUpdates holder.
// NOTE(review): truncated — braces do not balance; as printed, `termsEnum` can
// be assigned null and then dereferenced by seekExact, but the real guard is
// presumably in the elided code — TODO confirm against the full source.
// `termsEnum`, `postingsEnum`, `holder`, `acceptDocs`, `doc` and `limit` are
// declared outside the visible snippet.
BytesRef term = new BytesRef();
term.bytes = new byte[16];
BytesRef scratch = new BytesRef();
scratch.bytes = new byte[16];
Terms terms = segState.reader.terms(termField);
if (terms != null) {
    termsEnum = terms.iterator();
} else {
    termsEnum = null;
if (termsEnum.seekExact(term)) {
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    DocValuesFieldUpdates dvUpdates = holder.get(updateField);
    if (dvUpdates == null) {
        // path without an existing updates container
        while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (acceptDocs == null || acceptDocs.get(doc)) {
        // path with an updates container: stop at the update's doc limit
        while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (doc >= limit) {
                break; // no more docs that can be updated for this term
// Fragment (CheckIndex term-vector verification): cross-checks one term's
// vector data (freq, positions, offsets, payloads) against the inverted
// postings for the same term/doc, throwing on any mismatch.
// NOTE(review): truncated — the throw statements' closing braces and the
// surrounding condition checks (e.g. the `payload == null` guard that the
// "has no payload but postings does" message implies) are cut off. Code kept
// byte-identical; only comments added.
final int advanceDoc = postingsDocs.advance(j);
if (advanceDoc != j) {
    throw new RuntimeException("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")");
final int doc = postings.nextDoc();
final int tf = postings.freq();
// vector freq must agree with the postings freq when freqs are recorded
if (postingsHasFreq && postingsDocs.freq() != tf) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs.freq());
int pos = postings.nextPosition();
if (postingsTerms.hasPositions()) {
    int postingsPos = postingsDocs.nextPosition();
// offsets from the vector must match offsets from the postings
final int startOffset = postings.startOffset();
final int endOffset = postings.endOffset();
int postingsStartOffset = postingsDocs.startOffset();
int postingsEndOffset = postingsDocs.endOffset();
if (startOffset != postingsStartOffset) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset);
// payload presence must agree between vector and postings
BytesRef payload = postings.getPayload();
if (postingsDocs.getPayload() != null) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsDocs.getPayload());
if (postingsDocs.getPayload() == null) {
    throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
/**
 * Feeds {@code consumer} every doc id whose id-term does NOT belong to this
 * shard, i.e. the documents that should be removed when splitting.
 */
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard, LeafReader leafReader, IntConsumer consumer) throws IOException {
    final Terms terms = leafReader.terms(idField);
    final TermsEnum iterator = terms.iterator();
    PostingsEnum postingsEnum = null;
    for (BytesRef idTerm = iterator.next(); idTerm != null; idTerm = iterator.next()) {
        if (includeInShard.test(idTerm)) {
            continue; // term belongs to this shard; keep its docs
        }
        // reuse the postings enum across terms to avoid reallocation
        postingsEnum = iterator.postings(postingsEnum);
        for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
            consumer.accept(doc);
        }
    }
}
/**
 * Explains the join score for {@code doc}: scans the collected join terms in
 * order and, for the first one whose postings contain the document, returns a
 * match explanation with that term's stored score; otherwise reports no match.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
    final Terms terms = context.reader().terms(field);
    if (terms != null) {
        final TermsEnum segmentTermsEnum = terms.iterator();
        final BytesRef spare = new BytesRef();
        PostingsEnum postingsEnum = null;
        final int numTerms = TermsIncludingScoreQuery.this.terms.size();
        for (int i = 0; i < numTerms; i++) {
            // skip join terms absent from this segment
            if (!segmentTermsEnum.seekExact(TermsIncludingScoreQuery.this.terms.get(ords[i], spare))) {
                continue;
            }
            postingsEnum = segmentTermsEnum.postings(postingsEnum, PostingsEnum.NONE);
            if (postingsEnum.advance(doc) == doc) {
                final float score = TermsIncludingScoreQuery.this.scores[ords[i]];
                return Explanation.match(score, "Score based on join value " + segmentTermsEnum.term().utf8ToString());
            }
        }
    }
    return Explanation.noMatch("Not a match");
}
// Fragment (parent/child filter): builds a DocIdSet of live parent documents
// by looking up each parent id as a UID term in the _uid field.
// NOTE(review): truncated — the loop over `parentIds`, the declarations of
// `i`, `docsEnum`, `parentTypeBr`, `expectedCardinality` and `result`, and
// most closing braces are outside the visible snippet. Code kept
// byte-identical; only comments added.
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
    Terms terms = context.reader().terms(UidFieldMapper.NAME);
    if (terms == null) {
        return null;
    TermsEnum termsEnum = terms.iterator();
    BytesRefBuilder uidSpare = new BytesRefBuilder();
    BytesRef idSpare = new BytesRef();
    // the passed-in acceptDocs is replaced by the segment's live docs
    acceptDocs = context.reader().getLiveDocs();
    parentIds.get(i, idSpare);
    BytesRef uid = Uid.createUidAsBytes(parentTypeBr, idSpare, uidSpare);
    if (termsEnum.seekExact(uid)) {
        docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
        // advance to the first live doc for this uid
        int docId;
        for (docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
            if (acceptDocs == null || acceptDocs.get(docId)) {
                break;
        // dense result expected -> FixedBitSet; otherwise a sparse structure
        if (expectedCardinality >= (context.reader().maxDoc() >>> 10)) {
            result = new FixedBitSet(context.reader().maxDoc());
        } else {
            // a uid term must match exactly one document
            assert docsEnum.advance(docId + 1) == DocIdSetIterator.NO_MORE_DOCS : "DocId " + docId + " should have been the last one but docId " + docsEnum.docID() + " exists.";
// Fragment: collects, per term of `field` in one leaf, the list of positions
// in (presumably) a single document.
// NOTE(review): truncated — `termBytes`, `positions` and `postings` are
// declared outside the snippet; `positions` is presumably re-created per term
// before being put into the map — TODO confirm. advance(0) positions on the
// first doc >= 0, which assumes the term occurs in the doc of interest.
Map<String, List<Integer>> termToPositions = new HashMap<>();
Terms t = leaf.terms(field);
TermsEnum tenum = t.iterator();
while ((termBytes = tenum.next()) != null) {
    termToPositions.put(termBytes.utf8ToString(), positions);
    postings = tenum.postings(postings);
    postings.advance(0);
    // read freq positions for the current document
    for (int i = 0; i < postings.freq(); i++) {
        positions.add(postings.nextPosition());
public void termPostingsList(String field, String termText) throws IOException { /* Note this method only iterates through the termpostings of the first segement in the index i.e. reader.leaves().get(0).reader(); To go through all term postings list for a term, you need to iterate over both the segements, and the leafreaders. */ LeafReader leafReader = reader.leaves().get(0).reader(); Terms terms = leafReader.terms(field); TermsEnum te = terms.iterator(); te.seekCeil(new BytesRef(termText)); PostingsEnum postings = te.postings(null); int doc; while ((doc = postings.nextDoc()) != PostingsEnum.NO_MORE_DOCS) { System.out.println(doc); // you can also iterate positions for each doc int position; int numPositions = postings.freq(); for (int i = 0; i < numPositions; i++) { int pos = postings.nextPosition(); if (pos > 0){ //Only prints out the positions if they are indexed System.out.println(pos); } } } }
// Logs every position of every term in the term vector of document `id` for
// `field`; does nothing when no term vector is stored.
Terms vector = indexReader.getTermVector(id, field);
if (vector != null) {
    TermsEnum vectorEnum = vector.iterator();
    for (BytesRef text = vectorEnum.next(); text != null; text = vectorEnum.next()) {
        String term = text.utf8ToString();
        PostingsEnum postings = vectorEnum.postings(null, PostingsEnum.POSITIONS);
        // a term vector enumerates a single document, but loop defensively
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            for (int remaining = postings.freq(); remaining > 0; remaining--) {
                logger.info("Position: {}", postings.nextPosition());
            }
        }
    }
}
// Fragment: looks up an id term in each sub-reader, reusing one PostingsEnum
// per segment, and resolves the matching segment-local doc id.
// NOTE(review): truncated — `termsEnums`, `ids`, `idx`, `subReader` and the
// closing braces are outside the visible snippet; `base` presumably converts
// segment-local doc ids to global ones — TODO confirm. Code kept
// byte-identical; only comments added.
final PostingsEnum[] docsEnums = new PostingsEnum[subReaders.size()];
for (int subIDX = 0; subIDX < subReaders.size(); subIDX++) {
    termsEnums[subIDX] = subReaders.get(subIDX).reader().fields().terms("id").iterator();
final BytesRef id = new BytesRef(ids[idx]);
final TermsEnum termsEnum = termsEnums[subIDX];
if (termsEnum.seekExact(id)) {
    // reuse the cached enum for this segment to avoid reallocation
    final PostingsEnum docs = docsEnums[subIDX] = termsEnum.postings(docsEnums[subIDX], 0);
    final int docID = docs.nextDoc();
    Bits liveDocs = subReader.getLiveDocs();
base += subReader.maxDoc();