congrats Icon
New! Announcing Tabnine Chat Beta
Learn More
Tabnine Logo
Terms
Code IndexAdd Tabnine to your IDE (free)

How to use
Terms
in
org.apache.lucene.index

Best Java code snippets using org.apache.lucene.index.Terms (Showing top 20 results out of 774)

Refine searchRefine arrow

  • TermsEnum
  • BytesRef
  • Term
  • LeafReader
  • PostingsEnum
  • LeafReaderContext
  • IndexReader
origin: org.apache.lucene/lucene-core

/** Returns {@link PostingsEnum} for the specified term.
 *  This will return null if either the field or
 *  term does not exist.
 *  <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs.
 *  @see TermsEnum#postings(PostingsEnum) */
public final PostingsEnum postings(Term term, int flags) throws IOException {
  assert term.field() != null;
  assert term.bytes() != null;
  // Null when this reader has no postings for the term's field at all.
  final Terms fieldTerms = terms(term.field());
  if (fieldTerms == null) {
    return null;
  }
  final TermsEnum iterator = fieldTerms.iterator();
  // seekExact positions the enum on the term iff it exists in this field.
  return iterator.seekExact(term.bytes()) ? iterator.postings(null, flags) : null;
}
origin: org.apache.lucene/lucene-core

 /**
  * Expert: returns additional information about this Terms instance
  * for debugging purposes.
  *
  * @return a string naming the implementation class plus the aggregate
  *         statistics (size, docCount, sumTotalTermFreq, sumDocFreq);
  *         any of these may be -1 if the codec does not store them
  * @throws IOException if reading the statistics fails
  */
 public Object getStats() throws IOException {
   // Chained appends avoid the intermediate concatenation strings the
   // equivalent "a" + b arguments would create; output is identical.
   return new StringBuilder()
       .append("impl=").append(getClass().getSimpleName())
       .append(",size=").append(size())
       .append(",docCount=").append(getDocCount())
       .append(",sumTotalTermFreq=").append(getSumTotalTermFreq())
       .append(",sumDocFreq=").append(getSumDocFreq())
       .toString();
 }
}
origin: org.apache.lucene/lucene-core

/**
 * Builds a {@link Matches} view for {@code doc}, or null when the term is
 * absent from this segment. Falls back to the superclass implementation
 * when the field was indexed without positions.
 */
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
  TermsEnum termsEnum = getTermsEnum(context);
  if (termsEnum == null) {
    // Term not present in this segment.
    return null;
  }
  boolean fieldHasPositions = context.reader().terms(term.field()).hasPositions();
  if (fieldHasPositions == false) {
    return super.matches(context, doc);
  }
  return MatchesUtils.forField(term.field(), () -> {
    PostingsEnum postings = termsEnum.postings(null, PostingsEnum.OFFSETS);
    // advance() landing past doc means this document has no occurrence.
    return postings.advance(doc) == doc
        ? new TermMatchesIterator(getQuery(), postings)
        : null;
  });
}
origin: org.apache.lucene/lucene-core

/** Create a {@link DocIdSetBuilder} instance that is optimized for
 *  accumulating docs that match the given {@link Terms}.
 *
 *  @param maxDoc number of documents in the target segment
 *  @param terms  source of the sizing statistics; {@code getDocCount()} and
 *                {@code getSumDocFreq()} may each be -1 if the codec does not
 *                store them — NOTE(review): presumably the delegated
 *                constructor tolerates -1; confirm against its contract
 *  @throws IOException if reading the term statistics fails */
public DocIdSetBuilder(int maxDoc, Terms terms) throws IOException {
 this(maxDoc, terms.getDocCount(), terms.getSumDocFreq());
}
origin: org.apache.lucene/lucene-core

/** Sole constructor.
 *
 * @param subs The {@link Terms} instances of all sub-readers.
 * @param subSlices A parallel array (matching {@code
 *        subs}) describing the sub-reader slices.
 * @throws IOException if reading per-sub capabilities fails
 */
public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
  this.subs = subs;
  this.subSlices = subSlices;

  assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";

  // Freqs/offsets/positions are only available if EVERY sub supports them;
  // payloads are available if ANY sub has them (and positions are supported).
  boolean allHaveFreqs = true;
  boolean allHaveOffsets = true;
  boolean allHavePositions = true;
  boolean anyHasPayloads = false;
  for (Terms sub : subs) {
    allHaveFreqs &= sub.hasFreqs();
    allHaveOffsets &= sub.hasOffsets();
    allHavePositions &= sub.hasPositions();
    anyHasPayloads |= sub.hasPayloads();
  }
  hasFreqs = allHaveFreqs;
  hasOffsets = allHaveOffsets;
  hasPositions = allHavePositions;
  hasPayloads = hasPositions && anyHasPayloads; // if all subs have pos, and at least one has payloads.
}
origin: org.elasticsearch/elasticsearch

/**
 * Collects into {@code terms} every indexed term in {@code field} that starts
 * with {@code prefix}, walking each leaf separately, stopping once
 * {@code maxExpansions} terms have been gathered.
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
  // SlowCompositeReaderWrapper could be used... but this would merge all terms
  // from each segment into one terms instance, which is very expensive.
  // Iterating over each leaf individually is cheaper.
  for (LeafReaderContext leaf : reader.leaves()) {
    Terms segmentTerms = leaf.reader().terms(field);
    if (segmentTerms == null) {
      continue; // field absent from this segment
    }
    TermsEnum iterator = segmentTerms.iterator();
    if (iterator.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
      continue; // every term in this segment sorts before the prefix
    }
    // seekCeil positioned us at the first candidate; scan forward while the
    // prefix still matches.
    BytesRef current = iterator.term();
    while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
      // deepCopyOf: the enum reuses its BytesRef across next() calls.
      terms.add(new Term(field, BytesRef.deepCopyOf(current)));
      if (terms.size() >= maxExpansions) {
        return;
      }
      current = iterator.next();
    }
  }
}
origin: lucene4ir/lucene4ir

/**
 * Prints the per-segment document count for {@code field} followed by every
 * term in the field with its document frequency (DF) and collection
 * frequency (CF), using only the first index segment.
 *
 * @param field the indexed field whose terms dictionary is listed
 * @throws IOException if reading the index fails
 */
public void termsList(String field) throws IOException {
  // again, we'll just look at the first segment.  Terms dictionaries
  // for different segments may well be different, as they depend on
  // the individual documents that have been added.
  LeafReader leafReader = reader.leaves().get(0).reader();
  Terms terms = leafReader.terms(field);
  // FIX: terms() returns null when the field has no indexed terms in this
  // segment; the original dereferenced it unconditionally and would NPE.
  if (terms == null) {
    System.out.println("No terms for field: " + field);
    return;
  }
  // The Terms object gives us some stats for this term within the segment
  System.out.println("Number of docs with this term:" + terms.getDocCount());
  TermsEnum te = terms.iterator();
  BytesRef term;
  while ((term = te.next()) != null) {
    System.out.println(term.utf8ToString() + " DF: " + te.docFreq() + " CF: " + te.totalTermFreq());
  }
}
origin: org.apache.lucene/lucene-core

final Terms terms = context.reader().terms(query.field);
if (terms == null) {
  final TermContext termContext = new TermContext(searcher.getTopReaderContext());
  termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
  bq.add(new TermQuery(new Term(query.field, t.term), termContext), Occur.SHOULD);
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
if (collectedTerms.isEmpty() == false) {
 TermsEnum termsEnum2 = terms.iterator();
 for (TermAndState t : collectedTerms) {
  termsEnum2.seekExact(t.term, t.state);
  docs = termsEnum2.postings(docs, PostingsEnum.NONE);
  builder.add(docs);
 docs = termsEnum.postings(docs, PostingsEnum.NONE);
 builder.add(docs);
} while (termsEnum.next() != null);
origin: oracle/opengrok

/**
 * Logs (at WARNING) every definition-field term whose docFreq exceeds 16 and
 * whose UTF-8 string form is longer than {@code freq} characters.
 *
 * @param freq minimum term length (exclusive) for a term to be reported;
 *             NOTE(review): despite the name this is compared against the
 *             term's string length, not a frequency — confirm with callers
 * @throws IOException if opening or reading the index fails
 */
public void listTokens(int freq) throws IOException {
  IndexReader ireader = null;
  try {
    ireader = DirectoryReader.open(indexDirectory);
    int numDocs = ireader.numDocs();
    if (numDocs > 0) {
      Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
      Terms terms = uFields.terms(QueryBuilder.DEFS);
      // FIX: terms() returns null when the field is absent; the original
      // called terms.iterator() unconditionally and would NPE.
      if (terms != null) {
        TermsEnum iter = terms.iterator(); // init uid iterator
        // FIX: TermsEnum.term() is undefined before the enum is positioned,
        // so advance with next() before reading term(); the original read
        // term() on a freshly created, unpositioned enum.
        for (BytesRef term = iter.next(); term != null; term = iter.next()) {
          if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
            LOGGER.warning(iter.term().utf8ToString());
          }
        }
      }
    }
  } finally {
    if (ireader != null) {
      try {
        ireader.close();
      } catch (IOException e) {
        LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
      }
    }
  }
}
origin: org.apache.lucene/lucene-core

final boolean hasFreqs = terms.hasFreqs();
final boolean hasPositions = terms.hasPositions();
final boolean hasPayloads = terms.hasPayloads();
final boolean hasOffsets = terms.hasOffsets();
 minTerm = null;
} else {
 BytesRef bb = terms.getMin();
 if (bb != null) {
  assert bb.isValid();
  minTerm = BytesRef.deepCopyOf(bb);
 } else {
  minTerm = null;
 bb = terms.getMax();
 if (bb != null) {
  assert bb.isValid();
 if (terms.getSumTotalTermFreq() != -1) {
  throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)");
final TermsEnum termsEnum = terms.iterator();
 final Object stats = fieldTerms.getStats();
 assert stats != null;
 if (status.blockTreeStats == null) {
  final long v = fields.terms(field).getSumTotalTermFreq();
  if (v != -1 && sumTotalTermFreq != v) {
origin: org.elasticsearch/elasticsearch

ScoreTermsQueue queue = new ScoreTermsQueue(Math.min(maxNumTerms, (int) terms.size()));
TermsEnum termsEnum = terms.iterator();
TermsEnum topLevelTermsEnum = topLevelTerms.iterator();
while (termsEnum.next() != null) {
  BytesRef termBytesRef = termsEnum.term();
  boolean foundTerm = topLevelTermsEnum.seekExact(termBytesRef);
  assert foundTerm : "Term: " + termBytesRef.utf8ToString() + " not found!";
  Term term = new Term(fieldName, termBytesRef);
  if (isNoise(term.bytes().utf8ToString(), freq)) {
    continue;
  queue.addOrUpdate(new ScoreTerm(term.field(), term.bytes().utf8ToString(), score));
origin: oracle/opengrok

int numDocs = reader.numDocs();
if (numDocs > 0) {
    uidIter = terms.iterator();
    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
    if (stat == TermsEnum.SeekStatus.END) {
      uidIter = null;
  while (uidIter != null && uidIter.term() != null
    && uidIter.term().utf8ToString().startsWith(startuid)) {
  reader.close();
origin: org.apache.lucene/lucene-classification

/**
 * Builds a {@link TermQuery} over an artificial {@link TermContext} whose
 * overall document frequency and total term frequency both sum to 1.
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  // we build an artificial TermContext that will give an overall df and ttf
  // equal to 1
  TermContext context = new TermContext(reader.getContext());
  for (LeafReaderContext leafContext : reader.leaves()) {
    Terms leafTerms = leafContext.reader().terms(term.field());
    if (leafTerms == null) {
      continue; // field absent from this leaf
    }
    TermsEnum iterator = leafTerms.iterator();
    if (iterator.seekExact(term.bytes()) == false) {
      continue; // term absent from this leaf
    }
    // Register a delta that keeps the running df/ttf totals at exactly 1.
    int freq = 1 - context.docFreq();
    context.register(iterator.termState(), leafContext.ord, freq, freq);
  }
  return new TermQuery(term, context);
}
origin: org.apache.lucene/lucene-core

final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
final boolean hasPayloads = terms.hasPayloads();
assert !hasPayloads || hasPositions;
int numTerms = (int) terms.size();
if (numTerms == -1) {
 termsEnum = terms.iterator();
 while(termsEnum.next() != null) {
  numTerms++;
termsEnum = terms.iterator();
while(termsEnum.next() != null) {
 termCount++;
 final int freq = (int) termsEnum.totalTermFreq();
 startTerm(termsEnum.term(), freq);
  assert docsAndPositionsEnum != null;
  final int docID = docsAndPositionsEnum.nextDoc();
  assert docID != DocIdSetIterator.NO_MORE_DOCS;
  assert docsAndPositionsEnum.freq() == freq;
origin: org.elasticsearch/elasticsearch

TermsEnum topLevelIterator = topLevelTerms.iterator();
boolean positions = flags.contains(Flag.Positions) && fieldTermVector.hasPositions();
boolean offsets = flags.contains(Flag.Offsets) && fieldTermVector.hasOffsets();
boolean payloads = flags.contains(Flag.Payloads) && fieldTermVector.hasPayloads();
long termsSize = fieldTermVector.size();
if (hasScores) {
  termsSize = Math.min(termsSize, termVectorsFilter.size(field));
TermsEnum iterator = fieldTermVector.iterator();
final boolean useDocsAndPos = positions || offsets || payloads;
while (iterator.next() != null) { // iterate all terms of the current field
  BytesRef termBytesRef = iterator.term();
  Term term = new Term(field, termBytesRef);
      writeTermStatistics(statistics == null ? new TermStatistics(termBytesRef, 0, 0) : statistics);
    } else {
      boolean foundTerm = topLevelIterator.seekExact(termBytesRef);
      if (foundTerm) {
        writeTermStatistics(topLevelIterator);
origin: org.apache.lucene/lucene-core

/**
 * Explains the feature-function score for {@code doc}: no-match when the
 * field, the feature term, or a posting for this doc is missing, otherwise
 * delegates to the scoring function with the stored frequency.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  String desc = "weight(" + getQuery() + " in " + doc + ") [" + function + "]";
  Terms fieldTerms = context.reader().terms(fieldName);
  if (fieldTerms == null) {
    return Explanation.noMatch(desc + ". Field " + fieldName + " doesn't exist.");
  }
  TermsEnum iterator = fieldTerms.iterator();
  boolean featurePresent = iterator.seekExact(new BytesRef(featureName));
  if (featurePresent == false) {
    return Explanation.noMatch(desc + ". Feature " + featureName + " doesn't exist.");
  }
  // The feature value is encoded in the term frequency of this posting.
  PostingsEnum postings = iterator.postings(null, PostingsEnum.FREQS);
  if (postings.advance(doc) != doc) {
    return Explanation.noMatch(desc + ". Feature " + featureName + " isn't set.");
  }
  return function.explain(fieldName, featureName, boost, doc, postings.freq());
}
origin: harbby/presto-connectors

/**
 * Explains the join score for {@code doc}: scans the query's collected terms
 * and returns a match explanation for the first one whose postings contain
 * this document; otherwise reports no match.
 */
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
  Terms segmentTerms = context.reader().terms(field);
  if (segmentTerms == null) {
    return Explanation.noMatch("Not a match");
  }
  TermsEnum segmentTermsEnum = segmentTerms.iterator();
  BytesRef scratch = new BytesRef();
  PostingsEnum reuse = null;
  for (int idx = 0; idx < TermsIncludingScoreQuery.this.terms.size(); idx++) {
    // Skip collected terms that don't exist in this segment.
    if (segmentTermsEnum.seekExact(TermsIncludingScoreQuery.this.terms.get(ords[idx], scratch)) == false) {
      continue;
    }
    reuse = segmentTermsEnum.postings(reuse, PostingsEnum.NONE);
    if (reuse.advance(doc) == doc) {
      final float score = TermsIncludingScoreQuery.this.scores[ords[idx]];
      return Explanation.match(score, "Score based on join value " + segmentTermsEnum.term().utf8ToString());
    }
  }
  return Explanation.noMatch("Not a match");
}
origin: org.apache.lucene/lucene-core

/**
 * Creates a {@link TermContext} from a top-level {@link IndexReaderContext} and the
 * given {@link Term}. This method will lookup the given term in all context's leaf readers
 * and register each of the readers containing the term in the returned {@link TermContext}
 * using the leaf reader's ordinal.
 * <p>
 * Note: the given context must be a top-level context.
 *
 * @param context the top-level reader context to search
 * @param term the term to look up in every leaf
 * @return a TermContext holding the per-leaf term states and statistics
 * @throws IOException if reading a leaf's terms dictionary fails
 */
public static TermContext build(IndexReaderContext context, Term term)
  throws IOException {
 assert context != null && context.isTopLevel;
 final String field = term.field();
 final BytesRef termBytes = term.bytes();
 final TermContext perReaderTermState = new TermContext(context);
 for (final LeafReaderContext leaf : context.leaves()) {
  final Terms leafTerms = leaf.reader().terms(field);
  if (leafTerms == null) {
   continue; // this leaf has no postings for the field
  }
  final TermsEnum iterator = leafTerms.iterator();
  if (iterator.seekExact(termBytes)) {
   // Capture the positioned state plus per-leaf statistics under the
   // leaf's ordinal.
   perReaderTermState.register(iterator.termState(), leaf.ord, iterator.docFreq(), iterator.totalTermFreq());
  }
 }
 return perReaderTermState;
}
origin: org.apache.lucene/lucene-core

TermsEnum termsEnum = terms.iterator();
final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
final boolean postingsHasPayload = fieldInfo.hasPayloads();
final boolean vectorsHasPayload = terms.hasPayloads();
 throw new RuntimeException("vector field=" + field + " does not exist in postings; doc=" + j);
TermsEnum postingsTermsEnum = postingsTerms.iterator();
final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
BytesRef term = null;
while ((term = termsEnum.next()) != null) {
 postings = termsEnum.postings(postings, PostingsEnum.ALL);
 assert postings != null;
 final int doc = postings.nextDoc();
  final int tf = postings.freq();
    if (postingsTerms.hasPositions()) {
     int postingsPos = postingsDocs.nextPosition();
     if (terms.hasPositions() && pos != postingsPos) {
      throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
    if (startOffset != -1 && endOffset != -1 && postingsTerms.hasOffsets()) {
origin: org.elasticsearch/elasticsearch

@Override
DocIdSet processLeaf(Query query, CompositeValuesCollectorQueue queue,
           LeafReaderContext context, boolean fillDocIdSet) throws IOException {
  final Terms terms = context.reader().terms(field);
  if (terms == null) {
  final TermsEnum te = terms.iterator();
  if (lowerValue != null) {
    if (te.seekCeil(lowerValue) == TermsEnum.SeekStatus.END) {
      return DocIdSet.EMPTY ;
    if (te.next() == null) {
      return DocIdSet.EMPTY;
  DocIdSetBuilder builder = fillDocIdSet ? new DocIdSetBuilder(context.reader().maxDoc(), terms) : null;
  PostingsEnum reuse = null;
  boolean first = true;
  final BytesRef upper = upperValue == null ? null : BytesRef.deepCopyOf(upperValue);
  do {
    if (upper != null && upper.compareTo(te.term()) < 0) {
      break;
org.apache.lucene.index.Terms

Javadoc

Access to the terms in a specific field. See Fields.

Most used methods

  • iterator
  • size
    Returns the number of terms for this field, or -1 if this measure isn't stored by the codec. Note th
  • getSumTotalTermFreq
    Returns the sum of TermsEnum#totalTermFreq for all terms in this field, or -1 if this measure isn't
  • hasPositions
    Returns true if documents in this field store positions.
  • getDocCount
    Returns the number of documents that have at least one term for this field, or -1 if this measure is
  • getSumDocFreq
    Returns the sum of TermsEnum#docFreq() for all terms in this field, or -1 if this measure isn't stor
  • hasPayloads
    Returns true if documents in this field store payloads.
  • hasOffsets
    Returns true if documents in this field store offsets.
  • getMax
    Returns the largest term (in lexicographic order) in the field. Note that, just like other term meas
  • getMin
    Returns the smallest term (in lexicographic order) in the field. Note that, just like other term mea
  • intersect
    Returns a TermsEnum that iterates over all terms and documents that are accepted by the provided Com
  • hasFreqs
    Returns true if documents in this field store per-document term frequency ( PostingsEnum#freq).
  • intersect,
  • hasFreqs,
  • getStats

Popular in Java

  • Reactive rest calls using spring rest template
  • getApplicationContext (Context)
  • requestLocationUpdates (LocationManager)
  • putExtra (Intent)
  • Container (java.awt)
    A generic Abstract Window Toolkit(AWT) container object is a component that can contain other AWT co
  • InputStreamReader (java.io)
    A class for turning a byte stream into a character stream. Data read from the source input stream is
  • String (java.lang)
  • HttpServletRequest (javax.servlet.http)
    Extends the javax.servlet.ServletRequest interface to provide request information for HTTP servlets.
  • Table (org.hibernate.mapping)
    A relational table
  • Scheduler (org.quartz)
    This is the main interface of a Quartz Scheduler. A Scheduler maintains a registry of org.quartz.Job
  • Top PhpStorm plugins
Tabnine Logo
  • Products

    Search for Java codeSearch for JavaScript code
  • IDE Plugins

    IntelliJ IDEAWebStormVisual StudioAndroid StudioEclipseVisual Studio CodePyCharmSublime TextPhpStormVimGoLandRubyMineEmacsJupyter NotebookJupyter LabRiderDataGripAppCode
  • Company

    About UsContact UsCareers
  • Resources

    FAQBlogTabnine AcademyTerms of usePrivacy policyJava Code IndexJavascript Code Index
Get Tabnine for your IDE now