
How to use LeafReader in org.apache.lucene.index

Best Java code snippets using org.apache.lucene.index.LeafReader (Showing top 20 results out of 567)

Refine search

  • LeafReaderContext
  • IndexReader
  • DirectoryReader
  • Document
  • Directory
  • Terms
  • TermsEnum
  • BytesRef
  • IndexWriterConfig
origin: org.apache.lucene/lucene-core

/** This method may return null if the field does not exist or if it has no terms. */
public static Terms getTerms(IndexReader r, String field) throws IOException {
 final List<LeafReaderContext> leaves = r.leaves();
 if (leaves.size() == 1) {
  return leaves.get(0).reader().terms(field);
 }
 final List<Terms> termsPerLeaf = new ArrayList<>(leaves.size());
 final List<ReaderSlice> slicePerLeaf = new ArrayList<>(leaves.size());
 for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) {
  LeafReaderContext ctx = leaves.get(leafIdx);
  Terms subTerms = ctx.reader().terms(field);
  if (subTerms != null) {
   termsPerLeaf.add(subTerms);
   slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx));
  }
 }
 if (termsPerLeaf.size() == 0) {
  return null;
 } else {
  return new MultiTerms(termsPerLeaf.toArray(Terms.EMPTY_ARRAY),
    slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY));
 }
}

origin: org.apache.lucene/lucene-core

 @Override
 public boolean test(LeafReaderContext context) {
  final int maxDoc = context.reader().maxDoc();
  if (maxDoc < minSize) {
   return false;
  }
  final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
  final float sizeRatio = (float) context.reader().maxDoc() / topLevelContext.reader().maxDoc();
  return sizeRatio >= minSizeRatio;
 }
}
origin: neo4j/neo4j

  /**
   * @return the {@code NumericDocValues} for a given field
   * @throws IllegalStateException if this field is not indexed with numeric doc values
   */
  public NumericDocValues readDocValues( String field )
  {
    try
    {
      NumericDocValues dv = context.reader().getNumericDocValues( field );
      if ( dv == null )
      {
        FieldInfo fi = context.reader().getFieldInfos().fieldInfo( field );
        DocValuesType actual = null;
        if ( fi != null )
        {
          actual = fi.getDocValuesType();
        }
        throw new IllegalStateException(
            "The field '" + field + "' is not indexed properly, expected NumericDV, but got '" +
            actual + "'" );
      }
      return dv;
    }
    catch ( IOException e )
    {
      throw new RuntimeException( e );
    }
  }
}
origin: org.apache.lucene/lucene-core

FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
final DocIdSetIterator iterator;
if (fieldInfo != null) {
 switch (fieldInfo.getDocValuesType()) {
  case NONE:
   iterator = null;
   break;
  case NUMERIC:
   iterator = reader.getNumericDocValues(field);
   break;
  case BINARY:
   iterator = reader.getBinaryDocValues(field);
   break;
  case SORTED:
   iterator = reader.getSortedDocValues(field);
   break;
  case SORTED_NUMERIC:
   iterator = reader.getSortedNumericDocValues(field);
   break;
  case SORTED_SET:
   iterator = reader.getSortedSetDocValues(field);
   break;
  default:
   throw new AssertionError();
 }
} else {
 iterator = null;
}
origin: org.apache.lucene/lucene-core

private DocMap[] buildDeletionDocMaps(List<CodecReader> readers) {
 int totalDocs = 0;
 int numReaders = readers.size();
 DocMap[] docMaps = new DocMap[numReaders];
 for (int i = 0; i < numReaders; i++) {
  LeafReader reader = readers.get(i);
  Bits liveDocs = reader.getLiveDocs();
  final PackedLongValues delDocMap;
  if (liveDocs != null) {
   delDocMap = removeDeletes(reader.maxDoc(), liveDocs);
  } else {
   delDocMap = null;
  }
  final int docBase = totalDocs;
  docMaps[i] = new DocMap() {
   @Override
   public int get(int docID) {
    if (liveDocs == null) {
     return docBase + docID;
    } else if (liveDocs.get(docID)) {
     return docBase + (int) delDocMap.get(docID);
    } else {
     return -1;
    }
   }
  };
  totalDocs += reader.numDocs();
 }
 return docMaps;
}
origin: apache/tika

IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
int maxLen = 1000000;
int len = 0;
while (line != null) {
  len += line.length();
  Document document = new Document();
  document.add(new TextField(FIELD, line, Field.Store.NO));
  docs.add(document);
  if (len > maxLen) {
    writer.flush();

try (IndexReader reader = DirectoryReader.open(directory)) {
  LeafReader wrappedReader = SlowCompositeReaderWrapper.wrap(reader);
  Terms terms = wrappedReader.terms(FIELD);
  TermsEnum termsEnum = terms.iterator();
  BytesRef bytesRef = termsEnum.next();
  int docsWThisField = wrappedReader.getDocCount(FIELD);
  while (bytesRef != null) {
    int df = termsEnum.docFreq();
    long tf = termsEnum.totalTermFreq();
    if (MIN_DOC_FREQ > -1 && df < MIN_DOC_FREQ) {
      bytesRef = termsEnum.next();
      continue;
    }
    String t = bytesRef.utf8ToString();
    if (! WHITE_LIST.contains(t) && ! BLACK_LIST.contains(t)) {
      queue.insertWithOverflow(new TokenDFTF(t, df, tf));
origin: org.apache.lucene/lucene-core

@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
 Terms terms = context.reader().terms(fieldName);
 if (terms == null) {
  return null;
 }
 TermsEnum termsEnum = terms.iterator();
 if (termsEnum.seekExact(new BytesRef(featureName)) == false) {
  return null;
 }
 SimScorer scorer = function.scorer(fieldName, boost);
 PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
 return new Scorer(this) {
  @Override
  public int docID() {
   return postings.docID();
  }
  @Override
  public float score() throws IOException {
   return scorer.score(postings.docID(), postings.freq());
  }
  @Override
  public DocIdSetIterator iterator() {
   return postings;
  }
 };
}
origin: org.infinispan/infinispan-embedded-query

DirectoryReader r = DirectoryReader.open(taxoDir);
try {
 final int size = r.numDocs();
 final OrdinalMap ordinalMap = map;
 ordinalMap.setSize(size);
 int base = 0;
 PostingsEnum docs = null;
 for (final LeafReaderContext ctx : r.leaves()) {
  final LeafReader ar = ctx.reader();
  final Terms terms = ar.terms(Consts.FULL);
  TermsEnum te = terms.iterator();
   while (te.next() != null) {
    FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
    final int ordinal = addCategory(cp);
    docs = te.postings(docs, PostingsEnum.NONE);
    ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
   }
   base += ar.maxDoc(); // no deletions, so we're ok
origin: org.infinispan/infinispan-embedded-query

try {
 PostingsEnum postingsEnum = null;
 for (LeafReaderContext ctx : reader.leaves()) {
  Terms terms = ctx.reader().terms(Consts.FULL);
  if (terms != null) { // cannot really happen, but be on the safe side
   TermsEnum termsEnum = terms.iterator();
   while (termsEnum.next() != null) {
    if (!cache.isFull()) {
     BytesRef t = termsEnum.term();
     FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(t.utf8ToString()));
     postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
     boolean res = cache.put(cp, postingsEnum.nextDoc() + ctx.docBase);
     assert !res : "entries should not have been evicted from the cache";
origin: org.infinispan/infinispan-lucene-directory

/**
* Read all terms from a field
*
* @param field the field in the document to load terms from
* @param directory Any directory implementation
* @return Unique terms represented as UTF-8
* @throws IOException
*/
public static Set<String> readTerms(String field, Directory directory) throws IOException {
 try (DirectoryReader reader = DirectoryReader.open(directory)) {
   Set<String> termStrings = new TreeSet<>();
   for (LeafReaderContext atomicReaderContext : reader.leaves()) {
    LeafReader atomicReader = atomicReaderContext.reader();
    TermsEnum iterator = atomicReader.terms(field).iterator();
    BytesRef next = iterator.next();
    while (next != null) {
      termStrings.add(iterator.term().utf8ToString());
      next = iterator.next();
    }
   }
   return termStrings;
 }
}
origin: neo4j/neo4j

for ( LeafReaderContext readerContext : indexReader.leaves() )
    for ( String fieldName : fieldNames )
    {
        Terms terms = readerContext.reader().terms( fieldName );
        if ( terms != null )
        {
            TermsEnum termsEnum = terms.iterator();
            BytesRef termsRef;
            while ( (termsRef = termsEnum.next()) != null )
            {
                sampler.include( termsRef.utf8ToString(), termsEnum.docFreq() );
                checkCancellation();
            }
        }
    }
return sampler.result( indexReader.numDocs() );
origin: flaxsearch/luwak

private LeafReader build(IndexWriter writer) throws IOException {
  for (InputDocument doc : documents) {
    writer.addDocument(doc.getDocument());
  }
  writer.commit();
  writer.forceMerge(1);
  LeafReader reader = DirectoryReader.open(directory).leaves().get(0).reader();
  assert reader != null;
  docIds = new String[reader.maxDoc()];
  for (int i = 0; i < docIds.length; i++) {
    docIds[i] = reader.document(i).get(InputDocument.ID_FIELD);     // TODO can this be more efficient?
  }
  return reader;
}
origin: org.elasticsearch/elasticsearch

@Override
DocIdSet processLeaf(Query query, CompositeValuesCollectorQueue queue,
           LeafReaderContext context, boolean fillDocIdSet) throws IOException {
  final Terms terms = context.reader().terms(field);
  if (terms == null) {
    // no terms for this field in this segment
    return DocIdSet.EMPTY;
  }
  final TermsEnum te = terms.iterator();
  if (lowerValue != null) {
    if (te.seekCeil(lowerValue) == TermsEnum.SeekStatus.END) {
      return DocIdSet.EMPTY;
    }
  } else {
    if (te.next() == null) {
      return DocIdSet.EMPTY;
    }
  }
  DocIdSetBuilder builder = fillDocIdSet ? new DocIdSetBuilder(context.reader().maxDoc(), terms) : null;
  PostingsEnum reuse = null;
  boolean first = true;
  final BytesRef upper = upperValue == null ? null : BytesRef.deepCopyOf(upperValue);
  do {
    if (upper != null && upper.compareTo(te.term()) < 0) {
      break;
origin: org.elasticsearch/elasticsearch

private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
  // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
  // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
  List<LeafReaderContext> leaves = reader.leaves();
  for (LeafReaderContext leaf : leaves) {
    Terms _terms = leaf.reader().terms(field);
    if (_terms == null) {
      continue;
    }
    TermsEnum termsEnum = _terms.iterator();
    TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
    if (TermsEnum.SeekStatus.END == seekStatus) {
      continue;
    }
    for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
      if (!StringHelper.startsWith(term, prefix.bytes())) {
        break;
      }
      terms.add(new Term(field, BytesRef.deepCopyOf(term)));
      if (terms.size() >= maxExpansions) {
        return;
      }
    }
  }
}
origin: oracle/opengrok

Terms terms = leafReaderContext.reader().terms(suggesterQuery.getField());
BytesRef term = termsEnum.next();
while (term != null) {
  if (Thread.currentThread().isInterrupted()) {
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.POSITIONS | PostingsEnum.FREQS);
  } else {
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
        queue.insertWithOverflow(new LookupResultItem(term.utf8ToString(), project, score));
origin: org.apache.lucene/lucene-core

if (reader.hasDeletions()) {
 final List<LeafReaderContext> leaves = reader.leaves();
 final int size = leaves.size();
 assert size > 0 : "A reader with deletions must have at least one leave";
 if (size == 1) {
  return leaves.get(0).reader().getLiveDocs();
 }
 final Bits[] liveDocs = new Bits[size];
 final int[] starts = new int[size + 1];
 for (int i = 0; i < size; i++) {
  // record all liveDocs, even if they are null
  final LeafReaderContext ctx = leaves.get(i);
  liveDocs[i] = ctx.reader().getLiveDocs();
  starts[i] = ctx.docBase;
 }
 starts[size] = reader.maxDoc();
 return new MultiBits(liveDocs, starts, true);
} else {
 return null;
}
origin: org.apache.lucene/lucene-core

final List<LeafReaderContext> leaves = r.leaves();
final int size = leaves.size();
if (size == 0) {
 return null;
} else if (size == 1) {
 return leaves.get(0).reader().getSortedSetDocValues(field);
}
final SortedSetDocValues[] values = new SortedSetDocValues[size];
final int[] starts = new int[size + 1];
for (int i = 0; i < size; i++) {
 LeafReaderContext context = leaves.get(i);
 SortedSetDocValues v = context.reader().getSortedSetDocValues(field);
 if (v == null) {
  v = DocValues.emptySortedSet();
 }
 values[i] = v;
 starts[i] = context.docBase;
}
starts[size] = r.maxDoc();
IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
OrdinalMap mapping = OrdinalMap.build(owner, values, PackedInts.DEFAULT);
origin: org.apache.lucene/lucene-core

@Override
public final int docFreq(Term term) throws IOException {
 final Terms terms = terms(term.field());
 if (terms == null) {
  return 0;
 }
 final TermsEnum termsEnum = terms.iterator();
 if (termsEnum.seekExact(term.bytes())) {
  return termsEnum.docFreq();
 } else {
  return 0;
 }
}
origin: org.apache.lucene/lucene-core

/** Returns the total number of occurrences of the term
 * <code>t</code> across all documents (the sum of the freq()
 * for each doc that has the term). This method returns 0 if
 * the term or field does not exist. This method does not take
 * into account deleted documents that have not yet been merged
 * away. */
@Override
public final long totalTermFreq(Term term) throws IOException {
 final Terms terms = terms(term.field());
 if (terms == null) {
  return 0;
 }
 final TermsEnum termsEnum = terms.iterator();
 if (termsEnum.seekExact(term.bytes())) {
  return termsEnum.totalTermFreq();
 } else {
  return 0;
 }
}
origin: org.apache.lucene/lucene-core

final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
 IndexReaderContext topReaderContext = reader.getContext();
 for (LeafReaderContext context : topReaderContext.leaves()) {
  final Terms terms = context.reader().terms(query.field);
  if (terms == null) {
   // field does not exist
   continue;
  }
  final TermsEnum termsEnum = getTermsEnum(query, terms, collector.attributes);
  assert termsEnum != null;
  if (termsEnum == TermsEnum.EMPTY)
   continue;
  
  collector.setReaderContext(topReaderContext, context);
  collector.setNextEnum(termsEnum);
  BytesRef bytes;
  while ((bytes = termsEnum.next()) != null) {
   if (!collector.collect(bytes))
    return; // interrupt whole term collection, so also don't iterate other subReaders
  }
 }
}

org.apache.lucene.index.LeafReader

Javadoc

LeafReader is an abstract class, providing an interface for accessing an index. Search of an index is done entirely through this abstract interface, so that any subclass which implements it is searchable. IndexReaders implemented by this subclass do not consist of several sub-readers, they are atomic. They support retrieval of stored fields, doc values, terms, and postings.

For efficiency, in this API documents are often referred to via document numbers, non-negative integers which each name a unique document in the index. These document numbers are ephemeral -- they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions.

NOTE: IndexReader instances are completely thread safe, meaning multiple threads can call any of its methods, concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader instance; use your own (non-Lucene) objects instead.
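
A minimal usage sketch (not taken from any of the snippets above): open a DirectoryReader, walk its leaves(), and query each segment through its LeafReader. The index path and the "title" field are assumptions made for illustration only.

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class LeafReaderTour {
 public static void main(String[] args) throws IOException {
  try (Directory dir = FSDirectory.open(Paths.get("/path/to/index")); // hypothetical index location
       DirectoryReader reader = DirectoryReader.open(dir)) {
   for (LeafReaderContext ctx : reader.leaves()) {
    LeafReader leaf = ctx.reader(); // one atomic segment; ctx.docBase maps its doc IDs to top-level IDs
    System.out.println("segment at docBase " + ctx.docBase + ": "
      + leaf.numDocs() + "/" + leaf.maxDoc() + " live/max docs");
    Terms terms = leaf.terms("title"); // may return null if the field does not exist in this segment
    if (terms == null) {
     continue;
    }
    TermsEnum te = terms.iterator();
    for (BytesRef term = te.next(); term != null; term = te.next()) {
     System.out.println(term.utf8ToString() + " df=" + te.docFreq());
    }
   }
  }
 }
}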

Most used methods

  • terms
    This may return null if the field does not exist.
  • maxDoc
  • getFieldInfos
    Get the FieldInfos describing all fields in this reader.
  • getLiveDocs
    Returns the Bits representing live (not deleted) docs. A set bit indicates the doc ID has not been deleted (a combined usage sketch follows this list).
  • getSortedDocValues
    Returns SortedDocValues for this field, or null if no SortedDocValues were indexed for this field. The returned instance should only be used by a single thread.
  • document
  • getBinaryDocValues
    Returns BinaryDocValues for this field, or null if no BinaryDocValues were indexed for this field. The returned instance should only be used by a single thread.
  • getNumericDocValues
    Returns NumericDocValues for this field, or null if no NumericDocValues were indexed for this field.
  • getSortedSetDocValues
    Returns SortedSetDocValues for this field, or null if no SortedSetDocValues were indexed for this field. The returned instance should only be used by a single thread.
  • getNormValues
    Returns NumericDocValues representing norms for this field, or null if no NumericDocValues were indexed. The returned instance should only be used by a single thread.
  • getPointValues
    Returns the PointValues used for numeric or spatial searches for the given field, or null if there are no point fields.
  • numDocs
  • docFreq
  • fields
  • getSortedNumericDocValues
  • getTermVector
  • getTermVectors
  • postings
  • getContext
  • getCoreCacheHelper
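
A second sketch tying a few of these methods together (the "price" field and its doc-values type are hypothetical, not taken from the snippets above): getLiveDocs to skip deleted documents and getNumericDocValues to read a per-document value.

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;

final class DocValuesDump {
 // Sketch only: assumes "price" was indexed with NumericDocValuesField.
 static void dumpPrices(LeafReader leaf) throws IOException {
  Bits liveDocs = leaf.getLiveDocs();            // null means this segment has no deletions
  NumericDocValues prices = leaf.getNumericDocValues("price");
  if (prices == null) {
   return;                                       // field not indexed with numeric doc values
  }
  for (int docId = prices.nextDoc();
       docId != DocIdSetIterator.NO_MORE_DOCS;
       docId = prices.nextDoc()) {
   if (liveDocs != null && !liveDocs.get(docId)) {
    continue;                                    // deleted document
   }
   System.out.println("doc " + docId + " price=" + prices.longValue());
  }
 }
}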
