org.apache.lucene.index.IndexReader.document java code examples

/**
 * Convenience shortcut for
 * <code>.getIndexReader().document(docID, fieldsToLoad)</code>.
 *
 * @param docID        document number, handed to the reader unchanged
 * @param fieldsToLoad field names, handed to the reader unchanged
 * @return whatever the reader returns for this request
 * @throws IOException if the reader throws it
 * @see IndexReader#document(int, Set)
 */
public Document doc(int docID, Set<String> fieldsToLoad) throws IOException {
 final Document loaded = reader.document(docID, fieldsToLoad);
 return loaded;
}

/**
 * Convenience shortcut for
 * <code>.getIndexReader().document(docID, fieldVisitor)</code>.
 *
 * @param docID        document number, handed to the reader unchanged
 * @param fieldVisitor visitor handed to the reader unchanged
 * @throws IOException if the reader throws it
 * @see IndexReader#document(int, StoredFieldVisitor)
 */
public void doc(int docID, StoredFieldVisitor fieldVisitor)
  throws IOException {
 // Pure delegation: nothing is returned, the visitor receives the result.
 reader.document(docID, fieldVisitor);
}

/**
 * Convenience shortcut for <code>.getIndexReader().document(docID)</code>.
 *
 * @param docID document number, handed to the reader unchanged
 * @return whatever the reader returns for this document number
 * @throws IOException if the reader throws it
 * @see IndexReader#document(int)
 */
public Document doc(int docID) throws IOException {
 final Document loaded = reader.document(docID);
 return loaded;
}

/**
 * Adds up the {@code "count"} field of every document that matches the given term.
 * At most 2000 matches are supported; more than that is rejected with a
 * {@link RuntimeException}.
 *
 * @param term           the term to look up with a {@code TermQuery}
 * @param luceneSearcher holder of the searcher, reader and directory to use
 * @return the sum of the parsed {@code "count"} values (0 if nothing matches)
 * @throws RuntimeException if there are more than 2000 matches, or wrapping any
 *                          {@link IOException} raised while searching or reading
 */
private long getCount(Term term, LuceneSearcher luceneSearcher) {
 try {
  final TopDocs matches = luceneSearcher.searcher.search(new TermQuery(term), 2000);
  if (matches.totalHits > 2000) {
   throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " +
                 matches.totalHits + " matches in " + luceneSearcher.directory);
  }
  long total = 0;
  for (ScoreDoc hit : matches.scoreDocs) {
   total += Long.parseLong(luceneSearcher.reader.document(hit.doc).get("count"));
  }
  return total;
 } catch (IOException e) {
  throw new RuntimeException(e);
 }
}

/**
 * Loads the Lucene documents referenced by {@code topDocs} and hands them to the
 * document-based {@code read} overload.
 * <p>
 * At most {@code maxResults} score docs are considered, and they are loaded from the
 * last considered entry back to the first. If {@code retrievalCount} has already
 * reached {@code maxResults}, nothing is read and an empty set is returned.
 *
 * @throws IOException if reading a document from {@code indexReader} fails, or if the
 *                     delegated {@code read} call throws it
 */
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
    final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
  if (retrievalCount.get() >= maxResults) {
    return Collections.emptySet();
  }

  final long startNanos = System.nanoTime();

  final ScoreDoc[] hits = topDocs.scoreDocs;
  final int limit = Math.min(hits.length, maxResults);
  final List<Document> docs = new ArrayList<>(limit);

  // Walk backwards from the last considered hit to the first.
  int remaining = limit;
  while (remaining > 0) {
    remaining--;
    docs.add(indexReader.document(hits[remaining].doc));
  }

  final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
  logger.debug("Reading {} Lucene Documents took {} millis", docs.size(), elapsedMillis);

  return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}

/**
 * Reads the {@code totalTokenCount} meta documents from the searcher obtained via
 * {@code getLuceneSearcher(1)} and returns the largest value found.
 *
 * @return the maximum {@code totalTokenCount} value over all meta documents
 * @throws RuntimeException if no meta document or more than 1000 of them are found,
 *                          or wrapping any {@link IOException} from Lucene
 */
@Override
public long getTotalTokenCount() {
 final LuceneSearcher luceneSearcher = getLuceneSearcher(1);
 try {
  final RegexpQuery anyTotalCount = new RegexpQuery(new Term("totalTokenCount", ".*"));
  // Capped at 1000: Integer.MAX_VALUE might cause OOE on wrong index
  final TopDocs metaDocs = luceneSearcher.searcher.search(anyTotalCount, 1000);
  if (metaDocs.totalHits == 0) {
   throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory);
  }
  if (metaDocs.totalHits > 1000) {
   throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + metaDocs.totalHits + " in " + luceneSearcher.directory);
  }
  // Due to the way FrequencyIndexCreator adds these totalTokenCount fields,
  // the values must not be summed; the largest one is the result.
  long largest = 0;
  for (ScoreDoc hit : metaDocs.scoreDocs) {
   final String stored = luceneSearcher.reader.document(hit.doc).get("totalTokenCount");
   largest = Math.max(largest, Long.parseLong(stored));
  }
  return largest;
 } catch (IOException e) {
  throw new RuntimeException(e);
 }
}

/**
 * Variant of {@link #document(int)} that restricts loading to the given
 * fields. This is only a shortcut for visiting the document with a {@link
 * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
 *
 * @param docID        number of the document to visit
 * @param fieldsToLoad field names passed to the visitor's constructor
 * @return the document collected by the visitor
 * @throws IOException if visiting the document throws it
 */
public final Document document(int docID, Set<String> fieldsToLoad)
  throws IOException {
 final DocumentStoredFieldVisitor fieldCollector =
   new DocumentStoredFieldVisitor(fieldsToLoad);
 document(docID, fieldCollector);
 final Document collected = fieldCollector.getDocument();
 return collected;
}

/**
 * Forwards the visit to the sub-reader selected by {@code readerIndex(docID)},
 * after subtracting that sub-reader's entry in {@code starts} from the document number.
 *
 * @param docID   document number as seen by this reader
 * @param visitor visitor handed on to the selected sub-reader
 * @throws IOException if the sub-reader throws it
 */
@Override
public final void document(int docID, StoredFieldVisitor visitor) throws IOException {
 ensureOpen();
 // Pick the sub-reader, then translate the id by that sub-reader's start offset.
 final int subIndex = readerIndex(docID);
 final int localDocID = docID - starts[subIndex];
 subReaders[subIndex].document(localDocID, visitor);
}

.mapToObj(docId -> {
  try {
    return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
  } catch (final Exception e) {
    throw new SearchFailedException("Failed to read Provenance Events from Event File", e);

/**
 * Returns the stored fields of the <code>n</code><sup>th</sup>
 * <code>Document</code> in this index. This is only a shortcut for
 * visiting the document with a {@link DocumentStoredFieldVisitor}.
 * <p>
 * <b>NOTE:</b> for performance reasons, this method does not check whether
 * the requested document is deleted, so asking for a deleted document
 * may yield unspecified results. Usually this check is not required; if
 * needed, you can test whether the doc is deleted by checking the {@link
 * Bits} returned from {@link MultiFields#getLiveDocs}.
 * <p>
 * <b>NOTE:</b> only the content of a field is returned,
 * and only if that field was stored during indexing. Metadata
 * like boost, omitNorm, IndexOptions, tokenized, etc.,
 * are not preserved.
 *
 * @param docID number of the document to visit
 * @return the document collected by the visitor
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
// TODO: we need a separate StoredField, so that the
// Document returned here contains that class not
// IndexableField
public final Document document(int docID) throws IOException {
 final DocumentStoredFieldVisitor fieldCollector = new DocumentStoredFieldVisitor();
 document(docID, fieldCollector);
 final Document collected = fieldCollector.getDocument();
 return collected;
}

/**
 * Determines the (approximate) largest event ID indexed for the given partition.
 * <p>
 * The partition's index directories are sorted with
 * {@code DirectoryUtils.NEWEST_INDEX_FIRST} and tried in that order. For the first
 * directory that can be read, the identifier field of the document with the highest
 * document number is returned. Directories that cannot be opened or searched, or that
 * contain no documents, are skipped.
 *
 * @param partitionName name of the partition whose index directories are inspected
 * @return the event ID read from the first usable index, or {@code -1} if there are no
 *         directories or none of them yielded a value
 */
long getMaxEventId(final String partitionName) {
  final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
  if (allDirectories.isEmpty()) {
    return -1L;
  }

  Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);

  for (final File directory : allDirectories) {
    final EventIndexSearcher searcher;
    try {
      searcher = indexManager.borrowIndexSearcher(directory);
    } catch (final IOException ioe) {
      // Pass the exception to the logger so the cause is not lost (matches the handler below).
      logger.warn("Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
      continue;
    }

    try {
      final IndexReader reader = searcher.getIndexSearcher().getIndexReader();
      final int maxDocId = reader.maxDoc() - 1;
      if (maxDocId < 0) {
        // An empty index has no document to read; asking for document -1 would throw
        // a non-IOException and abort the scan. The finally block still returns the searcher.
        logger.debug("Index Directory {} contains no documents. Will not consider this index when determining max event ID", directory);
        continue;
      }

      final Document document = reader.document(maxDocId);
      final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue();
      logger.info("Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}", partitionName, eventId, directory);
      return eventId;
    } catch (final IOException ioe) {
      logger.warn("Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
    } finally {
      indexManager.returnIndexSearcher(searcher);
    }
  }

  return -1L;
}

List<String> suggestions = new ArrayList<String>();
for (ScoreDoc doc : docs.scoreDocs) {
  suggestions.add(autoCompleteReader.document(doc.doc).get(
      SOURCE_WORD_FIELD));

++n;
Document doc = reader.document(postsIter.docID(), CHECK_FIELDS);
if (doc == null) {
  LOGGER.log(Level.FINER, "No Document: {0}", path);

  /**
   * Runs a metric-spaces search for each of the first ten documents of the index
   * and prints rank, document id and score of every hit.
   * <p>
   * The index reader and the reference-point streams are opened in a
   * try-with-resources block so they are closed when the method exits, including
   * when it fails.
   */
  public void testSearch() throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName)));
         GZIPInputStream phogPoints = new GZIPInputStream(new FileInputStream("src/test/resources/metricspaces/refPoints_PHOG.dat.gz"));
         GZIPInputStream ceddPoints = new GZIPInputStream(new FileInputStream("src/test/resources/metricspaces/refPoints_CEDD.dat.gz"));
         GZIPInputStream searcherPoints = new GZIPInputStream(new FileInputStream("src/test/resources/metricspaces/refPoints_CEDD.dat.gz"))) {
      MetricSpaces.loadReferencePoints(phogPoints);
      MetricSpaces.loadReferencePoints(ceddPoints);
      // The searcher's stream stays open until all searches below have finished.
      MetricSpacesImageSearcher is = new MetricSpacesImageSearcher(10, searcherPoints, 100);
      is.setNumHashesUsedForQuery(15);
//        GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, false, reader);
      for (int i = 0; i < 10; i++) {
        ImageSearchHits hits = is.search(reader.document(i), reader);
        for (int j = 0; j < hits.length(); j++) {
          System.out.printf("%02d: %06d %02.3f\n", j + 1, hits.documentID(j), hits.score(j));
        }
        System.out.println("------< * >-------");
      }
    }
  }

/**
 * Turns a raw match into a scored match by reading the payload stored for the
 * raw id, feeding it into the doc id of {@code match}, and setting the score to
 * the boosted score divided by {@code boostedNorm}.
 *
 * @param rawMatch the match whose raw id and boosted score are used
 * @return {@code match}, updated with the doc id and score
 * @throws IllegalArgumentException if {@code payloads} cannot be positioned on the raw id
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the payload
 */
@Override
public ScoredMatch apply(RawMatch rawMatch) {
  final int rawId = rawMatch.getRawId();
  try {
    final boolean positioned = payloads.skipTo(rawId) && payloads.doc() == rawId;
    if (!positioned) {
      throw new IllegalArgumentException("rawId:" + rawId + " doesn't exist. Payloads.doc():" + payloads.doc());
    }

    payloads.nextPosition();
    final int payloadLength = payloads.getPayloadLength();
    // Grow the buffer only when the payload does not fit.
    if (payloadLength > data.length) {
      data = new byte[payloadLength];
    }
    payloads.getPayload(data, 0);

    match.getDocId().update(data, 0, payloadLength);
    match.setScore(rawMatch.getBoostedScore() / boostedNorm);
    return match;
  } catch (IOException e) {
    // Best-effort diagnostics: log the offending document before rethrowing.
    try {
      org.apache.lucene.document.Document offending = reader.document(rawId);
      logger.error("Document without payload: " + offending.toString());
    } catch (Exception ee) {
      logger.error(ee);
    }
    throw new RuntimeException(e);
  }
}

/**
 * Searches the doc-values index {@code numRuns} times with the query image, then
 * prints score and identifier of each hit of the last run and the average time per run.
 * <p>
 * The index reader and the query image stream are closed when the method exits;
 * {@code ImageIO.read(InputStream)} does not close the stream it is given.
 */
public void testSearchDocValues() throws IOException {
  try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexNameDV)));
       FileInputStream queryStream = new FileInputStream(queryImage)) {
    GenericDocValuesImageSearcher is = new GenericDocValuesImageSearcher(10, CEDD.class, reader);
    // run search
    StopWatch sm = new StopWatch();
    BufferedImage qImage = ImageIO.read(queryStream);
    ImageSearchHits hits = null;
    for (int i = 0; i < numRuns; i++) {
      sm.start();
      hits = is.search(qImage, reader);
      sm.stop();
    }
    // print results
    for (int i = 0; i < hits.length(); i++) {
      String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
      System.out.println(hits.score(i) + ": \t" + fileName);
    }
    System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns);
  }
}

/**
 * Searches the index {@code numRuns} times with a {@code GenericFastImageSearcher}
 * constructed with its boolean argument set to {@code true}, then prints score and
 * identifier of each hit of the last run and the average time per run.
 * <p>
 * The index reader and the query image stream are closed when the method exits;
 * {@code ImageIO.read(InputStream)} does not close the stream it is given.
 */
public void testSearchCaching() throws IOException {
  try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName)));
       FileInputStream queryStream = new FileInputStream(queryImage)) {
    GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, true, reader);
    // run search
    StopWatch sm = new StopWatch();
    BufferedImage qImage = ImageIO.read(queryStream);
    ImageSearchHits hits = null;
    for (int i = 0; i < numRuns; i++) {
      sm.start();
      hits = is.search(qImage, reader);
      sm.stop();
    }
    // print results
    for (int i = 0; i < hits.length(); i++) {
      String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
      System.out.println(hits.score(i) + ": \t" + fileName);
    }
    System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns);
  }
}

/**
 * Fills {@code featureCache} with the feature bytes and identifier of every live
 * document, provided caching is enabled and a reader is available.
 * <p>
 * Document numbers run from 0 to {@code maxDoc() - 1}, while {@code numDocs()} only
 * counts live documents. The loop is therefore bounded by {@code maxDoc()}; bounding
 * it by {@code numDocs()} would miss the highest-numbered live documents as soon as
 * the index contains deletions.
 */
protected void init() {
  // put all respective features into an in-memory cache ...
  if (isCaching && reader != null) {
    // null when the reader has no deletions, in which case every document is live
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int maxDoc = reader.maxDoc();
    // the live-document count is only used to size the map
    featureCache = new LinkedHashMap<Integer, SearchItemForEvaluation>(reader.numDocs());
    try {
      Document d;
      for (int i = 0; i < maxDoc; i++) {
        if (liveDocs == null || liveDocs.get(i)) {
          d = reader.document(i);
          cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
          featureCache.put(i, new SearchItemForEvaluation(cachedInstance.getByteArrayRepresentation(), new SimpleResultForEvaluation(-1d, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])));
        }
      }
    } catch (IOException e) {
      // NOTE(review): a read failure leaves the cache partially filled; kept as
      // best-effort because no logger is visible in this block.
      e.printStackTrace();
    }
  }
}

/**
 * Fills {@code featureCache} with the feature bytes and identifier of every live
 * document, provided caching is enabled and a reader is available.
 * <p>
 * Document numbers run from 0 to {@code maxDoc() - 1}, while {@code numDocs()} only
 * counts live documents. The loop is therefore bounded by {@code maxDoc()}; bounding
 * it by {@code numDocs()} would miss the highest-numbered live documents as soon as
 * the index contains deletions.
 */
protected void init() {
  // put all respective features into an in-memory cache ...
  if (isCaching && reader != null) {
    // null when the reader has no deletions, in which case every document is live
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int maxDoc = reader.maxDoc();
    // the live-document count is only used to size the map
    featureCache = new LinkedHashMap<Integer, SearchItemForEvaluation>(reader.numDocs());
    try {
      Document d;
      for (int i = 0; i < maxDoc; i++) {
        if (liveDocs == null || liveDocs.get(i)) {
          d = reader.document(i);
          cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
          featureCache.put(i, new SearchItemForEvaluation(cachedInstance.getByteArrayRepresentation(), new SimpleResultForEvaluation(-1d, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])));
        }
      }
    } catch (IOException e) {
      // NOTE(review): a read failure leaves the cache partially filled; kept as
      // best-effort because no logger is visible in this block.
      e.printStackTrace();
    }
  }
}

  /**
   * Searches the index {@code numRuns} times with a {@code GenericFastImageSearcher}
   * constructed with its boolean argument set to {@code false}, then prints score and
   * identifier of each hit of the last run and the average time per run.
   * <p>
   * The index reader and the query image stream are closed when the method exits;
   * {@code ImageIO.read(InputStream)} does not close the stream it is given.
   */
  public void testSearchPlain() throws IOException {
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName)));
         FileInputStream queryStream = new FileInputStream(queryImage)) {
      GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, false, reader);
      // run search
      StopWatch sm = new StopWatch();
      BufferedImage qImage = ImageIO.read(queryStream);
      ImageSearchHits hits = null;
      for (int i = 0; i < numRuns; i++) {
        sm.start();
        hits = is.search(qImage, reader);
        sm.stop();
      }
      // print results
      for (int i = 0; i < hits.length(); i++) {
        String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        System.out.println(hits.score(i) + ": \t" + fileName);
      }
      System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns);
    }
  }
}

Javadoc

Returns the stored fields of the nth Document in this index. This is just sugar for using DocumentStoredFieldVisitor.

NOTE: for performance reasons, this method does not check if the requested document is deleted, and therefore asking for a deleted document may yield unspecified results. Usually this is not required, however you can test if the doc is deleted by checking the Bits returned from MultiFields#getLiveDocs. NOTE: only the content of a field is returned, if that field was stored during indexing. Metadata like boost, omitNorm, IndexOptions, tokenized, etc., are not preserved.

Popular methods of IndexReader

close
Closes files associated with this index. Also saves any new deletions to disk. No other methods shou
numDocs
Returns the number of documents in this index.
maxDoc
Returns one greater than the largest possible document number. This may be used to, e.g., determine
open
docFreq
Returns the number of documents containing the term. This method returns 0 if the term or field does
leaves
Returns the reader's leaves, or itself if this reader is atomic. This is a convenience method callin
terms
Returns an enumeration of all terms starting at a given term. If the given term does not exist, the
termDocs
Returns an enumeration of all the documents which contain term. For each document, the document numbe
indexExists
Returns true if an index exists at the specified directory. If the directory does not exist or if th
hasDeletions
Returns true if any documents have been deleted. Implementers should consider overriding this method
isDeleted
Returns true if document n has been deleted
decRef
Expert: decreases the refCount of this IndexReader instance. If the refCount drops to 0, then this r

Popular in Java

Making http post requests using okhttp
compareTo (BigDecimal)
findViewById (Activity)
getApplicationContext (Context)
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
StringTokenizer (java.util)
Breaks a string into tokens; new code should probably use String#split.> // Legacy code: StringTo
Pattern (java.util.regex)
Patterns are compiled regular expressions. In many cases, convenience methods such as String#matches
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
JComboBox (javax.swing)
Top plugins for Android Studio

How to use document method in org.apache.lucene.index.IndexReader

Best Java code snippets using org.apache.lucene.index.IndexReader.document (Showing top 20 results out of 882)

How to use
document
method
in
org.apache.lucene.index.IndexReader