/**
 * Convenience shortcut for
 * <code>.getIndexReader().document(docID, fieldsToLoad)</code>.
 *
 * @param docID        id of the document to fetch
 * @param fieldsToLoad names of the fields to load, forwarded unchanged to the reader
 * @return the document handed back by the underlying reader
 * @throws IOException if the underlying reader call fails
 * @see IndexReader#document(int, Set)
 */
public Document doc(int docID, Set<String> fieldsToLoad) throws IOException {
  final Document loaded = reader.document(docID, fieldsToLoad);
  return loaded;
}
/**
 * Convenience shortcut for
 * <code>.getIndexReader().document(docID, fieldVisitor)</code>.
 *
 * @param docID        id of the document to visit
 * @param fieldVisitor visitor forwarded unchanged to the reader
 * @throws IOException if the underlying reader call fails
 * @see IndexReader#document(int, StoredFieldVisitor)
 */
public void doc(int docID, StoredFieldVisitor fieldVisitor) throws IOException {
  // Pure delegation: the reader drives the visitor.
  reader.document(docID, fieldVisitor);
}
/**
 * Convenience shortcut for <code>.getIndexReader().document(docID)</code>.
 *
 * @param docID id of the document to fetch
 * @return the document handed back by the underlying reader
 * @throws IOException if the underlying reader call fails
 * @see IndexReader#document(int)
 */
public Document doc(int docID) throws IOException {
  final Document loaded = reader.document(docID);
  return loaded;
}
private long getCount(Term term, LuceneSearcher luceneSearcher) { long result = 0; try { TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000); if (docs.totalHits > 2000) { throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " + docs.totalHits + " matches in " + luceneSearcher.directory); } for (ScoreDoc scoreDoc : docs.scoreDocs) { String countStr = luceneSearcher.reader.document(scoreDoc.doc).get("count"); result += Long.parseLong(countStr); } //System.out.println(term + " -> " + result); } catch (IOException e) { throw new RuntimeException(e); } return result; }
/**
 * Loads the Lucene documents referenced by {@code topDocs} and hands them to the
 * document-list overload of {@code read}.
 *
 * <p>Returns an empty set immediately when {@code retrievalCount} has already reached
 * {@code maxResults}. Otherwise at most {@code maxResults} score docs are loaded, walking
 * the score-doc array from its last considered entry back to its first.
 *
 * @return whatever the delegate {@code read(List, ...)} overload returns, or an empty set
 * @throws IOException if a document cannot be read from {@code indexReader}
 */
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader,
        final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults,
        final int maxAttributeChars) throws IOException {
    // Quota already used up: nothing to fetch.
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long startNanos = System.nanoTime();

    final ScoreDoc[] hits = topDocs.scoreDocs;
    final int limit = Math.min(hits.length, maxResults);
    final List<Document> documents = new ArrayList<>(limit);

    // Walk backwards so the documents are collected in reverse hit order.
    int index = limit;
    while (--index >= 0) {
        documents.add(indexReader.document(hits[index].doc));
    }

    final long elapsedNanos = System.nanoTime() - startNanos;
    logger.debug("Reading {} Lucene Documents took {} millis", documents.size(), TimeUnit.NANOSECONDS.toMillis(elapsedNanos));

    return read(documents, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}
@Override public long getTotalTokenCount() { LuceneSearcher luceneSearcher = getLuceneSearcher(1); try { RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*")); TopDocs docs = luceneSearcher.searcher.search(query, 1000); // Integer.MAX_VALUE might cause OOE on wrong index if (docs.totalHits == 0) { throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory); } else if (docs.totalHits > 1000) { throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory); } else { long result = 0; for (ScoreDoc scoreDoc : docs.scoreDocs) { long tmp = Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("totalTokenCount")); if (tmp > result) { // due to the way FrequencyIndexCreator adds these totalTokenCount fields, we must not sum them, // but take the largest one: result = tmp; } } return result; } } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Variant of {@link #document(int)} that loads only the given fields.
 * This is merely a convenience wrapper around {@link
 * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
 *
 * @param docID        id of the document to load
 * @param fieldsToLoad names of the fields passed to the visitor's constructor
 * @return the document assembled by the visitor
 * @throws IOException if the visitor-based load fails
 */
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException {
  final DocumentStoredFieldVisitor collector = new DocumentStoredFieldVisitor(fieldsToLoad);
  document(docID, collector);
  final Document assembled = collector.getDocument();
  return assembled;
}
@Override public final void document(int docID, StoredFieldVisitor visitor) throws IOException { ensureOpen(); final int i = readerIndex(docID); // find subreader num subReaders[i].document(docID - starts[i], visitor); // dispatch to subreader }
.mapToObj(docId -> { try { return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD); } catch (final Exception e) { throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
/** * Returns the stored fields of the <code>n</code><sup>th</sup> * <code>Document</code> in this index. This is just * sugar for using {@link DocumentStoredFieldVisitor}. * <p> * <b>NOTE:</b> for performance reasons, this method does not check if the * requested document is deleted, and therefore asking for a deleted document * may yield unspecified results. Usually this is not required, however you * can test if the doc is deleted by checking the {@link * Bits} returned from {@link MultiFields#getLiveDocs}. * * <b>NOTE:</b> only the content of a field is returned, * if that field was stored during indexing. Metadata * like boost, omitNorm, IndexOptions, tokenized, etc., * are not preserved. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ // TODO: we need a separate StoredField, so that the // Document returned here contains that class not // IndexableField public final Document document(int docID) throws IOException { final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); document(docID, visitor); return visitor.getDocument(); }
/**
 * Determines an approximate maximum indexed event ID for the given partition.
 *
 * <p>The partition's index directories are sorted with
 * {@code DirectoryUtils.NEWEST_INDEX_FIRST} and examined in that order. For the first
 * directory that can be opened and is non-empty, the identifier field of the document
 * with the highest document id ({@code maxDoc() - 1}) is returned. Directories that
 * cannot be opened, cannot be read, or contain no documents are skipped.
 *
 * @param partitionName name of the partition whose index directories are examined
 * @return the event ID read from the first usable index, or {@code -1} if there are no
 *         directories or none of them yields a value
 */
long getMaxEventId(final String partitionName) {
    final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
    if (allDirectories.isEmpty()) {
        return -1L;
    }

    Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);

    for (final File directory : allDirectories) {
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(directory);
        } catch (final IOException ioe) {
            // Pass the exception along so the cause is not lost, matching the warning further below.
            logger.warn("Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID",
                directory, ioe);
            continue;
        }

        try {
            final IndexReader reader = searcher.getIndexSearcher().getIndexReader();
            final int maxDocId = reader.maxDoc() - 1;
            if (maxDocId < 0) {
                // An empty index has no "last" document; asking the reader for doc -1 would throw an
                // unchecked exception and abort the whole scan, so move on to the next directory.
                // The finally block below still returns the searcher.
                logger.debug("Index Directory {} contains no documents. Will not consider this index when determining max event ID", directory);
                continue;
            }

            final Document document = reader.document(maxDocId);
            final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue();
            logger.info("Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}", partitionName, eventId, directory);
            return eventId;
        } catch (final IOException ioe) {
            logger.warn("Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
        } finally {
            indexManager.returnIndexSearcher(searcher);
        }
    }

    return -1L;
}
List<String> suggestions = new ArrayList<String>(); for (ScoreDoc doc : docs.scoreDocs) { suggestions.add(autoCompleteReader.document(doc.doc).get( SOURCE_WORD_FIELD));
++n; Document doc = reader.document(postsIter.docID(), CHECK_FIELDS); if (doc == null) { LOGGER.log(Level.FINER, "No Document: {0}", path);
public void testSearch() throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName))); MetricSpaces.loadReferencePoints(new GZIPInputStream(new FileInputStream("src/test/resources/metricspaces/refPoints_PHOG.dat.gz"))); MetricSpaces.loadReferencePoints(new GZIPInputStream(new FileInputStream("src/test/resources/metricspaces/refPoints_CEDD.dat.gz"))); MetricSpacesImageSearcher is = new MetricSpacesImageSearcher(10, new GZIPInputStream(new FileInputStream("src/test/resources/metricspaces/refPoints_CEDD.dat.gz")), 100); is.setNumHashesUsedForQuery(15); // GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, false, reader); for (int i = 0; i < 10; i++) { ImageSearchHits hits = is.search(reader.document(i), reader); for (int j = 0; j < hits.length(); j++) { System.out.printf("%02d: %06d %02.3f\n", j + 1, hits.documentID(j), hits.score(j)); } System.out.println("------< * >-------"); } }
/**
 * Converts a raw match into the shared {@code match} instance by reading the payload
 * stored for the raw id and normalising the boosted score by {@code boostedNorm}.
 *
 * @param rawMatch the raw match whose id and boosted score are used
 * @return the shared {@code match} object, updated in place
 * @throws IllegalArgumentException if {@code payloads} cannot be positioned on the raw id
 * @throws RuntimeException         wrapping any {@link IOException} raised while reading the payload
 */
@Override
public ScoredMatch apply(RawMatch rawMatch) {
  final int rawId = rawMatch.getRawId();
  try {
    final boolean positioned = payloads.skipTo(rawId) && payloads.doc() == rawId;
    if (!positioned) {
      throw new IllegalArgumentException("rawId:" + rawId + " doesn't exist. Payloads.doc():" + payloads.doc());
    }

    payloads.nextPosition();
    final int payloadLength = payloads.getPayloadLength();
    // Grow the reusable buffer only when the payload does not fit.
    if (data.length < payloadLength) {
      data = new byte[payloadLength];
    }
    payloads.getPayload(data, 0);

    match.getDocId().update(data, 0, payloadLength);
    match.setScore(rawMatch.getBoostedScore() / boostedNorm);
    return match;
  } catch (IOException e) {
    // Best effort: log the offending document before rethrowing the original failure.
    try {
      org.apache.lucene.document.Document offending = reader.document(rawId);
      logger.error("Document without payload: " + offending.toString());
    } catch (Exception ee) {
      logger.error(ee);
    }
    throw new RuntimeException(e);
  }
}
public void testSearchDocValues() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexNameDV))); GenericDocValuesImageSearcher is = new GenericDocValuesImageSearcher(10, CEDD.class, reader); // run search StopWatch sm = new StopWatch(); BufferedImage qImage = ImageIO.read(new FileInputStream(queryImage)); ImageSearchHits hits = null; for (int i = 0; i<numRuns; i++) { sm.start(); hits = is.search(qImage, reader); sm.stop(); } // print results for (int i = 0; i < hits.length(); i++) { String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns); }
public void testSearchCaching() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName))); GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, true, reader); // run search StopWatch sm = new StopWatch(); BufferedImage qImage = ImageIO.read(new FileInputStream(queryImage)); ImageSearchHits hits = null; for (int i = 0; i<numRuns; i++) { sm.start(); hits = is.search(qImage, reader); sm.stop(); } // print results for (int i = 0; i < hits.length(); i++) { String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns); }
protected void init() { // put all respective features into an in-memory cache ... if (isCaching && reader != null) { Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); featureCache = new LinkedHashMap<Integer, SearchItemForEvaluation>(docs); try { Document d; for (int i = 0; i < docs; i++) { if (!(reader.hasDeletions() && !liveDocs.get(i))) { d = reader.document(i); cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length); featureCache.put(i, new SearchItemForEvaluation(cachedInstance.getByteArrayRepresentation(), new SimpleResultForEvaluation(-1d, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); } } } catch (IOException e) { e.printStackTrace(); } } }
protected void init() { // put all respective features into an in-memory cache ... if (isCaching && reader != null) { Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); featureCache = new LinkedHashMap<Integer, SearchItemForEvaluation>(docs); try { Document d; for (int i = 0; i < docs; i++) { if (!(reader.hasDeletions() && !liveDocs.get(i))) { d = reader.document(i); cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length); featureCache.put(i, new SearchItemForEvaluation(cachedInstance.getByteArrayRepresentation(), new SimpleResultForEvaluation(-1d, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); } } } catch (IOException e) { e.printStackTrace(); } } }
public void testSearchPlain() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexName))); GenericFastImageSearcher is = new GenericFastImageSearcher(10, CEDD.class, false, reader); // run search StopWatch sm = new StopWatch(); BufferedImage qImage = ImageIO.read(new FileInputStream(queryImage)); ImageSearchHits hits = null; for (int i = 0; i< numRuns; i++) { sm.start(); hits = is.search(qImage, reader); sm.stop(); } // print results for (int i = 0; i < hits.length(); i++) { String fileName = reader.document(hits.documentID(i)).getValues(GlobalDocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } System.out.printf("\nTime for searching: %02.3f ms\n", sm.getTime() / (double) numRuns); } }