/**
 * Get number of documents in this index database.
 * @return number of documents
 * @throws IOException if I/O exception occurred
 */
public int getNumFiles() throws IOException {
    IndexReader ireader = null;
    int numDocs = 0;
    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        numDocs = ireader.numDocs();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
    return numDocs;
}
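// A shorter equivalent sketch using try-with-resources (the method name
// getNumFilesCompact is hypothetical; it assumes the same indexDirectory
// field, and a close-time IOException propagates instead of being logged):
public int getNumFilesCompact() throws IOException {
    try (IndexReader ireader = DirectoryReader.open(indexDirectory)) {
        return ireader.numDocs();
    }
}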
@Override
protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
    Entry entry = entryKey;
    String field = entry.field;
    // Shortcut: if every document contains this term, all bits below would
    // get set and the flipped result would be empty anyway.
    if (reader.maxDoc() == reader.docFreq(new Term(field))) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    OpenBitSet res = new OpenBitSet(reader.maxDoc());
    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field));
    try {
        do {
            Term term = termEnum.term();
            // Field names are interned in this Lucene version, so the
            // identity comparison '!=' is valid here.
            if (term == null || term.field() != field) {
                break;
            }
            termDocs.seek(termEnum);
            while (termDocs.next()) {
                res.fastSet(termDocs.doc());
            }
        } while (termEnum.next());
    } finally {
        termDocs.close();
        termEnum.close();
    }
    // Invert: keep the documents that have NO term in this field.
    res.flip(0, reader.maxDoc());
    return res;
}
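// A hedged sketch of the same "documents missing this field" bit set against
// modern Lucene (5.x+), where TermsEnum/PostingsEnum replace TermEnum/TermDocs
// and FixedBitSet replaces OpenBitSet. The method name docsWithoutField is
// mine; imports come from org.apache.lucene.index, .search and .util:
static FixedBitSet docsWithoutField(LeafReader reader, String field) throws IOException {
    FixedBitSet res = new FixedBitSet(reader.maxDoc());
    Terms terms = reader.terms(field);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        PostingsEnum postings = null;
        while (termsEnum.next() != null) {
            postings = termsEnum.postings(postings, PostingsEnum.NONE);
            int doc;
            while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                res.set(doc);
            }
        }
    }
    res.flip(0, reader.maxDoc()); // invert: keep docs with no term in the field
    return res;
}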
@Override
public long getTotalTokenCount() {
    LuceneSearcher luceneSearcher = getLuceneSearcher(1);
    try {
        RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*"));
        // using Integer.MAX_VALUE as the limit might cause an OutOfMemoryError on a broken index:
        TopDocs docs = luceneSearcher.searcher.search(query, 1000);
        if (docs.totalHits == 0) {
            throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory);
        } else if (docs.totalHits > 1000) {
            throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory);
        } else {
            long result = 0;
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                long tmp = Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("totalTokenCount"));
                // due to the way FrequencyIndexCreator adds these totalTokenCount
                // fields, we must not sum them, but take the largest one:
                if (tmp > result) {
                    result = tmp;
                }
            }
            return result;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
@Override
protected synchronized void doClose() throws IOException {
    IOException ioe = null;
    for (final IndexReader reader : completeReaderSet) {
        try {
            if (closeSubReaders) {
                reader.close();
            } else {
                reader.decRef();
            }
        } catch (IOException e) {
            if (ioe == null) {
                ioe = e;
            }
        }
    }
    // throw the first exception
    if (ioe != null) {
        throw ioe;
    }
}
iwriter = new IndexWriter(directory, iwConfig);
Document doc = new Document();
doc.add(new StringField("ID", "10000", Field.Store.YES));
doc.add(new TextField(fieldName, text, Field.Store.YES));
iwriter.addDocument(doc);
iwriter.close();

ireader = DirectoryReader.open(directory);
isearcher = new IndexSearcher(ireader);
TopDocs topDocs = isearcher.search(query, 5);
System.out.println("Hits: " + topDocs.totalHits);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (int i = 0; i < scoreDocs.length; i++) {
    Document targetDoc = isearcher.doc(scoreDocs[i].doc);
    System.out.println("Content: " + targetDoc.toString());
}

if (ireader != null) {
    try {
        ireader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
if (directory != null) {
    try {
        directory.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
IndexSearcher searcher = new IndexSearcher(ireader);
try {
    TopDocs top = searcher.search(q, 1);
    if (top.totalHits > 0) {
        Document doc = searcher.doc(top.scoreDocs[0].doc);
        String foundPath = doc.get(QueryBuilder.PATH); // path of the matching document
        IndexableField tags = doc.getField(QueryBuilder.TAGS);
        if (tags != null) {
            return Definitions.deserialize(tags.binaryValue().bytes);
        }
    }
} finally {
    ireader.close();
}
return null;
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    // Flatten term-frequency scoring so the ranking only reflects how many
    // of the query hashes a document matches (the constant return value is
    // the usual choice here; the original override body was cut off):
    searcher.setSimilarity(new ClassicSimilarity() {
        @Override
        public float tf(float freq) {
            return 1f;
        }
    });
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    for (int i = 0; i < hashes.length; i++) {
        queryBuilder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD));
    }
    TopDocs docs = searcher.search(queryBuilder.build(), maxResultsHashBased);
    // Re-rank the hash-based candidates by actual feature distance; feature,
    // resultScoreDocs and maximumHits are fields of the surrounding class.
    double tmpScore = 0d;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        BytesRef stored = reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName);
        feature.setByteArrayRepresentation(stored.bytes, stored.offset, stored.length);
        tmpScore = queryFeature.getDistance(feature);
        if (resultScoreDocs.size() < maximumHits) {
            // ... (result collection continues as in the original, truncated here)
        }
    }
}
public static void dumpTags(File file, String field, long maxDocs) throws IOException {
    // note: maxDocs is accepted but not yet enforced by this method
    Directory dir = FSDirectory.open(file);
    IndexReader reader = IndexReader.open(dir, true); // read-only
    TermEnum te = reader.terms(new Term(field, ""));
    try {
        do {
            Term term = te.term();
            if (term == null || term.field().equals(field) == false) {
                break;
            }
            System.err.printf("%s %d\n", term.text(), te.docFreq());
        } while (te.next());
    } finally {
        te.close();
        reader.close();
        dir.close();
    }
}
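// A hedged sketch of the same dump against a modern Lucene (5.x+) index;
// TermEnum/TermDocs are gone, and terms of one field are iterated with a
// TermsEnum (MultiFields.getTerms spans all segments; the method name and
// the java.nio Path parameter are my own choices, not from the original):
public static void dumpTagsModern(Path dirPath, String field) throws IOException {
    try (Directory dir = FSDirectory.open(dirPath);
         IndexReader reader = DirectoryReader.open(dir)) {
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms == null) {
            return; // field not present in this index
        }
        TermsEnum te = terms.iterator();
        BytesRef term;
        while ((term = te.next()) != null) {
            System.err.printf("%s %d%n", term.utf8ToString(), te.docFreq());
        }
    }
}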
Query query = new TermQuery(new Term(GRAMMED_WORDS_FIELD, term));
Sort sort = new Sort(COUNT_FIELD, true); // most frequent words first
TopDocs docs = autoCompleteSearcher.search(query, null, 5, sort);
List<String> suggestions = new ArrayList<String>();
for (ScoreDoc doc : docs.scoreDocs) {
    suggestions.add(autoCompleteReader.document(doc.doc).get(SOURCE_WORD_FIELD));
}

// ... while rebuilding the autocomplete index, record each word's document frequency:
wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
sourceReader.close();

// ... refresh the reader after reindexing:
if (autoCompleteReader == null) {
    autoCompleteReader = IndexReader.open(autoCompleteDirectory);
} else {
    // reopen() returns a new reader rather than mutating this one,
    // so its result must be assigned back:
    autoCompleteReader = autoCompleteReader.reopen();
}
if (getBooleanParameter(req, "rewrite")) {
    final Query rewritten_q = q.rewrite(searcher.getIndexReader());
    queryRow.put("rewritten_q", rewritten_q.toString());
    // ... for each term extracted from the rewritten query:
    final int freq = searcher.getIndexReader().docFreq((Term) term);
    freqs.put(term.toString(), freq);
}

if (sort == null) {
    td = searcher.search(q, skip + limit);
} else {
    td = searcher.search(q, skip + limit, sort);
}

// ... while building each result row, copy only stored fields:
for (final IndexableField f : doc.getFields()) {
    if (!f.fieldType().stored()) {
        continue;
    }
    // ...
}
fetch_ids[i - skip] = doc.get("_id");
private void testSearchSpeed(Class<? extends GlobalFeature> featureClass) throws IOException {
    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath, testExtensive, true);
    parallelIndexer.addExtractor(featureClass);
    parallelIndexer.run();
    IndexReader reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass);
    long ms = System.currentTimeMillis();
    String fileName;
    Document queryDoc;
    ImageSearchHits hits;
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.containsKey(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            queryDoc = reader.document(i);
            hits = searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1), (double) ms / queryCount);
}
private static void indexDocs(File file, File index, boolean create) throws Exception {
    if (!create) { // incrementally update
        reader = IndexReader.open(FSDirectory.open(index), false); // open existing index
        uidIter = reader.terms(new Term("uid", "")); // init uid iterator
        indexDocs(file);
        if (deleting) { // delete rest of stale docs
            // field names are interned in this Lucene version, so '==' is safe here
            while (uidIter.term() != null && uidIter.term().field() == "uid") {
                System.out.println("deleting " + HTMLDocument.uid2url(uidIter.term().text()));
                reader.deleteDocuments(uidIter.term());
                uidIter.next();
            }
            deleting = false;
        }
        uidIter.close(); // close uid iterator
        reader.close();  // close existing index
    } else { // don't have an existing index
        indexDocs(file);
    }
}
} else {
    dir = FSDirectory.open(new File(indexDir, proj).toPath());
    ir = DirectoryReader.open(dir);
}

t = new Term(QueryBuilder.FULL, builder.getFreetext());
getSuggestion(t, ir, dummy);
s.freetext = dummy.toArray(new String[dummy.size()]);
dummy.clear(); // the list is reused for each suggestion field

t = new Term(QueryBuilder.REFS, builder.getRefs());
getSuggestion(t, ir, dummy);
s.refs = dummy.toArray(new String[dummy.size()]);
dummy.clear();

t = new Term(QueryBuilder.DEFS, builder.getDefs());
getSuggestion(t, ir, dummy);
s.defs = dummy.toArray(new String[dummy.size()]);

if (ir != null && closeOnDestroy) {
    try {
        ir.close();
    } catch (IOException ex) {
        LOGGER.log(Level.WARNING, "Got exception while closing index reader", ex);
    }
}
private long getCount(Term term, LuceneSearcher luceneSearcher) {
    long result = 0;
    try {
        TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000);
        if (docs.totalHits > 2000) {
            throw new RuntimeException("More than 2000 matches for '" + term
                    + "' not supported for performance reasons: " + docs.totalHits
                    + " matches in " + luceneSearcher.directory);
        }
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            String countStr = luceneSearcher.reader.document(scoreDoc.doc).get("count");
            result += Long.parseLong(countStr);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return result;
}
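// Hypothetical usage of getCount above (the "ngram" field name and the
// phrase are assumptions, not taken from the snippet): sum the stored
// "count" values over all documents matching one term.
long occurrences = getCount(new Term("ngram", "the quick"), luceneSearcher);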
private long innerCount() {
    try {
        final int maxDoc = searcher.getIndexReader().maxDoc();
        if (maxDoc == 0) {
            return 0;
        }
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(createQuery(), getFilter(), collector);
        return collector.getTotalHits();
    } catch (IOException | IllegalArgumentException e) {
        throw new QueryException(e);
    }
}
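// A minimal self-contained sketch of the same counting pattern (method name
// countMatches is mine; it assumes a plain IndexSearcher and Query).
// TotalHitCountCollector only counts matches, without scoring or collecting
// documents, which is cheaper than retrieving TopDocs just for totalHits:
static long countMatches(IndexSearcher searcher, Query query) throws IOException {
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query, collector);
    return collector.getTotalHits();
}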
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    IndexSearcher searcher = (IndexSearcher) context.get("searcher");
    TFIDFSimilarity sim = asTFIDF(searcher.getSimilarity(true), field);
    if (sim == null) {
        throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
    }
    int docfreq = searcher.getIndexReader().docFreq(new Term(indexedField, indexedBytes));
    float idf = sim.idf(docfreq, searcher.getIndexReader().maxDoc());
    return new ConstDoubleDocValues(idf, this);
}
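// For reference, a standalone sketch of the idf value ClassicSimilarity
// produces for the call above in recent Lucene versions (the older
// DefaultSimilarity used log(numDocs / (docFreq + 1)) + 1 instead):
static float classicIdf(long docFreq, long docCount) {
    return (float) (Math.log((docCount + 1) / (double) (docFreq + 1)) + 1.0);
}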