Refine search
/**
 * Converts the hits of a search into {@code FileExtra} records.
 *
 * <p>For every scored hit the stored document is loaded and its path,
 * number-of-lines and lines-of-code fields are read; the two numeric fields
 * are converted with {@code tryParseInt}.
 *
 * @param searcher searcher used to load the stored document of each hit
 * @param hits     search result whose {@code scoreDocs} are processed in order
 * @return one {@code FileExtra} per hit, in hit order
 * @throws IOException if a stored document cannot be read
 */
private List<FileExtra> processHits(IndexSearcher searcher, TopDocs hits) throws IOException {
    final ScoreDoc[] scored = hits.scoreDocs;
    final List<FileExtra> extras = new ArrayList<>();

    for (int i = 0; i < scored.length; i++) {
        final Document stored = searcher.doc(scored[i].doc);

        final String path = stored.get(QueryBuilder.PATH);
        final Integer lineCount = tryParseInt(stored.get(QueryBuilder.NUML));
        final Integer linesOfCode = tryParseInt(stored.get(QueryBuilder.LOC));

        extras.add(new FileExtra(path, lineCount, linesOfCode));
    }

    return extras;
}
// Open a searcher on the index and parse the query text against the "content" field.
IndexSearcher searcher = new IndexSearcher(directory);
QueryParser parser = new QueryParser(Version.LUCENE_30, "content", analyzer);
Query query = parser.parse("terms to search");

// Collect at most HOW_MANY_RESULTS_TO_COLLECT top-scoring hits.
TopScoreDocCollector collector = TopScoreDocCollector.create(HOW_MANY_RESULTS_TO_COLLECT, true);
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;

// Each hit only carries Lucene's internal document number. Note that this
// number may change after document deletion, so it must not be kept around.
for (ScoreDoc hit : hits) {
    // Resolve the document number to the actual stored document.
    Document hitDoc = searcher.doc(hit.doc);
    System.out.println("Title: " + hitDoc.get("title"));
    System.out.println("Content: " + hitDoc.get("content"));
    System.out.println();
}
private long getCount(Term term, LuceneSearcher luceneSearcher) { long result = 0; try { TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000); if (docs.totalHits > 2000) { throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " + docs.totalHits + " matches in " + luceneSearcher.directory); } for (ScoreDoc scoreDoc : docs.scoreDocs) { String countStr = luceneSearcher.reader.document(scoreDoc.doc).get("count"); result += Long.parseLong(countStr); } //System.out.println(term + " -> " + result); } catch (IOException e) { throw new RuntimeException(e); } return result; }
/**
 * Advances to the next matching entry.
 * <p>
 * Resets {@code curr} to {@code null} and then walks the remaining hits,
 * starting at {@code idx}. The first document whose key passes {@code filters}
 * (or any document if no filter is set) becomes the new {@code curr}. If no
 * such document is left, {@code curr} stays {@code null}.
 *
 * @throws IgniteCheckedException If the stored document cannot be read.
 */
@SuppressWarnings("unchecked")
private void findNext() throws IgniteCheckedException {
    curr = null;

    for (; idx < docs.length; ) {
        Document hit;

        try {
            // The position is advanced before the read, so a failed read is not retried.
            hit = searcher.doc(docs[idx++].doc);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // Use the deployment class loader only when deployment is enabled.
        ClassLoader ldr = (ctx != null && ctx.deploy().enabled())
            ? ctx.cache().internalCache(cacheName).context().deploy().globalLoader()
            : null;

        // NOTE(review): only BytesRef.bytes is passed on; this presumably relies on
        // offset being 0 and length covering the whole array - confirm.
        K key = unmarshall(hit.getBinaryValue(KEY_FIELD_NAME).bytes, ldr);

        if (filters == null || filters.apply(key)) {
            V val;

            // String values are stored as text, everything else as marshalled bytes.
            if (type.valueClass() == String.class)
                val = (V)hit.get(VAL_STR_FIELD_NAME);
            else
                val = this.<V>unmarshall(hit.getBinaryValue(VAL_FIELD_NAME).bytes, ldr);

            assert val != null;

            curr = new IgniteBiTuple<>(key, val);

            return;
        }
    }
}
doc = searcher.doc(docId); } catch (IOException e) { LOGGER.log(Level.WARNING, "ERROR getting searcher doc(int)", e); FileAnalyzer anz = fac.getAnalyzer(); String path = doc.get(QueryBuilder.PATH); String pathE = Util.URIEncodePath(path); String urlPrefixE = urlPrefix == null ? "" : Util.URIEncodePath(
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException { GlobalFeature queryFeature = null; try { queryFeature = feature.getClass().newInstance(); } catch (InstantiationException | IllegalAccessException e) { e.printStackTrace(); return null; } if (useDocValues) { docValues = MultiDocValues.getBinaryValues(reader, featureFieldName); // find the id of the document in the reader, then do search ... TODO: find another way instead of calling the searcher every time. TopDocs topDocs = searcher.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, doc.get(DocumentBuilder.FIELD_NAME_IDENTIFIER))), 1); if (topDocs.totalHits > 0) { int docID = topDocs.scoreDocs[0].doc; docValues.advanceExact(docID); queryFeature.setByteArrayRepresentation(docValues.binaryValue().bytes, docValues.binaryValue().offset, docValues.binaryValue().length); return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, searcher.getIndexReader()); } } else { queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length); return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, reader); } return null; }
/**
 * Looks up the most recent blob stored under {@code key} and completes the
 * operation with it.
 *
 * <p>The operation is failed with a {@code CancellationException} when no
 * index writer is available, and completed without a body when nothing is
 * indexed under the key. Otherwise the stored binary content is deserialized,
 * the blob retention policy is applied for the key, and the operation is
 * completed with the deserialized object as its body.
 *
 * @param key the key to look up
 * @param op  the operation to complete or fail
 * @throws Throwable if searching, reading or deserializing fails
 */
private void queryIndex(String key, Operation op) throws Throwable {
    IndexWriter currentWriter = this.writer;
    if (currentWriter == null) {
        op.fail(new CancellationException());
        return;
    }

    IndexSearcher indexSearcher = updateSearcher(key, currentWriter);
    Term keyTerm = new Term(URI_PARAM_NAME_KEY, key);
    Query linkQuery = new TermQuery(keyTerm);

    // Only the top hit under timeSort is needed.
    TopDocs results = indexSearcher.search(linkQuery, 1, this.timeSort, false, false);
    if (results.totalHits == 0) {
        op.complete();
        return;
    }

    int topDocId = results.scoreDocs[0].doc;
    Document stored = indexSearcher.doc(topDocId);
    BytesRef blob = stored.getBinaryValue(LUCENE_FIELD_NAME_BINARY_CONTENT);
    long updateTime = Long.parseLong(stored.get(URI_PARAM_NAME_UPDATE_TIME));
    Object body = Utils.fromBytes(blob.bytes, blob.offset, blob.length);

    applyBlobRetentionPolicy(linkQuery, updateTime);
    op.setBodyNoCloning(body).complete();
}
private Document wrapperDocument(IndexSearcher indexSearcher, Set<String> fields, Highlighter highlighter, Analyzer analyzer, ScoreDoc scoreDoc) throws Exception { org.apache.lucene.document.Document searchDoc = indexSearcher.doc(scoreDoc.doc); List<Field> newFields = new ArrayList<Field>(); List<IndexableField> indexableFields = searchDoc.getFields(); for (IndexableField indexableField : indexableFields) { String fieldName = indexableField.name(); String template = null; if (fields.contains(fieldName)) { String fieldText = searchDoc.get(fieldName); if (fieldText != null) { template = highlighter.getBestFragment(analyzer, fieldName, fieldText); } } if (template != null) { //高亮模板存在,执行高亮包装 newFields.add(new HighlightFieldWrapper(indexableField, luceneBuilder.getPerfix(), luceneBuilder.getSuffix(), template)); } else { //不存在,按普通字段包装 newFields.add(new FieldWrapper(indexableField)); } } return new DocumentWrapper(newFields); }
public void listAllDocuments() throws IOException { setIndexReaderSearcher(); for(int i = 0; i < reader.numDocs(); i++){ Document d = searcher.doc(i); // byte[] sent = d.getBinaryValue("tokens").bytes; // List<CoreLabel> tokens = readProtoBufAnnotation(sent); System.out.println(d.get("sentid")); } }
@Override public long getTotalTokenCount() { LuceneSearcher luceneSearcher = getLuceneSearcher(1); try { RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*")); TopDocs docs = luceneSearcher.searcher.search(query, 1000); // Integer.MAX_VALUE might cause OOE on wrong index if (docs.totalHits == 0) { throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory); } else if (docs.totalHits > 1000) { throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory); } else { long result = 0; for (ScoreDoc scoreDoc : docs.scoreDocs) { long tmp = Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("totalTokenCount")); if (tmp > result) { // due to the way FrequencyIndexCreator adds these totalTokenCount fields, we must not sum them, // but take the largest one: result = tmp; } } return result; } } catch (IOException e) { throw new RuntimeException(e); } }
searcher.search(query, collector); } catch (Exception e) { // this exception should never be hit, since search() will hit this before LOGGER.log( int docId = hits[i].doc; try { d = searcher.doc(docId); } catch (Exception e) { LOGGER.log( try { Document doc = docs.get(ii); String filename = doc.get(QueryBuilder.PATH); Genre genre = Genre.get(doc.get(QueryBuilder.T)); Definitions tags = null; IndexableField tagsField = doc.getField(QueryBuilder.TAGS);
/**
 * Collects the symbol list stored with a matching document.
 *
 * <p>The stored document is loaded, and if its index version field equals
 * {@code indexVersion} and it carries a binary symbol list, that list is
 * deserialized and placed into {@code symbolsRef}. Deserialization failures
 * are logged and otherwise ignored.
 *
 * @param doc document number relative to the current context's doc base
 * @throws IOException if the stored document cannot be read
 */
@SuppressWarnings("unchecked")
@Override
public void collect(int doc) throws IOException {
    Document document = searcher.doc(context.docBase + doc);
    if (!indexVersion.equals(document.get(BLOB_INDEX_VERSION.name()))) {
        return;
    }

    BytesRef bytesRef = document.getBinaryValue(BLOB_SYMBOL_LIST.name());
    if (bytesRef == null) {
        return;
    }

    try {
        // Only the slice [offset, offset + length) of the backing array is valid
        // data, so hand exactly that slice to the deserializer. The copy is skipped
        // when the slice already spans the whole array.
        byte[] serialized = bytesRef.bytes;
        if (bytesRef.offset != 0 || bytesRef.length != serialized.length) {
            serialized = java.util.Arrays.copyOfRange(
                    bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length);
        }
        symbolsRef.set((List<Symbol>) SerializationUtils.deserialize(serialized));
    } catch (Exception e) {
        logger.error("Error deserializing symbols", e);
    }
}
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException { GlobalFeature queryFeature = null; try { queryFeature = feature.getClass().newInstance(); } catch (InstantiationException | IllegalAccessException e) { e.printStackTrace(); return null; } if (useDocValues) { docValues = MultiDocValues.getBinaryValues(reader, featureFieldName); // find the id of the document in the reader, then do search ... TODO: find another way instead of calling the searcher every time. TopDocs topDocs = searcher.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, doc.get(DocumentBuilder.FIELD_NAME_IDENTIFIER))), 1); if (topDocs.totalHits > 0) { int docID = topDocs.scoreDocs[0].doc; docValues.advanceExact(docID); queryFeature.setByteArrayRepresentation(docValues.binaryValue().bytes, docValues.binaryValue().offset, docValues.binaryValue().length); return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, searcher.getIndexReader()); } } else { queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length); return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, reader); } return null; }
private long getCount(Term term, LuceneSearcher luceneSearcher) { long result = 0; try { TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000); if (docs.totalHits > 2000) { throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " + docs.totalHits + " matches in " + luceneSearcher.directory); } for (ScoreDoc scoreDoc : docs.scoreDocs) { String countStr = luceneSearcher.reader.document(scoreDoc.doc).get("count"); result += Long.parseLong(countStr); } //System.out.println(term + " -> " + result); } catch (IOException e) { throw new RuntimeException(e); } return result; }
TopFieldDocs fdocs = searcher.search(query, start + maxItems, sort); totalHits = fdocs.totalHits; hits = fdocs.scoreDocs; Document doc = searcher.doc(hits[0].doc); if (doc.getField(QueryBuilder.TAGS) != null) { byte[] rawTags = doc.getField(QueryBuilder.TAGS).binaryValue().bytes; + Util.URIEncodePath(searcher.doc(hits[0].doc).get(QueryBuilder.PATH)) + '#' + Util.URIEncode(((TermQuery) query).getTerm().text());