Refine search
// Opens a searcher over `dir`, runs `query` (top 1000 hits), prints one hit's "content" field and
// score, then closes the reader and directory.
// NOTE(review): fragment — `query`, `i`, and `dir` come from elided surrounding code; `analyzer` is
// created but never used here, and `hits[i]` is referenced without a visible loop over `hits` —
// confirm against the complete example before reuse.
Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47); DirectoryReader ireader = DirectoryReader.open(dir); IndexSearcher isearcher = new IndexSearcher(ireader); ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; Document hitDoc = isearcher.doc(hits[i].doc); System.out.println(hitDoc.get("content")); System.out.println(hits[i].score); ireader.close(); dir.close();
/**
 * Builds the per-search weight for this term query, gathering the collection-
 * and term-level statistics the similarity needs to score matches.
 *
 * @throws IllegalStateException if scores are needed but no term states were supplied
 */
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates) throws IOException {
  super(TermQuery.this);
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.needsScores = needsScores;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity(needsScores);
  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (!needsScores) {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
    final int numDocs = searcher.getIndexReader().maxDoc();
    collectionStats = new CollectionStatistics(term.field(), numDocs, -1, -1, -1);
    termStats = new TermStatistics(term.bytes(), numDocs, -1);
  } else {
    collectionStats = searcher.collectionStatistics(term.field());
    termStats = searcher.termStatistics(term, termStates);
  }
  this.stats = similarity.computeWeight(boost, collectionStats, termStats);
}
/** Quietly releases a searcher and its underlying reader; close failures are ignored. */
private static void closeSearcher(IndexSearcher searcher) {
  IndexReader indexReader = searcher.getIndexReader();
  try {
    searcher.close();
  } catch (IOException ignored) {
    // best effort — nothing useful to do if close fails
  }
  try {
    indexReader.close();
  } catch (IOException ignored) {
    // best effort — nothing useful to do if close fails
  }
}
}
/**
 * Looks up the single document indexed under the given entity id.
 *
 * @return the matching document, or {@code null} when the id is not indexed
 * @throws RuntimeException wrapping any {@link IOException} from the index
 */
static Document findDocument( IndexType type, IndexSearcher searcher, long entityId ) {
  try {
    TopDocs hits = searcher.search( type.idTermQuery( entityId ), 1 );
    if ( hits.scoreDocs.length == 0 ) {
      return null;
    }
    return searcher.doc( hits.scoreDocs[0].doc );
  } catch ( IOException e ) {
    throw new RuntimeException( e );
  }
}
/**
 * Creates an uncached scorer for {@code query} against a single codec reader.
 * Scoring is disabled (needsScores = false) — only matching is required.
 */
private static Scorer getScorer(Query query, CodecReader reader) throws IOException {
  IndexSearcher localSearcher = new IndexSearcher(reader);
  // disable the query cache so we always build a fresh scorer for this reader
  localSearcher.setQueryCache(null);
  Query rewritten = localSearcher.rewrite(query);
  Weight weight = localSearcher.createWeight(rewritten, false, 1.0f);
  return weight.scorer(reader.getContext());
}
// Hash-based approximate image search: ORs a TermQuery per hash value into a boolean query, then
// re-ranks the raw hits by the true feature distance (queryFeature.getDistance) against each
// stored binary feature.
// NOTE(review): fragment — the method is truncated mid-body (the tf() override and several braces
// are never closed) and the loop variable `i` is declared twice in nested scopes; compare with the
// original source before treating this as compilable.
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException { IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new ClassicSimilarity(){ @Override public float tf(float freq) { for (int i = 0; i < hashes.length; i++) { queryBuilder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); TopDocs docs = searcher.search(queryBuilder.build(), maxResultsHashBased); double tmpScore = 0d; for (int i = 0; i < docs.scoreDocs.length; i++) { feature.setByteArrayRepresentation(reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length); tmpScore = queryFeature.getDistance(feature); if (resultScoreDocs.size() < maximumHits) {
// Looks up the first hit for `q` and, when it carries a TAGS payload, deserializes its Definitions.
IndexSearcher searcher = new IndexSearcher(ireader);
TopDocs top = searcher.search(q, 1);
// Bug fix: the original guard was inverted (`top.totalHits == 0`), so scoreDocs[0] was only
// dereferenced when there were NO hits, guaranteeing an ArrayIndexOutOfBoundsException.
// NOTE(review): fragment — closing braces and the `ireader.close()` placement are truncated here.
if (top.totalHits != 0) { Document doc = searcher.doc(top.scoreDocs[0].doc); String foundPath = doc.get(QueryBuilder.PATH); IndexableField tags = doc.getField(QueryBuilder.TAGS); if (tags != null) { return Definitions.deserialize(tags.binaryValue().bytes); ireader.close();
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException { GlobalFeature queryFeature = null; try { queryFeature = feature.getClass().newInstance(); } catch (InstantiationException | IllegalAccessException e) { e.printStackTrace(); return null; } if (useDocValues) { docValues = MultiDocValues.getBinaryValues(reader, featureFieldName); // find the id of the document in the reader, then do search ... TODO: find another way instead of calling the searcher every time. TopDocs topDocs = searcher.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, doc.get(DocumentBuilder.FIELD_NAME_IDENTIFIER))), 1); if (topDocs.totalHits > 0) { int docID = topDocs.scoreDocs[0].doc; docValues.advanceExact(docID); queryFeature.setByteArrayRepresentation(docValues.binaryValue().bytes, docValues.binaryValue().offset, docValues.binaryValue().length); return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, searcher.getIndexReader()); } } else { queryFeature.setByteArrayRepresentation(doc.getBinaryValue(featureFieldName).bytes, doc.getBinaryValue(featureFieldName).offset, doc.getBinaryValue(featureFieldName).length); return search(MetricSpaces.generateBoostedQuery(queryFeature, numHashesUsedForQuery), queryFeature, reader); } return null; }
/**
 * Resolves the given document's identifier back to its internal doc id in {@code reader}
 * and delegates to the id-based search.
 *
 * @return the hits for the resolved doc id, or {@code null} when the identifier is not indexed
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
  IndexSearcher localSearcher = new IndexSearcher(reader);
  String identifier = doc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
  TermQuery idQuery = new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, identifier));
  TopDocs topDocs = localSearcher.search(idQuery, 1);
  if (topDocs.totalHits <= 0) {
    return null;
  }
  return search(topDocs.scoreDocs[0].doc);
}
/**
 * Builds an RSS feed of index documents whose creation timestamp falls within the last
 * five minutes (inclusive range on the Tika "created" field).
 *
 * @param indexFile path to the Lucene index directory
 * @return the complete RSS document (headers, up to 20 items, footers)
 * @throws CorruptIndexException if the index is damaged
 * @throws IOException on other index access failures
 */
public String generateRSS(Path indexFile) throws CorruptIndexException, IOException {
  // StringBuilder instead of StringBuffer: the buffer is method-local, no synchronization needed.
  StringBuilder output = new StringBuilder();
  output.append(getRSSHeaders());
  IndexSearcher searcher = null;
  try {
    reader = DirectoryReader.open(FSDirectory.open(indexFile));
    searcher = new IndexSearcher(reader);
    GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
    gc.setTime(new Date());
    String nowDateTime = ISO8601.format(gc);
    gc.add(java.util.GregorianCalendar.MINUTE, -5);
    String fiveMinsAgo = ISO8601.format(gc);
    // Inclusive term range [fiveMinsAgo, nowDateTime] over the creation timestamp.
    TermRangeQuery query = new TermRangeQuery(
        TikaCoreProperties.CREATED.getName(),
        new BytesRef(fiveMinsAgo),
        new BytesRef(nowDateTime),
        true, true);
    TopScoreDocCollector collector = TopScoreDocCollector.create(20);
    searcher.search(query, collector);
    for (ScoreDoc hit : collector.topDocs().scoreDocs) {
      Document doc = searcher.doc(hit.doc);
      output.append(getRSSItem(doc));
    }
  } finally {
    // `reader` is an instance field assigned above; release it even on failure.
    if (reader != null) reader.close();
  }
  output.append(getRSSFooters());
  return output.toString();
}
protected static void doReadOperation(Directory d) throws Exception { IndexReader indexReader = null; IndexSearcher search = null; try { indexReader = DirectoryReader.open(d); // this is a read search = new IndexSearcher(indexReader); // dummy query that probably won't return anything Term term = new Term( "path", "good" ); TermQuery termQuery = new TermQuery(term); search.search(termQuery, 1); } finally { if (search != null) { indexReader.close(); } } }
// Builds a conjunctive (MUST) TermQuery clause per token value — skipping stop words unless the
// key was already processed — then collects the `sentid` field of every matching document.
// NOTE(review): fragment — clauses are added to `builder` but the search runs on `query`; confirm
// in the full source that `query` is produced from `builder` before the search.
for(String en2: en.getValue()){ if(!processedKey || !stopWords.contains(en2.toLowerCase())) builder.add(new BooleanClause(new TermQuery(new Term(en.getKey(), en2)), BooleanClause.Occur.MUST)); TopDocs tp = searcher.search(query, Integer.MAX_VALUE); Set<String> sentids = new HashSet<>(); if (tp.totalHits > 0) { for (ScoreDoc s : tp.scoreDocs) { int docId = s.doc; Document d = searcher.doc(docId); sentids.add(d.get("sentid"));
// ORs a TermQuery per flow-file UUID into one boolean query, runs it, and converts the top docs
// into provenance event records.
// NOTE(review): fragment — the search/convert calls appear inside the accumulation loop, meaning
// each iteration would re-query with all UUIDs seen so far; confirm they belong after the loop in
// the full source (closing braces are elided here).
flowFileIdQuery = new BooleanQuery(); for (final String flowFileUuid : flowFileUuids) { flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD); final TopDocs uuidQueryTopDocs = searcher.getIndexSearcher().search(flowFileIdQuery, MAX_QUERY_RESULTS); final long searchEnd = System.nanoTime(); final Set<ProvenanceEventRecord> recs = docsToEventConverter.convert(uuidQueryTopDocs, searcher.getIndexSearcher().getIndexReader());
/**
 * Loads the serialized token-to-patterns map stored under the given sentence id.
 *
 * @param sentId the sentence identifier indexed in the "sentid" field
 * @return the deserialized map, or {@code null} only in the (unexpected) case that the hit
 *         count is positive but no score docs are returned
 * @throws RuntimeException if the sentence id is not indexed, or wrapping any
 *         {@link IOException}/{@link ClassNotFoundException} during deserialization
 */
@Override public Map<Integer, Set<E>> getPatternsForAllTokens(String sentId) {
  try {
    TermQuery query = new TermQuery(new Term("sentid", sentId));
    TopDocs tp = searcher.search(query, 1);
    if (tp.totalHits > 0) {
      for (ScoreDoc s : tp.scoreDocs) {
        Document d = searcher.doc(s.doc);
        byte[] st = d.getBinaryValue("patterns").bytes;
        // try-with-resources releases the stream; ObjectInputStream holds deserialization state.
        try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(st))) {
          @SuppressWarnings("unchecked") // the index stores exactly this map type
          Map<Integer, Set<E>> patterns = (Map<Integer, Set<E>>) ois.readObject();
          return patterns;
        }
      }
    } else
      throw new RuntimeException("Why no patterns for sentid " + sentId + ". Number of documents in index are " + size());
  } catch (IOException | ClassNotFoundException e) {
    // multi-catch: both failure modes are rethrown identically, cause preserved
    throw new RuntimeException(e);
  }
  return null;
}
// Builds a boolean query mixing name-prefix, quoted-phrase, code, and category clauses, then maps
// the top 10 hits to LuceneResult objects (lat/lon/category read from stored fields).
// NOTE(review): fragment truncated at both ends — `list` is populated nowhere visible, the `stops`
// flag guards the STOP/CLUSTER category clauses, and several braces are elided; do not treat as
// compilable.
termQuery.add(new PrefixQuery(new Term("name", queryString)), BooleanClause.Occur.SHOULD); } else { List<String> list = new ArrayList<String>(); PhraseQuery phraseQuery = new PhraseQuery(); for (String phraseToken : token.substring(1, token.length() - 1).split(" ")) { phraseQuery.add(new Term("name", phraseToken.toLowerCase())); termQuery.add(new TermQuery(new Term("code", token)), BooleanClause.Occur.SHOULD); BooleanQuery typeQuery = new BooleanQuery(); if (stops) { typeQuery.add(new TermQuery(new Term("category", Category.STOP.name())), BooleanClause.Occur.SHOULD); typeQuery.add(new TermQuery(new Term("category", Category.CLUSTER.name())), BooleanClause.Occur.SHOULD); try { TopScoreDocCollector collector = TopScoreDocCollector.create(10, true); searcher.search(query, collector); ScoreDoc[] docs = collector.topDocs().scoreDocs; for (int i = 0; i < docs.length; i++) { LuceneResult lr = new LuceneResult(); Document doc = searcher.doc(docs[i].doc); lr.lat = doc.getField("lat").numericValue().doubleValue(); lr.lng = doc.getField("lon").numericValue().doubleValue(); String category = doc.getField("category").stringValue().toLowerCase(); String code; if (doc.getField("code") != null){
/**
 * Returns the ids of all indexed builds.
 *
 * NOTE(review): the original issued a MatchAllDocsQuery but never copied any field values into
 * {@code buildIds}, so an empty (presized) list was always returned; that suspicious behavior is
 * preserved here — confirm which stored field holds the build id before populating the list.
 * The reader leak, however, is fixed via try-with-resources.
 *
 * @return a list presized to the index's maxDoc (currently always empty — see note)
 * @throws IOException if the index cannot be opened or searched
 */
public List<String> getBuildIds() throws IOException {
  try (IndexReader reader = DirectoryReader.open(luceneDirectory)) {
    final IndexSearcher searcher = new IndexSearcher(reader);
    final Query everyDoc = new MatchAllDocsQuery();
    final List<String> buildIds = new ArrayList<>(reader.maxDoc());
    searcher.search(everyDoc, 12);
    return buildIds;
  }
}
@Override public long getTotalTokenCount() { LuceneSearcher luceneSearcher = getLuceneSearcher(1); try { RegexpQuery query = new RegexpQuery(new Term("totalTokenCount", ".*")); TopDocs docs = luceneSearcher.searcher.search(query, 1000); // Integer.MAX_VALUE might cause OOE on wrong index if (docs.totalHits == 0) { throw new RuntimeException("Expected 'totalTokenCount' meta documents not found in 1grams index: " + luceneSearcher.directory); } else if (docs.totalHits > 1000) { throw new RuntimeException("Did not expect more than 1000 'totalTokenCount' meta documents: " + docs.totalHits + " in " + luceneSearcher.directory); } else { long result = 0; for (ScoreDoc scoreDoc : docs.scoreDocs) { long tmp = Long.parseLong(luceneSearcher.reader.document(scoreDoc.doc).get("totalTokenCount")); if (tmp > result) { // due to the way FrequencyIndexCreator adds these totalTokenCount fields, we must not sum them, // but take the largest one: result = tmp; } } return result; } } catch (IOException e) { throw new RuntimeException(e); } }
// Fetches sorted hits for the query, deserializes the TAGS payload of the first document, and —
// when the queried symbol occurs exactly once — marks the definition as unique and builds an
// anchor URI from the document path plus the symbol.
// NOTE(review): fragment — the dangling `+ Util.URIEncodePath(...)` line is the tail of a string
// concatenation whose start is elided; `start`, `maxItems`, `sort`, `totalHits`, `hits`, and
// `uniqueDefinition` come from surrounding code not shown here.
TopFieldDocs fdocs = searcher.search(query, start + maxItems, sort); totalHits = fdocs.totalHits; hits = fdocs.scoreDocs; Document doc = searcher.doc(hits[0].doc); if (doc.getField(QueryBuilder.TAGS) != null) { byte[] rawTags = doc.getField(QueryBuilder.TAGS).binaryValue().bytes; Definitions tags = Definitions.deserialize(rawTags); String symbol = ((TermQuery) query).getTerm().text(); if (tags.occurrences(symbol) == 1) { uniqueDefinition = true; + Util.URIEncodePath(searcher.doc(hits[0].doc).get(QueryBuilder.PATH)) + '#' + Util.URIEncode(((TermQuery) query).getTerm().text());
private long getCount(Term term, LuceneSearcher luceneSearcher) { long result = 0; try { TopDocs docs = luceneSearcher.searcher.search(new TermQuery(term), 2000); if (docs.totalHits > 2000) { throw new RuntimeException("More than 2000 matches for '" + term + "' not supported for performance reasons: " + docs.totalHits + " matches in " + luceneSearcher.directory); } for (ScoreDoc scoreDoc : docs.scoreDocs) { String countStr = luceneSearcher.reader.document(scoreDoc.doc).get("count"); result += Long.parseLong(countStr); } //System.out.println(term + " -> " + result); } catch (IOException e) { throw new RuntimeException(e); } return result; }
// Autocomplete lookup: searches GRAMMED_WORDS_FIELD for the term, sorted by COUNT_FIELD
// (descending), and collects up to 5 SOURCE_WORD_FIELD values as suggestions.
// NOTE(review): fragment — the trailing `wordsMap.put(...)`/`sourceReader.close()` statements
// appear to belong to a different method in the original; `new Sort(COUNT_FIELD, true)` and the
// 4-arg `search(query, null, 5, sort)` look like an older Lucene API — confirm the target version.
Query query = new TermQuery(new Term(GRAMMED_WORDS_FIELD, term)); Sort sort = new Sort(COUNT_FIELD, true); TopDocs docs = autoCompleteSearcher.search(query, null, 5, sort); List<String> suggestions = new ArrayList<String>(); for (ScoreDoc doc : docs.scoreDocs) { suggestions.add(autoCompleteReader.document(doc.doc).get( SOURCE_WORD_FIELD)); wordsMap.put(word, sourceReader.docFreq(new Term( fieldToAutocomplete, word))); sourceReader.close();