/**
 * Runs searches for each segment separately, using the
 * provided ExecutorService. IndexSearcher will not
 * close/awaitTermination this ExecutorService on
 * close; you must do so, eventually, on your own. NOTE:
 * if you are using {@link NIOFSDirectory}, do not use
 * the shutdownNow method of ExecutorService as this uses
 * Thread.interrupt under-the-hood which can silently
 * close file descriptors (see <a
 * href="https://issues.apache.org/jira/browse/LUCENE-2239">LUCENE-2239</a>).
 *
 * @lucene.experimental
 */
public IndexSearcher(IndexReader r, ExecutorService executor) {
  this(r.getContext(), executor);
}
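// A minimal usage sketch (an assumption, not from the source): the caller owns
// the executor's lifecycle, per the Javadoc above. The pool size, timeout, and
// method name are illustrative.
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

void searchWithExecutor(IndexReader reader) throws Exception {
  ExecutorService executor = Executors.newFixedThreadPool(4);
  IndexSearcher searcher = new IndexSearcher(reader, executor);
  try {
    // run searches here; segments may be searched concurrently on pool threads
  } finally {
    // shut the executor down yourself; avoid shutdownNow() with NIOFSDirectory
    // (LUCENE-2239: Thread.interrupt can silently close file descriptors)
    executor.shutdown();
    executor.awaitTermination(30, TimeUnit.SECONDS);
  }
}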
/**
 * Returns the reader's leaves, or itself if this reader is atomic.
 * This is a convenience method calling {@code this.getContext().leaves()}.
 *
 * @see IndexReaderContext#leaves()
 */
public final List<LeafReaderContext> leaves() {
  return getContext().leaves();
}
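// Hedged sketch of the per-segment iteration this convenience method enables;
// the field name "myField" is illustrative.
void perSegment(IndexReader reader) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    Terms terms = leaf.reader().terms("myField"); // null if the segment lacks the field
    // leaf.docBase maps segment-local doc ids back to top-level ones:
    // globalDoc = leaf.docBase + localDoc
  }
}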
// Fragment: refresh any TermContext that was built against a different reader,
// then rescale its frequencies to the blended df/ttf (df and ttf come from
// surrounding code not shown here).
final TermContext[] contexts = ArrayUtil.copyOfSubArray(this.contexts, 0, this.contexts.length);
for (int i = 0; i < contexts.length; ++i) {
  if (contexts[i] == null || contexts[i].wasBuiltFor(reader.getContext()) == false) {
    contexts[i] = TermContext.build(reader.getContext(), terms[i]);
    contexts[i] = adjustFrequencies(reader.getContext(), contexts[i], df, ttf);
  }
}
/**
 * Compute a feature value that may be used as the {@code pivot} parameter of
 * the {@link #newSaturationQuery(String, String, float, float)} and
 * {@link #newSigmoidQuery(String, String, float, float, float)} factory
 * methods. In practice, the implementation averages the int bits of the float
 * representations before converting the average back to a float. Given that
 * floats store the exponent in the higher bits, the result approximates the
 * geometric mean of all feature values.
 *
 * @param reader the {@link IndexReader} to search against
 * @param featureField the field that stores features
 * @param featureName the name of the feature
 */
static float computePivotFeatureValue(IndexReader reader, String featureField, String featureName) throws IOException {
  Term term = new Term(featureField, featureName);
  TermContext context = TermContext.build(reader.getContext(), term);
  if (context.docFreq() == 0) {
    // Avoid division by zero. The return value doesn't matter much here: the
    // term doesn't exist, so it will never be used for scoring. Just make
    // sure to return a legal value.
    return 1;
  }
  float avgFreq = (float) ((double) context.totalTermFreq() / context.docFreq());
  return decodeFeatureValue(avgFreq);
}
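// Hedged usage sketch: feed the computed pivot into a saturation query. The
// field/feature names and the weight are illustrative, and this assumes the
// package-private helper above is reachable from the calling code.
float pivot = computePivotFeatureValue(reader, "features", "pagerank");
Query boost = FeatureField.newSaturationQuery("features", "pagerank", 1.0f, pivot);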
final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
  IndexReaderContext topReaderContext = reader.getContext();
  for (LeafReaderContext context : topReaderContext.leaves()) {
    final Terms terms = context.reader().terms(query.field);
    if (terms == null) {
      // field does not exist
      continue;
    }
    final TermsEnum termsEnum = getTermsEnum(query, terms, collector.attributes);
    assert termsEnum != null;
    if (termsEnum == TermsEnum.EMPTY) continue;
    collector.setReaderContext(topReaderContext, context);
    collector.setNextEnum(termsEnum);
    BytesRef bytes;
    while ((bytes = termsEnum.next()) != null) {
      if (!collector.collect(bytes)) {
        return; // interrupt whole term collection, so also don't iterate other subReaders
      }
    }
  }
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (Objects.equals(reader.getContext().id(), readerId) == false) {
    return new MinDocQuery(minDoc, reader.getContext().id());
  }
  return this;
}
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
  // We know it's a low-level reader with a matching docId, since that's how we call the highlighter.
  SourceLookup sourceLookup = searchContext.lookup().source();
  sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
  List<Object> values = sourceLookup.extractRawValues(fieldType.name());
  Field[] fields = new Field[values.size()];
  for (int i = 0; i < values.size(); i++) {
    fields[i] = new Field(fieldType.name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
  }
  return fields;
}
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
  // We know it's a low-level reader with a matching docId, since that's how we call the highlighter.
  SourceLookup sourceLookup = searchContext.lookup().source();
  sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
  List<Object> values = sourceLookup.extractRawValues(fieldType.name());
  if (values.isEmpty()) {
    return EMPTY_FIELDS;
  }
  Field[] fields = new Field[values.size()];
  for (int i = 0; i < values.size(); i++) {
    fields[i] = new Field(fieldType.name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
  }
  return fields;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  if (readerId == null) {
    throw new IllegalStateException("Rewrite first");
  } else if (Objects.equals(searcher.getIndexReader().getContext().id(), readerId) == false) {
    throw new IllegalStateException("Executing against a different reader than the query has been rewritten against");
  }
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final int maxDoc = context.reader().maxDoc();
      if (context.docBase + maxDoc <= minDoc) {
        return null;
      }
      final int segmentMinDoc = Math.max(0, minDoc - context.docBase);
      final DocIdSetIterator disi = new MinDocIterator(segmentMinDoc, maxDoc);
      return new ConstantScoreScorer(this, score(), disi);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      // Let's not cache this query, the cached iterator would use more memory
      // and be slower anyway.
      // Also, matches in a given segment depend on the other segments, which
      // makes it a bad candidate for per-segment caching.
      return false;
    }
  };
}
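// Hedged usage sketch: the weight above requires the query to have been
// rewritten against the same reader, so rewrite explicitly before searching.
// The single-int constructor is assumed from the rewrite method shown earlier.
Query query = new MinDocQuery(1000);        // match only docs with id >= 1000
Query rewritten = searcher.rewrite(query);  // binds the query to this reader's id
TopDocs hits = searcher.search(rewritten, 10);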
// Fragment: build the term context for a type's marker term and check whether
// any documents carry it.
if (uniqueTypes.add(type)) {
  Term term = new Term(CONTENT_TYPE, type);
  TermContext context = TermContext.build(reader.getContext(), term);
  if (context.docFreq() == 0) {
    // ... fragment continues: the type has no documents
  }
}
// Fragment (mirrors segmentsStats below): the first loop fills segment info
// from one searcher's leaves; the second adds any segments another searcher
// sees that are not yet in the map.
for (LeafReaderContext ctx : searcher.reader().getContext().leaves()) {
  fillSegmentInfo(Lucene.segmentReader(ctx.reader()), verbose, true, segments);
}
for (LeafReaderContext ctx : searcher.reader().getContext().leaves()) {
  SegmentReader segmentReader = Lucene.segmentReader(ctx.reader());
  if (segments.containsKey(segmentReader.getSegmentName()) == false) {
    // ... fragment continues: fill info for the previously unseen segment
  }
}
// Fragment: cap the adjusted doc freq at maxDoc, then rescale the total term
// freq when valid statistics are available (actualDf, prev, current, sumTTF,
// and fixedTTF come from surrounding code not shown here).
actualDf++;
contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf));
prev = current;
if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) {
  contexts[i] = adjustTTF(reader.getContext(), contexts[i], fixedTTF);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
  Query rewritten = super.rewrite(reader);
  if (rewritten != this) {
    return rewritten;
  }
  IndexReaderContext context = reader.getContext();
  TermContext[] ctx = new TermContext[terms.length];
  int[] docFreqs = new int[ctx.length];
  for (int i = 0; i < terms.length; i++) {
    ctx[i] = TermContext.build(context, terms[i]);
    docFreqs[i] = ctx[i].docFreq();
  }
  final int maxDoc = reader.maxDoc();
  blend(ctx, maxDoc, reader);
  return topLevelQuery(terms, ctx, docFreqs, maxDoc);
}
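// Hedged usage sketch: in Elasticsearch, blended term queries are usually
// obtained via the dis-max factory; the rewrite above then builds per-term
// contexts against the actual reader and blends their statistics. The terms
// and tie-breaker value are illustrative.
Term[] blendTerms = new Term[] { new Term("title", "quick"), new Term("body", "quick") };
Query blended = BlendedTermQuery.dismaxBlendedQuery(blendTerms, 0.1f);
Query rewritten = searcher.rewrite(blended);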
/**
 * Global stats on segments.
 */
public SegmentsStats segmentsStats(boolean includeSegmentFileSizes) {
  ensureOpen();
  Set<String> segmentName = new HashSet<>();
  SegmentsStats stats = new SegmentsStats();
  try (Searcher searcher = acquireSearcher("segments_stats", SearcherScope.INTERNAL)) {
    for (LeafReaderContext ctx : searcher.reader().getContext().leaves()) {
      SegmentReader segmentReader = Lucene.segmentReader(ctx.reader());
      fillSegmentStats(segmentReader, includeSegmentFileSizes, stats);
      segmentName.add(segmentReader.getSegmentName());
    }
  }
  try (Searcher searcher = acquireSearcher("segments_stats", SearcherScope.EXTERNAL)) {
    for (LeafReaderContext ctx : searcher.reader().getContext().leaves()) {
      SegmentReader segmentReader = Lucene.segmentReader(ctx.reader());
      if (segmentName.contains(segmentReader.getSegmentName()) == false) {
        fillSegmentStats(segmentReader, includeSegmentFileSizes, stats);
      }
    }
  }
  writerSegmentStats(stats);
  return stats;
}
public static HitEnum fromPostings(IndexReader reader, int docId, String fieldName, CompiledAutomaton acceptable,
    TermWeigher<BytesRef> queryWeigher, TermWeigher<BytesRef> corpusWeigher,
    TermSourceFinder<BytesRef> sourceFinder) throws IOException {
  List<LeafReaderContext> leaves = reader.getContext().leaves();
  int leaf = ReaderUtil.subIndex(docId, leaves);
  LeafReaderContext subcontext = leaves.get(leaf);
  LeafReader atomicReader = subcontext.reader();
  docId -= subcontext.docBase;
  return fromTerms(atomicReader.terms(fieldName), acceptable, docId, queryWeigher, corpusWeigher, sourceFinder);
}
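// The global-to-leaf doc id mapping used above, as a standalone hedged sketch:
// ReaderUtil.subIndex binary-searches the leaves by docBase, and subtracting
// docBase yields a segment-local doc id. "globalDocId" is illustrative.
List<LeafReaderContext> leaves = reader.getContext().leaves();
int leafIndex = ReaderUtil.subIndex(globalDocId, leaves);
LeafReaderContext leaf = leaves.get(leafIndex);
int localDocId = globalDocId - leaf.docBase;
// localDocId is now valid against leaf.reader(), e.g. leaf.reader().document(localDocId)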
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
  // We know it's a low-level reader with a matching docId, since that's how we call the highlighter.
  SourceLookup sourceLookup = searchContext.lookup().source();
  sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
  List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName()));
  Field[] fields = new Field[values.size()];
  for (int i = 0; i < values.size(); i++) {
    fields[i] = new Field(mapper.fieldType().names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
  }
  return fields;
}