/**
 * Create {@code LongValueFacetCounts}, counting the facet values for all provided hits.
 *
 * <p>If {@code valueSource} is null, values are read directly from the doc values of
 * {@code field}; in that case {@code multiValued} selects between single-valued and
 * multi-valued doc values. A non-null {@code valueSource} is always single valued, so
 * requesting {@code multiValued} with a value source is an error.
 *
 * @param field dimension name used to report results (and doc-values field when
 *     {@code valueSource} is null)
 * @param valueSource source of per-doc long values, or null to read doc values directly
 * @param hits collector holding the matching docs to count
 * @param multiValued whether the field holds multiple values per document
 * @throws IllegalArgumentException if {@code multiValued} is true and {@code valueSource}
 *     is non-null
 * @throws IOException if reading values fails
 */
public LongValueFacetCounts(String field, LongValuesSource valueSource, FacetsCollector hits, boolean multiValued) throws IOException {
  this.field = field;
  if (valueSource == null) {
    // Count straight from doc values; pick the iterator shape that matches the field:
    if (multiValued) {
      countMultiValued(field, hits.getMatchingDocs());
    } else {
      count(field, hits.getMatchingDocs());
    }
  } else {
    // value source is always single valued
    if (multiValued) {
      throw new IllegalArgumentException("can only compute multi-valued facets directly from doc values (when valueSource is null)");
    }
    count(valueSource, hits.getMatchingDocs());
  }
}
/**
 * Counts all facet values for this reader. This produces the same result as computing
 * facets on a {@link org.apache.lucene.search.MatchAllDocsQuery}, but is more efficient.
 */
public LongValueFacetCounts(String field, IndexReader reader, boolean multiValued) throws IOException {
  this.field = field;
  if (multiValued == false) {
    countAll(reader, field);
  } else {
    countAllMultiValued(reader, field);
  }
}
/** Returns results for the one dimension this implementation counts. */
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
  FacetResult onlyDim = getTopChildren(topN, field);
  return Collections.singletonList(onlyDim);
}
/**
 * Counts every value of a multi-valued field across all segments of the reader
 * (no hit filtering — equivalent to a match-all query).
 *
 * @param reader index to scan
 * @param field doc-values field to count
 * @throws IOException if reading doc values fails
 */
private void countAllMultiValued(IndexReader reader, String field) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
    if (values == null) {
      // this field has no doc values for this segment
      continue;
    }
    // If the field is actually single-valued in this segment, use the cheaper path:
    NumericDocValues singleValues = DocValues.unwrapSingleton(values);
    if (singleValues != null) {
      countAllOneSegment(singleValues);
    } else {
      int doc;
      while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        int limit = values.docValueCount();
        // totCount is incremented by the number of values, not the number of docs
        totCount += limit;
        for (int i = 0; i < limit; i++) {
          increment(values.nextValue());
        }
      }
    }
  }
}
/**
 * Counts directly from SortedNumericDocValues, restricted to the matching docs of each
 * segment.
 *
 * @param field doc-values field to count
 * @param matchingDocs per-segment hits collected by a FacetsCollector
 * @throws IOException if reading doc values fails
 */
private void countMultiValued(String field, List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs hits : matchingDocs) {
    SortedNumericDocValues values = hits.context.reader().getSortedNumericDocValues(field);
    if (values == null) {
      // this field has no doc values for this segment
      continue;
    }
    // If the field is actually single-valued in this segment, use the cheaper path:
    NumericDocValues singleValues = DocValues.unwrapSingleton(values);
    if (singleValues != null) {
      countOneSegment(singleValues, hits);
    } else {
      // Visit only docs that are both hits and have a value for this field:
      DocIdSetIterator it = ConjunctionDISI.intersectIterators(
          Arrays.asList(hits.bits.iterator(), values));
      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        int limit = values.docValueCount();
        // totCount is incremented by the number of values, not the number of docs
        totCount += limit;
        for (int i = 0; i < limit; i++) {
          increment(values.nextValue());
        }
      }
    }
  }
}
/**
 * Counts every document that has a value in the provided single-valued iterator,
 * incrementing {@code totCount} once per doc.
 *
 * <p>Fix: the original declared a local {@code doc} that captured {@code nextDoc()}'s
 * return but was never read; the dead local is removed.
 *
 * @param values single-valued doc values positioned before the first doc
 * @throws IOException if advancing the iterator fails
 */
private void countAllOneSegment(NumericDocValues values) throws IOException {
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    totCount++;
    increment(values.longValue());
  }
}
/**
 * Counts all facet values for the provided {@link LongValuesSource}. This produces the same result as computing
 * facets on a {@link org.apache.lucene.search.MatchAllDocsQuery}, but is more efficient.
 *
 * @param field dimension name used to report results
 * @param valueSource source of per-doc long values (always single valued)
 * @param reader index whose docs are all counted
 * @throws IOException if computing values fails
 */
public LongValueFacetCounts(String field, LongValuesSource valueSource, IndexReader reader) throws IOException {
  this.field = field;
  countAll(valueSource, field, reader);
}
/**
 * Returns the top facet values for the single counted dimension.
 *
 * @throws IllegalArgumentException if {@code dim} is not the counted field, or a non-empty
 *     path is given (this faceting is flat)
 */
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) {
  if (!dim.equals(field)) {
    throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
  }
  if (path.length > 0) {
    throw new IllegalArgumentException("path.length should be 0");
  }
  return getTopChildrenSortByCount(topN);
}
/**
 * Counts single-valued doc values of {@code field} over the matching docs of each segment.
 */
private void count(String field, List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs hits : matchingDocs) {
    NumericDocValues values = hits.context.reader().getNumericDocValues(field);
    // Segments where the field has no doc values contribute nothing:
    if (values != null) {
      countOneSegment(values, hits);
    }
  }
}
/** Optimized version that directly counts all doc values. */ private void countAll(IndexReader reader, String field) throws IOException { for (LeafReaderContext context : reader.leaves()) { NumericDocValues values = context.reader().getNumericDocValues(field); if (values == null) { // this field has no doc values for this segment continue; } countAllOneSegment(values); } }
// NOTE(review): this span is a truncated fragment in this view (its closing braces are not
// visible). Fix applied: appendCounts(labelValues) was called twice inside the same guarded
// branch, which would append the hashed counts to labelValues twice and produce duplicate
// entries; the guard flag countsAdded only prevents re-entry on later iterations, not the
// immediate duplicate call. Call it exactly once.
if (countsAdded == false && hashValues[i] >= counts.length) {
  countsAdded = true;
  appendCounts(labelValues);
/**
 * Counts single-valued doc values for one segment, restricted to the hits in
 * {@code hits.bits}; totCount is incremented once per counted doc.
 */
private void countOneSegment(NumericDocValues values, MatchingDocs hits) throws IOException {
  // Intersect the hits with docs that actually have a value for the field:
  DocIdSetIterator matched =
      ConjunctionDISI.intersectIterators(Arrays.asList(hits.bits.iterator(), values));
  while (matched.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    totCount++;
    increment(values.longValue());
  }
}
private void countAll(LongValuesSource valueSource, String field, IndexReader reader) throws IOException { for (LeafReaderContext context : reader.leaves()) { LongValues fv = valueSource.getValues(context, null); int maxDoc = context.reader().maxDoc(); for (int doc = 0; doc < maxDoc; doc++) { // Skip missing docs: if (fv.advanceExact(doc)) { increment(fv.longValue()); totCount++; } } } }
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { LongValues fv = valueSource.getValues(hits.context, null); // NOTE: this is not as efficient as working directly with the doc values APIs in the sparse case // because we are doing a linear scan across all hits, but this API is more flexible since a // LongValuesSource can compute interesting values at query time DocIdSetIterator docs = hits.bits.iterator(); for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS;) { // Skip missing docs: if (fv.advanceExact(doc)) { increment(fv.longValue()); totCount++; } doc = docs.nextDoc(); } } }