/** Number of documents recorded in the given {@code slot}. */
int getDocCount(int slot) {
    return docCounts.get(slot);
}
/** * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal) */ public final int bucketDocCount(long bucketOrd) { if (bucketOrd >= docCounts.size()) { // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator. // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value // matches the filter, then the filter will never collect bucket ord 3. However, the long terms agg will call bucketAggregations(3) // on the filter aggregator anyway to build sub-aggregations. return 0; } else { return docCounts.get(bucketOrd); } }
@Override
protected void removeAndAdd(long index) {
    // Re-insert the entry at `index` during a rehash: look up its id with a
    // sentinel of -1, then replay its stored hash code into the new table.
    final long id = id(index, -1);
    assert id >= 0;
    reset(hashes.get(id), id);
}
/** Number of entries stored for {@code bucket}; 0 for buckets never written to. */
public int size(long bucket) {
    if (bucket < sizes.size()) {
        final int cached = sizes.get(bucket);
        // Cross-check the cached counter against a from-scratch recount.
        assert cached == recomputedSize(bucket);
        return cached;
    }
    return 0;
}
/**
 * Returns a value-comparable snapshot of the given bucket's state, for use in
 * equality checks: the set of encoded hashes when the bucket is in
 * linear-counting mode, or a histogram of register run lengths in HLL mode.
 */
private Object getComparableData(long bucket) {
    if (algorithm.get(bucket) == LINEAR_COUNTING) {
        Set<Integer> values = new HashSet<>();
        try (IntArray hashSetValues = hashSet.values(bucket)) {
            for (long i = 0; i < hashSetValues.size(); i++) {
                values.add(hashSetValues.get(i));
            }
        }
        return values;
    } else {
        Map<Byte, Integer> values = new HashMap<>();
        // BUG FIX: the loop previously ran to runLens.size(), i.e. the register
        // count of ALL buckets. Combined with the (bucket << p) offset this read
        // out of range for any bucket > 0, and for bucket 0 it mixed other
        // buckets' registers into the histogram. Each bucket owns exactly
        // m = 1 << p registers.
        for (long i = 0; i < m; i++) {
            byte runLength = runLens.get((bucket << p) + i);
            values.merge(runLength, 1, Integer::sum);
        }
        return values;
    }
}
/** Resize the array to the exact provided size. */
public IntArray resize(IntArray array, long size) {
    if (array instanceof BigIntArray) {
        // Paged implementation supports resizing without copying.
        return resizeInPlace((BigIntArray) array, size);
    }
    // Single-page implementation: allocate a replacement, copy the overlap,
    // and release the original.
    final AbstractArray source = (AbstractArray) array;
    final IntArray replacement = newIntArray(size, source.clearOnResize);
    final long overlap = Math.min(size, array.size());
    for (long i = 0; i < overlap; ++i) {
        replacement.set(i, array.get(i));
    }
    array.close();
    return replacement;
}
/**
 * Remaps doc counts into a fresh array of {@code newNumBuckets} buckets: the
 * count held at old ordinal {@code i} is added into ordinal {@code mergeMap[i]}.
 * The previous array is released when done.
 */
public final void mergeBuckets(long[] mergeMap, long newNumBuckets) {
    try (IntArray oldDocCounts = docCounts) {
        docCounts = bigArrays.newIntArray(newNumBuckets, true);
        docCounts.fill(0, newNumBuckets, 0);
        // Use a long index for consistency with IntArray.size()'s long return
        // type and the other BigArrays loops in this file. mergeMap is a plain
        // Java array, so valid ordinals always fit in an int.
        for (long i = 0; i < oldDocCounts.size(); i++) {
            final int docCount = oldDocCounts.get(i);
            if (docCount != 0) {
                docCounts.increment(mergeMap[(int) i], docCount);
            }
        }
    }
}
/**
 * Converts the given bucket from linear counting to HyperLogLog by zeroing its
 * register range and replaying every encoded hash from the LC hash set.
 */
void upgradeToHll(long bucket) {
    ensureCapacity(bucket + 1);
    // Snapshot the LC values before the registers are cleared, since both
    // representations share the bucket's backing storage.
    try (IntArray encodedValues = hashSet.values(bucket)) {
        final long firstRegister = bucket << p;
        runLens.fill(firstRegister, firstRegister + m, (byte) 0);
        for (long i = 0; i < encodedValues.size(); ++i) {
            collectHllEncoded(bucket, encodedValues.get(i));
        }
        algorithm.set(bucket);
    }
}
// NOTE(review): incomplete fragment — the enclosing method and closing braces
// are outside this view, so behavior notes here are tentative. Looks like it
// resolves the doc frequency for the current term ordinal when the caller
// requested doc freqs, with NOT_FOUND marking a missing entry — TODO confirm
// against the full method body.
boolean found = true;
if (needDocFreqs) {
    currentDocFreq = termDocFreqs.get(currentTermOrd);
    found = currentDocFreq != NOT_FOUND;
/**
 * Merges {@code otherBucket} of {@code other} into {@code thisBucket} of this
 * sketch. Both sketches must have been built with the same precision {@code p}.
 */
public void merge(long thisBucket, HyperLogLogPlusPlus other, long otherBucket) {
    if (p != other.p) {
        throw new IllegalArgumentException();
    }
    ensureCapacity(thisBucket + 1);
    if (other.algorithm.get(otherBucket) != LINEAR_COUNTING) {
        // Other side is HLL: force this bucket to HLL too, then merge
        // register-by-register by taking the max run length.
        if (algorithm.get(thisBucket) != HYPERLOGLOG) {
            upgradeToHll(thisBucket);
        }
        final long thisStart = thisBucket << p;
        final long otherStart = otherBucket << p;
        for (int i = 0; i < m; ++i) {
            final byte merged = (byte) Math.max(runLens.get(thisStart + i), other.runLens.get(otherStart + i));
            runLens.set(thisStart + i, merged);
        }
    } else {
        // Other side is linear counting: replay each encoded hash into this
        // bucket. Re-check the algorithm on every iteration because collecting
        // may upgrade this bucket to HLL mid-loop.
        try (IntArray values = other.hashSet.values(otherBucket)) {
            for (long i = 0; i < values.size(); ++i) {
                final int encoded = values.get(i);
                if (algorithm.get(thisBucket) == LINEAR_COUNTING) {
                    collectLcEncoded(thisBucket, encoded);
                } else {
                    collectHllEncoded(thisBucket, encoded);
                }
            }
        }
    }
}
/**
 * Serializes the given bucket: precision, then a flag for the representation,
 * then either the LC hash set (count-prefixed ints) or the m HLL registers.
 */
public void writeTo(long bucket, StreamOutput out) throws IOException {
    out.writeVInt(p);
    if (algorithm.get(bucket) != LINEAR_COUNTING) {
        out.writeBoolean(HYPERLOGLOG);
        final long start = bucket << p;
        for (long i = start, end = start + m; i < end; ++i) {
            out.writeByte(runLens.get(i));
        }
    } else {
        out.writeBoolean(LINEAR_COUNTING);
        try (IntArray hashes = hashSet.values(bucket)) {
            final long count = hashes.size();
            out.writeVLong(count);
            for (long i = 0; i < count; ++i) {
                out.writeInt(hashes.get(i));
            }
        }
    }
}
// Accepts every parent and scores it as the accumulated child score divided by
// the number of child occurrences recorded for it (i.e. the average).
// NOTE(review): kept as two statements — the compound /= may carry an implicit
// narrowing cast depending on currentScore's declared type, which is outside
// this view; folding the expressions could change the arithmetic type.
@Override
protected boolean acceptAndScore(long parentIdx) {
    currentScore = scores.get(parentIdx);
    currentScore /= occurrences.get(parentIdx);
    return true;
}
/** Returns how many entries the given bucket holds; unknown buckets are empty. */
public int size(long bucket) {
    // Buckets beyond the end of the sizes array have never been written to.
    if (bucket >= sizes.size()) {
        return 0;
    }
    final int stored = sizes.get(bucket);
    assert stored == recomputedSize(bucket);
    return stored;
}
@Override
protected void removeAndAdd(long index) {
    // Rehash path: resolve the slot id (sentinel -1), then re-seed the new
    // table with the hash code previously stored for that slot.
    final long slotId = id(index, -1);
    assert slotId >= 0;
    final int storedCode = hashes.get(slotId);
    reset(storedCode, slotId);
}
/** Entry count for {@code bucket}, or 0 when the bucket was never populated. */
public int size(long bucket) {
    final boolean tracked = bucket < sizes.size();
    if (!tracked) {
        return 0;
    }
    final int count = sizes.get(bucket);
    // The maintained counter must agree with a full recount.
    assert count == recomputedSize(bucket);
    return count;
}
/** Number of values recorded under {@code bucket}; out-of-range buckets are empty. */
public int size(long bucket) {
    if (bucket < sizes.size()) {
        final int answer = sizes.get(bucket);
        assert answer == recomputedSize(bucket) : "cached size drifted from recount";
        return answer;
    }
    return 0;
}
@Override
protected void removeAndAdd(long index) {
    // During table growth, move the entry at `index` into its new position by
    // replaying the hash code kept alongside it.
    final long entryId = id(index, -1);
    assert entryId >= 0;
    reset(hashes.get(entryId), entryId);
}
/** Size of the given bucket; buckets past the tracked range report 0. */
public int size(long bucket) {
    if (bucket >= sizes.size()) {
        // Never-written bucket.
        return 0;
    }
    final int recorded = sizes.get(bucket);
    assert recorded == recomputedSize(bucket);
    return recorded;
}
@Override
protected void removeAndAdd(long index) {
    // Rehash helper: fetch the slot's id (with -1 as the missing sentinel) and
    // reinsert using its previously stored hash code.
    final long resolved = id(index, -1);
    assert resolved >= 0;
    final int code = hashes.get(resolved);
    reset(code, resolved);
}
/**
 * Rebuilds the doc-count array with {@code newNumBuckets} buckets, folding the
 * count at each old ordinal into {@code mergeMap[ordinal]} and releasing the
 * old array afterwards.
 */
public final void mergeBuckets(long[] mergeMap, long newNumBuckets) {
    final IntArray previousCounts = docCounts;
    try {
        docCounts = bigArrays.newIntArray(newNumBuckets, true);
        docCounts.fill(0, newNumBuckets, 0);
        final long total = previousCounts.size();
        for (int ord = 0; ord < total; ord++) {
            final int docCount = previousCounts.get(ord);
            if (docCount != 0) {
                docCounts.increment(mergeMap[ord], docCount);
            }
        }
    } finally {
        previousCounts.close();
    }
}