/**
 * Return an upper bound of the maximum bucket ordinal seen so far.
 */
public final long maxBucketOrd() {
    // docCounts grows to cover every ordinal ever collected, so its size is a
    // valid (possibly loose) upper bound on the highest ordinal.
    return docCounts.size();
}
/** * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal) */ public final int bucketDocCount(long bucketOrd) { if (bucketOrd >= docCounts.size()) { // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator. // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value // matches the filter, then the filter will never collect bucket ord 3. However, the long terms agg will call bucketAggregations(3) // on the filter aggregator anyway to build sub-aggregations. return 0; } else { return docCounts.get(bucketOrd); } }
/**
 * Number of values stored for the given bucket, or 0 for a bucket that was
 * never used.
 */
public int size(long bucket) {
    if (bucket >= sizes.size()) {
        // Never-collected bucket: nothing stored.
        return 0;
    }
    final int cached = sizes.get(bucket);
    // Sanity check the cached counter against a full recount (assertions only).
    assert cached == recomputedSize(bucket);
    return cached;
}
/** Grow an array to a size that is larger than <code>minSize</code>,
 * preserving content, and potentially reusing part of the provided array. */
public IntArray grow(IntArray array, long minSize) {
    if (array.size() >= minSize) {
        // Already large enough — reuse as-is.
        return array;
    }
    // Over-allocate so repeated grow() calls amortize to O(1) per element.
    final long target = overSize(minSize, PageCacheRecycler.INT_PAGE_SIZE, Integer.BYTES);
    return resize(array, target);
}
/**
 * Returns a representation of the given bucket's sketch that can be compared
 * with {@code equals}: the set of encoded hashes while in linear-counting
 * mode, or a histogram of run lengths in HLL mode.
 */
private Object getComparableData(long bucket) {
    if (algorithm.get(bucket) == LINEAR_COUNTING) {
        Set<Integer> values = new HashSet<>();
        try (IntArray hashSetValues = hashSet.values(bucket)) {
            for (long i = 0; i < hashSetValues.size(); i++) {
                values.add(hashSetValues.get(i));
            }
        }
        return values;
    } else {
        // Histogram: run length -> number of registers with that run length.
        Map<Byte, Integer> values = new HashMap<>();
        // BUG FIX: iterate over this bucket's m registers only. The previous
        // bound, runLens.size(), spans the registers of EVERY bucket, so it
        // both over-counted and read past this bucket's slice
        // (runLens.get((bucket << p) + i) with i up to the global size).
        for (long i = 0; i < m; i++) {
            byte runLength = runLens.get((bucket << p) + i);
            values.merge(runLength, 1, Integer::sum);
        }
        return values;
    }
}
/** Resize the array to the exact provided size. */
public IntArray resize(IntArray array, long size) {
    if (array instanceof BigIntArray) {
        // Paged implementation can be resized without copying every element.
        return resizeInPlace((BigIntArray) array, size);
    }
    // Fallback: allocate a fresh array, copy the overlapping prefix, then
    // release the old storage.
    final AbstractArray impl = (AbstractArray) array;
    final IntArray copy = newIntArray(size, impl.clearOnResize);
    final long toCopy = Math.min(size, array.size());
    for (long index = 0; index < toCopy; index++) {
        copy.set(index, array.get(index));
    }
    array.close();
    return copy;
}
/**
 * Merge doc counts according to {@code mergeMap}: the count of old bucket
 * {@code i} is added into new bucket {@code mergeMap[i]}. The old counts
 * array is released once the merge completes.
 */
public final void mergeBuckets(long[] mergeMap, long newNumBuckets) {
    try (IntArray oldDocCounts = docCounts) {
        docCounts = bigArrays.newIntArray(newNumBuckets, true);
        docCounts.fill(0, newNumBuckets, 0);
        for (int ord = 0; ord < oldDocCounts.size(); ord++) {
            final int count = oldDocCounts.get(ord);
            // Skip empty buckets — incrementing by zero would be wasted work.
            if (count != 0) {
                docCounts.increment(mergeMap[ord], count);
            }
        }
    }
}
/**
 * Folds the per-segment doc counts accumulated so far into the global bucket
 * doc counts, translating segment ordinals to global ordinals via
 * {@code mapping}. Resets each segment slot to 0 so the counts array can be
 * reused for the next segment.
 */
private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IOException {
    // Slot 0 is unused: counts are stored at ordinal + 1 (see the -1 below).
    for (long i = 1; i < segmentDocCounts.size(); i++) {
        // We use set(...) here, because we need to reset the slot to 0.
        // segmentDocCounts get reused over the segments and otherwise counts would be too high.
        final int inc = segmentDocCounts.set(i, 0);
        if (inc == 0) {
            continue;
        }
        final long ord = i - 1; // remember we do +1 when counting
        final long globalOrd = mapping.applyAsLong(ord);
        // NOTE(review): when bucketOrds.find(globalOrd) does not find the
        // ordinal it presumably returns a negative value — confirm that
        // incrementBucketDocCount tolerates that case.
        long bucketOrd = bucketOrds == null ? globalOrd : bucketOrds.find(globalOrd);
        incrementBucketDocCount(bucketOrd, inc);
    }
}
}
/**
 * Returns a newly allocated array holding every value stored for
 * {@code bucket}. The caller owns (and must release) the returned array.
 */
public IntArray values(final long bucket) {
    final int numValues = size(bucket);
    final IntArray result = bigArrays.newIntArray(numValues);
    if (numValues == 0) {
        return result;
    }
    // Scan every slot of the bucket's table; zero marks an empty slot.
    int written = 0;
    for (int slot = 0; slot < capacity; slot++) {
        final int value = get(bucket, slot);
        if (value != 0) {
            result.set(written++, value);
        }
    }
    assert written == result.size();
    return result;
}
/**
 * Serializes the sketch of the given bucket: precision, then the algorithm
 * flag, then either the recorded hashes (linear counting) or the bucket's
 * run-length registers (HLL).
 */
public void writeTo(long bucket, StreamOutput out) throws IOException {
    out.writeVInt(p);
    if (algorithm.get(bucket) == LINEAR_COUNTING) {
        out.writeBoolean(LINEAR_COUNTING);
        try (IntArray hashes = hashSet.values(bucket)) {
            final long numHashes = hashes.size();
            out.writeVLong(numHashes);
            for (long i = 0; i < numHashes; i++) {
                out.writeInt(hashes.get(i));
            }
        }
    } else {
        out.writeBoolean(HYPERLOGLOG);
        // This bucket's registers live at [bucket << p, (bucket << p) + m).
        final long start = bucket << p;
        for (long i = 0; i < m; i++) {
            out.writeByte(runLens.get(start + i));
        }
    }
}
/**
 * Converts the given bucket from linear counting to the HLL representation,
 * replaying every recorded hash into the bucket's run-length registers.
 */
void upgradeToHll(long bucket) {
    ensureCapacity(bucket + 1);
    // Snapshot the recorded hashes before clobbering shared storage below;
    // try-with-resources releases them on every path.
    try (IntArray values = hashSet.values(bucket)) {
        // Zero this bucket's m registers, then replay each encoded hash.
        runLens.fill(bucket << p, (bucket << p) + m, (byte) 0);
        for (long i = 0; i < values.size(); ++i) {
            collectHllEncoded(bucket, values.get(i));
        }
        // Flip the per-bucket algorithm bit to HLL.
        algorithm.set(bucket);
    }
}
/**
 * Return an upper bound of the maximum bucket ordinal seen so far.
 */
public final long maxBucketOrd() {
    // The doc-count array is grown to cover every collected ordinal, hence
    // its size bounds the highest ordinal from above (possibly loosely).
    return docCounts.size();
}
/**
 * Return an upper bound of the maximum bucket ordinal seen so far.
 */
public final long maxBucketOrd() {
    // Upper bound, not exact maximum: the array may be over-sized relative
    // to the ordinals actually collected.
    return docCounts.size();
}
/**
 * Merge the sketch of {@code otherBucket} from {@code other} into
 * {@code thisBucket} of this structure. Both sketches must share the same
 * precision {@code p}.
 *
 * @throws IllegalArgumentException if the precisions differ
 */
public void merge(long thisBucket, HyperLogLogPlusPlus other, long otherBucket) {
    if (p != other.p) {
        throw new IllegalArgumentException();
    }
    ensureCapacity(thisBucket + 1);
    if (other.algorithm.get(otherBucket) == LINEAR_COUNTING) {
        // Other side is still linear counting: replay its recorded hashes
        // into this bucket one by one.
        final IntArray values = other.hashSet.values(otherBucket);
        try {
            for (long i = 0; i < values.size(); ++i) {
                final int encoded = values.get(i);
                // Re-check the algorithm on every iteration: collecting an
                // encoded hash may upgrade this bucket from linear counting
                // to HLL mid-loop.
                if (algorithm.get(thisBucket) == LINEAR_COUNTING) {
                    collectLcEncoded(thisBucket, encoded);
                } else {
                    collectHllEncoded(thisBucket, encoded);
                }
            }
        } finally {
            Releasables.close(values);
        }
    } else {
        // Other side is already HLL: force this bucket to HLL too, then take
        // the register-wise maximum of the two run-length arrays.
        if (algorithm.get(thisBucket) != HYPERLOGLOG) {
            upgradeToHll(thisBucket);
        }
        final long thisStart = thisBucket << p;
        final long otherStart = otherBucket << p;
        for (int i = 0; i < m; ++i) {
            runLens.set(thisStart + i, (byte) Math.max(runLens.get(thisStart + i), other.runLens.get(otherStart + i)));
        }
    }
}
/**
 * Number of values stored for the given bucket, or 0 if the bucket was
 * never used.
 */
public int size(long bucket) {
    if (bucket >= sizes.size()) {
        return 0;
    }
    final int size = sizes.get(bucket);
    // Cross-check the cached counter against a full recount (assertions only).
    assert size == recomputedSize(bucket);
    return size;
}
/** Grow an array to a size that is larger than <code>minSize</code>,
 * preserving content, and potentially reusing part of the provided array. */
public IntArray grow(IntArray array, long minSize) {
    if (array.size() >= minSize) {
        // Nothing to do: the array is already big enough.
        return array;
    }
    // Over-allocate so that repeated calls amortize to constant time per element.
    final long grown = overSize(minSize, INT_PAGE_SIZE, Integer.BYTES);
    return resize(array, grown);
}
/** Grow an array to a size that is larger than <code>minSize</code>,
 * preserving content, and potentially reusing part of the provided array. */
public IntArray grow(IntArray array, long minSize) {
    if (minSize <= array.size()) {
        // Already large enough — reuse as-is.
        return array;
    }
    // Over-size past minSize so repeated growth amortizes to O(1) per element.
    final long newSize = overSize(minSize, INT_PAGE_SIZE, Integer.BYTES);
    return resize(array, newSize);
}
/**
 * Number of values stored for the given bucket; an out-of-range bucket
 * ordinal is treated as an empty bucket.
 */
public int size(long bucket) {
    if (bucket >= sizes.size()) {
        return 0;
    }
    final int size = sizes.get(bucket);
    // Verify the cached size matches a recomputation (debug/assert builds).
    assert size == recomputedSize(bucket);
    return size;
}
/** Grow an array to a size that is larger than <code>minSize</code>,
 * preserving content, and potentially reusing part of the provided array. */
public IntArray grow(IntArray array, long minSize) {
    if (minSize <= array.size()) {
        // Already large enough — reuse as-is.
        return array;
    }
    // Use Integer.BYTES (Java 8+) instead of the deprecated Lucene constant
    // RamUsageEstimator.NUM_BYTES_INT, for consistency with the other
    // grow(...) variants in this codebase which already pass Integer.BYTES.
    final long newSize = overSize(minSize, INT_PAGE_SIZE, Integer.BYTES);
    return resize(array, newSize);
}
/**
 * Merge doc counts according to {@code mergeMap}: the count of old bucket
 * {@code i} is added into new bucket {@code mergeMap[i]}. The previous
 * counts array is released when the merge completes.
 */
public final void mergeBuckets(long[] mergeMap, long newNumBuckets) {
    // try-with-resources releases the old counts even if the merge throws.
    try (IntArray oldDocCounts = docCounts) {
        docCounts = bigArrays.newIntArray(newNumBuckets, true);
        docCounts.fill(0, newNumBuckets, 0);
        for (int i = 0; i < oldDocCounts.size(); i++) {
            int docCount = oldDocCounts.get(i);
            // Skip empty buckets to avoid pointless increments.
            if (docCount != 0) {
                docCounts.increment(mergeMap[i], docCount);
            }
        }
    }
}