/**
 * Produces the regular {@link #toString()} text and, when the encoding is DENSE or SPARSE,
 * appends {@code ", "} followed by the extended string of the register in use.
 *
 * @return the summary alone for any other encoding, otherwise the summary plus register details
 */
public String toStringExtended() {
  // Evaluate toString() first, exactly as the concatenation in the original ordering does.
  final String summary = toString();
  if (encoding.equals(EncodingType.DENSE)) {
    return summary + ", " + denseRegister.toExtendedString();
  }
  if (encoding.equals(EncodingType.SPARSE)) {
    return summary + ", " + sparseRegister.toExtendedString();
  }
  return summary;
}
/**
 * Discards the current estimator state by assigning {@code hll} a newly built
 * {@code HyperLogLog} configured with 12 register index bits.
 */
public void reset() { hll = HyperLogLog.builder().setNumRegisterIndexBits(12).build(); } }
/**
 * Combines the encoding's hash with 31-weighted contributions from {@code p},
 * {@code chosenHashBits}, the estimated distinct count and the register of the active
 * encoding. Uses the same fields that {@link #equals(Object)} compares.
 *
 * @return the combined hash value
 */
@Override
public int hashCode() {
  int hash = encoding.hashCode();
  hash += 31 * p;
  hash += 31 * chosenHashBits;
  // Compound assignment narrows the estimate's contribution back to int.
  hash += 31 * estimateNumDistinctValues();
  if (encoding.equals(EncodingType.DENSE)) {
    hash += 31 * denseRegister.hashCode();
  } else if (encoding.equals(EncodingType.SPARSE)) {
    hash += 31 * sparseRegister.hashCode();
  }
  return hash;
}
/**
 * Two instances are equal when they agree on {@code p}, {@code chosenHashBits}, encoding and
 * estimated distinct count, and the register of the active encoding is equal as well.
 *
 * @param obj the object to compare against
 * @return {@code true} only if {@code obj} is a {@code HyperLogLog} matching on all of the above
 */
@Override
public boolean equals(Object obj) {
  if (obj instanceof HyperLogLog) {
    final HyperLogLog that = (HyperLogLog) obj;

    // Both estimates are computed up front, before any field comparison.
    final long thisEstimate = estimateNumDistinctValues();
    final long thatEstimate = that.estimateNumDistinctValues();

    boolean same = p == that.p
        && chosenHashBits == that.chosenHashBits
        && encoding.equals(that.encoding)
        && thisEstimate == thatEstimate;

    if (encoding.equals(EncodingType.DENSE)) {
      same = same && denseRegister.equals(that.getHLLDenseRegister());
    }
    if (encoding.equals(EncodingType.SPARSE)) {
      same = same && sparseRegister.equals(that.getHLLSparseRegister());
    }
    return same;
  }
  return false;
}
public void add(long hashcode) { if (encoding.equals(EncodingType.SPARSE)) { if (sparseRegister.add(hashcode)) { invalidateCount = true; } // if size of sparse map excess the threshold convert the sparse map to // dense register and switch to DENSE encoding if (sparseRegister.getSize() > encodingSwitchThreshold) { encoding = EncodingType.DENSE; denseRegister = sparseToDenseRegister(sparseRegister); sparseRegister = null; invalidateCount = true; } } else { if (denseRegister.add(hashcode)) { invalidateCount = true; } } }
/**
 * Creates an empty estimator of the same family as the one supplied.
 *
 * @param n the estimator whose kind decides what is created
 * @return a new {@code FMSketch} with the same number of bit vectors when {@code n} is an
 *         {@code FMSketch}; otherwise a size-optimized {@code HyperLogLog}
 */
public static NumDistinctValueEstimator getEmptyNumDistinctValueEstimator(
    NumDistinctValueEstimator n) {
  if (!(n instanceof FMSketch)) {
    return HyperLogLog.builder().setSizeOptimized().build();
  }
  final FMSketch sketch = (FMSketch) n;
  return new FMSketch(sketch.getNumBitVectors());
}
/**
 * Merges another estimator into this one by delegating to {@code merge}.
 *
 * @param o the estimator to merge; it is cast to {@code HyperLogLog}
 * @throws ClassCastException if {@code o} is not a {@code HyperLogLog}
 */
@Override
public void mergeEstimators(NumDistinctValueEstimator o) {
  final HyperLogLog other = (HyperLogLog) o;
  merge(other);
}
/**
 * Records a {@code long} value by delegating to {@code addLong}.
 *
 * @param v the value to add
 */
@Override
public void addToEstimator(long v) {
  this.addLong(v);
}
/**
 * Rebuilds an estimator from its serialized bytes via
 * {@code HyperLogLogUtils.deserializeHLL}.
 *
 * @param buf the serialized form
 * @return the estimator produced by {@code HyperLogLogUtils.deserializeHLL}
 */
@Override
public NumDistinctValueEstimator deserialize(byte[] buf) {
  final NumDistinctValueEstimator estimator = HyperLogLogUtils.deserializeHLL(buf);
  return estimator;
}
public boolean add(long hashcode) { boolean updated = false; // fill the temp list before merging to sparse map if (tempListIdx < tempList.length) { int encodedHash = encodeHash(hashcode); tempList[tempListIdx++] = encodedHash; updated = true; } else { updated = mergeTempListToSparseMap(); } return updated; }
/**
 * Records a string value by delegating to {@code addString}.
 *
 * @param s the value to add
 */
@Override
public void addToEstimator(String s) {
  this.addString(s);
}
/**
 * Records a {@code double} value by delegating to {@code addDouble}.
 *
 * @param d the value to add
 */
@Override
public void addToEstimator(double d) {
  this.addDouble(d);
}
/**
 * Records a boolean by adding one of two fixed hash constants.
 *
 * @param val {@code true} adds {@code HASH64_ONE}; {@code false} adds {@code HASH64_ZERO}
 */
public void addBoolean(boolean val) {
  if (val) {
    add(HASH64_ONE);
  } else {
    add(HASH64_ZERO);
  }
}
/**
 * Rebuilds the sparse register from a serialized integer list. For each entry, the bits above
 * {@code HLLConstants.Q_PRIME_VALUE} form the key and the low six bits form the value.
 *
 * @param reg
 *          - uncompressed and delta decoded integer list
 */
public void setHLLSparseRegister(int[] reg) {
  for (int pos = 0; pos < reg.length; pos++) {
    final int encoded = reg[pos];
    final int key = encoded >>> HLLConstants.Q_PRIME_VALUE;
    final byte value = (byte) (encoded & 0x3f);
    sparseRegister.set(key, value);
  }
}
/**
 * Rebuilds the dense register from a byte array by storing each byte at its array position.
 *
 * @param reg
 *          - unpacked byte array
 */
public void setHLLDenseRegister(byte[] reg) {
  for (int pos = 0; pos < reg.length; pos++) {
    denseRegister.set(pos, reg[pos]);
  }
}
public void extractLowBitsTo(HLLRegister dest) { for (int idx = 0; idx < register.length; idx++) { byte lr = register[idx]; // this can be a max of 65, never > 127 if (lr != 0) { dest.add((long) ((1 << (p + lr - 1)) | idx)); } } }
/**
 * Creates a {@code HyperLogLog} by passing this builder to the {@code HyperLogLog}
 * constructor.
 *
 * @return the newly constructed instance
 */
public HyperLogLog build() { return new HyperLogLog(this); } }
/**
 * Returns the sparse map after first merging any entries still waiting in the temp list.
 *
 * @return the {@code sparseMap} field itself (not a copy)
 */
private TreeMap<Integer,Byte> getMergedSparseMap() {
  final boolean hasPendingEntries = tempListIdx != 0;
  if (hasPendingEntries) {
    mergeTempListToSparseMap();
  }
  return sparseMap;
}
/**
 * Public accessor for the sparse map; delegates to {@code getMergedSparseMap()}.
 *
 * @return the map returned by {@code getMergedSparseMap()}
 */
public TreeMap<Integer,Byte> getSparseMap() {
  final TreeMap<Integer,Byte> merged = getMergedSparseMap();
  return merged;
}