/**
 * Builds a dense HLL register from the entries of a sparse register.
 *
 * @param sparseRegister
 *          - sparse register to convert (may be null)
 * @return a new dense register filled from the sparse map, or null when the
 *         input is null
 */
private HLLDenseRegister sparseToDenseRegister(HLLSparseRegister sparseRegister) {
  if (sparseRegister == null) {
    return null;
  }

  final int indexBits = sparseRegister.getP();
  // mask that keeps only the low p bits of a sparse key; those bits are used
  // as the position in the dense register
  final int indexMask = (1 << indexBits) - 1;

  final HLLDenseRegister dense = new HLLDenseRegister(indexBits, bitPacking);
  for (Map.Entry<Integer, Byte> sparseEntry : sparseRegister.getSparseMap().entrySet()) {
    dense.set(sparseEntry.getKey() & indexMask, sparseEntry.getValue());
  }
  return dense;
}
public void add(long hashcode) { if (encoding.equals(EncodingType.SPARSE)) { if (sparseRegister.add(hashcode)) { invalidateCount = true; } // if size of sparse map excess the threshold convert the sparse map to // dense register and switch to DENSE encoding if (sparseRegister.getSize() > encodingSwitchThreshold) { encoding = EncodingType.DENSE; denseRegister = sparseToDenseRegister(sparseRegister); sparseRegister = null; invalidateCount = true; } } else { if (denseRegister.add(hashcode)) { invalidateCount = true; } } }
public void merge(HLLRegister hllRegister) { if (hllRegister instanceof HLLDenseRegister) { HLLDenseRegister hdr = (HLLDenseRegister) hllRegister; byte[] inRegister = hdr.getRegister(); // merge only if the register length matches if (register.length != inRegister.length) { throw new IllegalArgumentException( "The size of register sets of HyperLogLogs to be merged does not match."); } // compare register values and store the max register value for (int i = 0; i < inRegister.length; i++) { final byte cb = register[i]; final byte ob = inRegister[i]; register[i] = ob > cb ? ob : cb; } // update max register value if (hdr.getMaxRegisterValue() > maxRegisterValue) { maxRegisterValue = hdr.getMaxRegisterValue(); } } else { throw new IllegalArgumentException("Specified register is not instance of HLLDenseRegister"); } }
double sum = denseRegister.getSumInversePow2(); long numZeros = denseRegister.getNumZeroes();
/**
 * Returns a one-line summary containing p, the value of getNumZeroes() and
 * maxRegisterValue.
 */
@Override
public String toString() {
  return "HLLDenseRegister - "
      + "p: " + p
      + " numZeroes: " + getNumZeroes()
      + " maxRegisterValue: " + maxRegisterValue;
}
/**
 * Combines p, chosenHashBits, the encoding, the current distinct-value
 * estimate and the hash of the register that matches the encoding.
 */
@Override
public int hashCode() {
  int result = 31 * p;
  result += 31 * chosenHashBits;
  result += encoding.hashCode();
  // compound assignment narrows the product back to int
  result += 31 * estimateNumDistinctValues();

  // only the register belonging to the current encoding contributes
  if (encoding.equals(EncodingType.DENSE)) {
    result += 31 * denseRegister.hashCode();
  } else if (encoding.equals(EncodingType.SPARSE)) {
    result += 31 * sparseRegister.hashCode();
  }
  return result;
}
/**
 * Two HyperLogLog instances are equal when p, chosenHashBits, encoding and
 * the distinct-value estimates match, and the register for the current
 * encoding equals the other instance's register of the same kind.
 */
@Override
public boolean equals(Object obj) {
  if (!(obj instanceof HyperLogLog)) {
    return false;
  }
  final HyperLogLog that = (HyperLogLog) obj;

  // both estimates are computed before any field is compared
  final long thisEstimate = estimateNumDistinctValues();
  final long thatEstimate = that.estimateNumDistinctValues();

  if (p != that.p
      || chosenHashBits != that.chosenHashBits
      || !encoding.equals(that.encoding)
      || thisEstimate != thatEstimate) {
    return false;
  }

  // scalar state matches; the register of the active encoding decides
  if (encoding.equals(EncodingType.DENSE)) {
    return denseRegister.equals(that.getHLLDenseRegister());
  }
  if (encoding.equals(EncodingType.SPARSE)) {
    return sparseRegister.equals(that.getHLLSparseRegister());
  }
  return true;
}
/**
 * Produces a HyperLogLog with a smaller (or equal) number of register index
 * bits. When p0 equals the current p, this instance itself is returned.
 * Otherwise a new DENSE-encoded instance is built with p0 index bits and the
 * same noBias setting, and the active register's extractLowBitsTo fills its
 * dense register.
 *
 * @param p0
 *          - p for the resulting HyperLogLog (must not exceed the current p)
 * @return this instance when p0 equals p, otherwise the newly built instance
 * @throws IllegalArgumentException
 *           if p0 is larger than the current p
 */
public HyperLogLog squash(final int p0) {
  if (p0 == p) {
    return this;
  }
  if (p0 > p) {
    throw new IllegalArgumentException(
        "HyperLogLog cannot be be squashed to be bigger. Current: "
        + toString() + " Provided: " + p0);
  }

  final HyperLogLog squashed = new HyperLogLogBuilder()
      .setNumRegisterIndexBits(p0)
      .setEncoding(EncodingType.DENSE)
      .enableNoBias(noBias)
      .build();
  final HLLDenseRegister target = squashed.denseRegister;

  // copy from whichever register currently holds this instance's data
  if (encoding == EncodingType.SPARSE) {
    sparseRegister.extractLowBitsTo(target);
  } else if (encoding == EncodingType.DENSE) {
    denseRegister.extractLowBitsTo(target);
  }
  return squashed;
}
} else { this.sparseRegister = null; this.denseRegister = new HLLDenseRegister(p, bitPacking);
denseRegister.merge(hll.getHLLDenseRegister()); } else if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.DENSE)) { denseRegister = sparseToDenseRegister(sparseRegister); denseRegister.merge(hll.getHLLDenseRegister()); sparseRegister = null; encoding = EncodingType.DENSE; } else if (encoding.equals(EncodingType.DENSE) && otherEncoding.equals(EncodingType.SPARSE)) { HLLDenseRegister otherDenseRegister = sparseToDenseRegister(hll.getHLLSparseRegister()); denseRegister.merge(otherDenseRegister);
double sum = denseRegister.getSumInversePow2(); long numZeros = denseRegister.getNumZeroes();
/**
 * Returns a one-line summary containing p, the value of getNumZeroes() and
 * maxRegisterValue.
 */
@Override
public String toString() {
  return "HLLDenseRegister - "
      + "p: " + p
      + " numZeroes: " + getNumZeroes()
      + " maxRegisterValue: " + maxRegisterValue;
}
/**
 * Combines p, chosenHashBits, the encoding, the current distinct-value
 * estimate and the hash of the register that matches the encoding.
 */
@Override
public int hashCode() {
  int result = 31 * p;
  result += 31 * chosenHashBits;
  result += encoding.hashCode();
  // compound assignment narrows the product back to int
  result += 31 * estimateNumDistinctValues();

  // only the register belonging to the current encoding contributes
  if (encoding.equals(EncodingType.DENSE)) {
    result += 31 * denseRegister.hashCode();
  } else if (encoding.equals(EncodingType.SPARSE)) {
    result += 31 * sparseRegister.hashCode();
  }
  return result;
}
/**
 * Two HyperLogLog instances are equal when p, chosenHashBits, encoding and
 * the distinct-value estimates match, and the register for the current
 * encoding equals the other instance's register of the same kind.
 */
@Override
public boolean equals(Object obj) {
  if (!(obj instanceof HyperLogLog)) {
    return false;
  }
  final HyperLogLog that = (HyperLogLog) obj;

  // both estimates are computed before any field is compared
  final long thisEstimate = estimateNumDistinctValues();
  final long thatEstimate = that.estimateNumDistinctValues();

  if (p != that.p
      || chosenHashBits != that.chosenHashBits
      || !encoding.equals(that.encoding)
      || thisEstimate != thatEstimate) {
    return false;
  }

  // scalar state matches; the register of the active encoding decides
  if (encoding.equals(EncodingType.DENSE)) {
    return denseRegister.equals(that.getHLLDenseRegister());
  }
  if (encoding.equals(EncodingType.SPARSE)) {
    return sparseRegister.equals(that.getHLLSparseRegister());
  }
  return true;
}
/**
 * Produces a HyperLogLog with a smaller (or equal) number of register index
 * bits. When p0 equals the current p, this instance itself is returned.
 * Otherwise a new DENSE-encoded instance is built with p0 index bits and the
 * same noBias setting, and the active register's extractLowBitsTo fills its
 * dense register.
 *
 * @param p0
 *          - p for the resulting HyperLogLog (must not exceed the current p)
 * @return this instance when p0 equals p, otherwise the newly built instance
 * @throws IllegalArgumentException
 *           if p0 is larger than the current p
 */
public HyperLogLog squash(final int p0) {
  if (p0 == p) {
    return this;
  }
  if (p0 > p) {
    throw new IllegalArgumentException(
        "HyperLogLog cannot be be squashed to be bigger. Current: "
        + toString() + " Provided: " + p0);
  }

  final HyperLogLog squashed = new HyperLogLogBuilder()
      .setNumRegisterIndexBits(p0)
      .setEncoding(EncodingType.DENSE)
      .enableNoBias(noBias)
      .build();
  final HLLDenseRegister target = squashed.denseRegister;

  // copy from whichever register currently holds this instance's data
  if (encoding == EncodingType.SPARSE) {
    sparseRegister.extractLowBitsTo(target);
  } else if (encoding == EncodingType.DENSE) {
    denseRegister.extractLowBitsTo(target);
  }
  return squashed;
}
} else { this.sparseRegister = null; this.denseRegister = new HLLDenseRegister(p, bitPacking);
denseRegister.merge(hll.getHLLDenseRegister()); } else if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.DENSE)) { denseRegister = sparseToDenseRegister(sparseRegister); denseRegister.merge(hll.getHLLDenseRegister()); sparseRegister = null; encoding = EncodingType.DENSE; } else if (encoding.equals(EncodingType.DENSE) && otherEncoding.equals(EncodingType.SPARSE)) { HLLDenseRegister otherDenseRegister = sparseToDenseRegister(hll.getHLLSparseRegister()); denseRegister.merge(otherDenseRegister);
int lzr = hll.getHLLDenseRegister().getMaxRegisterValue(); bitWidth = getBitWidth(lzr); byte[] register = hll.getHLLDenseRegister().getRegister(); bitpackHLLRegister(out, register, bitWidth); } else if (enc.equals(EncodingType.SPARSE)) {
/**
 * Builds a dense HLL register from the entries of a sparse register.
 *
 * @param sparseRegister
 *          - sparse register to convert (may be null)
 * @return a new dense register filled from the sparse map, or null when the
 *         input is null
 */
private HLLDenseRegister sparseToDenseRegister(HLLSparseRegister sparseRegister) {
  if (sparseRegister == null) {
    return null;
  }

  final int indexBits = sparseRegister.getP();
  // mask that keeps only the low p bits of a sparse key; those bits are used
  // as the position in the dense register
  final int indexMask = (1 << indexBits) - 1;

  final HLLDenseRegister dense = new HLLDenseRegister(indexBits, bitPacking);
  for (Map.Entry<Integer, Byte> sparseEntry : sparseRegister.getSparseMap().entrySet()) {
    dense.set(sparseEntry.getKey() & indexMask, sparseEntry.getValue());
  }
  return dense;
}
public void add(long hashcode) { if (encoding.equals(EncodingType.SPARSE)) { if (sparseRegister.add(hashcode)) { invalidateCount = true; } // if size of sparse map excess the threshold convert the sparse map to // dense register and switch to DENSE encoding if (sparseRegister.getSize() > encodingSwitchThreshold) { encoding = EncodingType.DENSE; denseRegister = sparseToDenseRegister(sparseRegister); sparseRegister = null; invalidateCount = true; } } else { if (denseRegister.add(hashcode)) { invalidateCount = true; } } }