/**
 * Encodes the test's {@code input} with a sparse register constructed from the
 * arguments (14, 25, 6) and checks the result against {@code expected}.
 */
@Test
public void testEncodeHash() {
  final HLLSparseRegister register = new HLLSparseRegister(14, 25, 6);
  final int actual = register.encodeHash(input);
  assertEquals(expected, actual);
}
}
public void add(long hashcode) { if (encoding.equals(EncodingType.SPARSE)) { if (sparseRegister.add(hashcode)) { invalidateCount = true; } // if size of sparse map excess the threshold convert the sparse map to // dense register and switch to DENSE encoding if (sparseRegister.getSize() > encodingSwitchThreshold) { encoding = EncodingType.DENSE; denseRegister = sparseToDenseRegister(sparseRegister); sparseRegister = null; invalidateCount = true; } } else { if (denseRegister.add(hashcode)) { invalidateCount = true; } } }
/**
 * Builds a dense register holding the contents of the given sparse register.
 * Each sparse-map key is reduced to its lowest {@code p} bits to obtain the
 * dense register index.
 *
 * @param sparseRegister
 *          - sparse register to be converted, may be null
 * @return the converted dense register, or null when the input is null
 */
private HLLDenseRegister sparseToDenseRegister(HLLSparseRegister sparseRegister) {
  if (sparseRegister == null) {
    return null;
  }

  final int indexBits = sparseRegister.getP();
  // mask that keeps only the lowest indexBits bits of a sparse key
  final int indexMask = (1 << indexBits) - 1;
  final HLLDenseRegister dense = new HLLDenseRegister(indexBits, bitPacking);

  for (Map.Entry<Integer, Byte> sparseEntry : sparseRegister.getSparseMap().entrySet()) {
    final int sparseKey = sparseEntry.getKey();
    dense.set(sparseKey & indexMask, sparseEntry.getValue());
  }
  return dense;
}
public boolean add(long hashcode) { boolean updated = false; // fill the temp list before merging to sparse map if (tempListIdx < tempList.length) { int encodedHash = encodeHash(hashcode); tempList[tempListIdx++] = encodedHash; updated = true; } else { updated = mergeTempListToSparseMap(); } return updated; }
public void merge(HLLRegister hllRegister) { if (hllRegister instanceof HLLSparseRegister) { HLLSparseRegister hsr = (HLLSparseRegister) hllRegister; // retain only the largest value for a register index for (Map.Entry<Integer, Byte> entry : hsr.getSparseMap().entrySet()) { int key = entry.getKey(); byte value = entry.getValue(); set(key, value); } } else { throw new IllegalArgumentException("Specified register not instance of HLLSparseRegister"); } }
int mPrime = 1 << sparseRegister.getPPrime(); cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSparseMap().size()); } else {
this.encoding = hllBuilder.encoding; if (encoding.equals(EncodingType.SPARSE)) { this.sparseRegister = new HLLSparseRegister(p, HLLConstants.P_PRIME_VALUE, HLLConstants.Q_PRIME_VALUE); this.denseRegister = null;
public void extractLowBitsTo(HLLRegister dest) { for (Entry<Integer, Byte> entry : getSparseMap().entrySet()) { int idx = entry.getKey(); byte lr = entry.getValue(); // this can be a max of 65, never > 127 if (lr != 0) { // should be a no-op for sparse dest.add((long) ((1 << (p + lr - 1)) | idx)); } } }
/**
 * Computes a hash from p, the chosen hash bits, the encoding, the estimated
 * number of distinct values and the register that matches the encoding.
 *
 * @return hash code of this instance
 */
@Override
public int hashCode() {
  int hash = encoding.hashCode();
  hash += 31 * p;
  hash += 31 * chosenHashBits;
  hash += 31 * estimateNumDistinctValues();

  // only the register belonging to the active encoding contributes
  if (encoding.equals(EncodingType.DENSE)) {
    hash += 31 * denseRegister.hashCode();
  } else if (encoding.equals(EncodingType.SPARSE)) {
    hash += 31 * sparseRegister.hashCode();
  }
  return hash;
}
/**
 * Two instances are equal when p, the chosen hash bits, the encoding and the
 * estimated number of distinct values all match and the register for this
 * instance's encoding equals the other instance's register of the same kind.
 *
 * @param obj
 *          - object to compare with
 * @return true if both instances are equal
 */
@Override
public boolean equals(Object obj) {
  if (!(obj instanceof HyperLogLog)) {
    return false;
  }
  final HyperLogLog that = (HyperLogLog) obj;

  // both estimates are computed up front, before any field is compared
  final long thisEstimate = estimateNumDistinctValues();
  final long thatEstimate = that.estimateNumDistinctValues();

  if (p != that.p || chosenHashBits != that.chosenHashBits
      || !encoding.equals(that.encoding) || thisEstimate != thatEstimate) {
    return false;
  }

  if (encoding.equals(EncodingType.DENSE)) {
    return denseRegister.equals(that.getHLLDenseRegister());
  }
  if (encoding.equals(EncodingType.SPARSE)) {
    return sparseRegister.equals(that.getHLLSparseRegister());
  }
  return true;
}
/**
 * Produces a HyperLogLog with a smaller number of register index bits by
 * replaying this instance's register into a new DENSE instance built with
 * {@code p0} index bits and the same noBias setting.
 *
 * @param p0
 *          - number of register index bits for the result; must not exceed
 *          the current p
 * @return this instance when {@code p0} equals the current p, otherwise the
 *         newly built instance
 * @throws IllegalArgumentException
 *           if {@code p0} is larger than the current p
 */
public HyperLogLog squash(final int p0) {
  if (p0 == p) {
    // nothing to reduce
    return this;
  }
  if (p0 > p) {
    throw new IllegalArgumentException(
        "HyperLogLog cannot be be squashed to be bigger. Current: "
            + toString() + " Provided: " + p0);
  }

  final HyperLogLogBuilder builder = new HyperLogLogBuilder()
      .setNumRegisterIndexBits(p0)
      .setEncoding(EncodingType.DENSE)
      .enableNoBias(noBias);
  final HyperLogLog squashed = builder.build();
  final HLLDenseRegister target = squashed.denseRegister;

  // replay whichever register currently backs this instance into the target
  if (encoding == EncodingType.SPARSE) {
    sparseRegister.extractLowBitsTo(target);
  } else if (encoding == EncodingType.DENSE) {
    denseRegister.extractLowBitsTo(target);
  }
  return squashed;
}
int mPrime = 1 << sparseRegister.getPPrime(); cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSparseMap().size()); } else {
this.encoding = hllBuilder.encoding; if (encoding.equals(EncodingType.SPARSE)) { this.sparseRegister = new HLLSparseRegister(p, HLLConstants.P_PRIME_VALUE, HLLConstants.Q_PRIME_VALUE); this.denseRegister = null;
bitpackHLLRegister(out, register, bitWidth); } else if (enc.equals(EncodingType.SPARSE)) { TreeMap<Integer, Byte> sparseMap = hll.getHLLSparseRegister().getSparseMap();
/**
 * Computes a hash from p, the chosen hash bits, the encoding, the estimated
 * number of distinct values and the register that matches the encoding.
 *
 * @return hash code of this instance
 */
@Override
public int hashCode() {
  int hash = encoding.hashCode();
  hash += 31 * p;
  hash += 31 * chosenHashBits;
  hash += 31 * estimateNumDistinctValues();

  // only the register belonging to the active encoding contributes
  if (encoding.equals(EncodingType.DENSE)) {
    hash += 31 * denseRegister.hashCode();
  } else if (encoding.equals(EncodingType.SPARSE)) {
    hash += 31 * sparseRegister.hashCode();
  }
  return hash;
}
/**
 * Two instances are equal when p, the chosen hash bits, the encoding and the
 * estimated number of distinct values all match and the register for this
 * instance's encoding equals the other instance's register of the same kind.
 *
 * @param obj
 *          - object to compare with
 * @return true if both instances are equal
 */
@Override
public boolean equals(Object obj) {
  if (!(obj instanceof HyperLogLog)) {
    return false;
  }
  final HyperLogLog that = (HyperLogLog) obj;

  // both estimates are computed up front, before any field is compared
  final long thisEstimate = estimateNumDistinctValues();
  final long thatEstimate = that.estimateNumDistinctValues();

  if (p != that.p || chosenHashBits != that.chosenHashBits
      || !encoding.equals(that.encoding) || thisEstimate != thatEstimate) {
    return false;
  }

  if (encoding.equals(EncodingType.DENSE)) {
    return denseRegister.equals(that.getHLLDenseRegister());
  }
  if (encoding.equals(EncodingType.SPARSE)) {
    return sparseRegister.equals(that.getHLLSparseRegister());
  }
  return true;
}
/**
 * Produces a HyperLogLog with a smaller number of register index bits by
 * replaying this instance's register into a new DENSE instance built with
 * {@code p0} index bits and the same noBias setting.
 *
 * @param p0
 *          - number of register index bits for the result; must not exceed
 *          the current p
 * @return this instance when {@code p0} equals the current p, otherwise the
 *         newly built instance
 * @throws IllegalArgumentException
 *           if {@code p0} is larger than the current p
 */
public HyperLogLog squash(final int p0) {
  if (p0 == p) {
    // nothing to reduce
    return this;
  }
  if (p0 > p) {
    throw new IllegalArgumentException(
        "HyperLogLog cannot be be squashed to be bigger. Current: "
            + toString() + " Provided: " + p0);
  }

  final HyperLogLogBuilder builder = new HyperLogLogBuilder()
      .setNumRegisterIndexBits(p0)
      .setEncoding(EncodingType.DENSE)
      .enableNoBias(noBias);
  final HyperLogLog squashed = builder.build();
  final HLLDenseRegister target = squashed.denseRegister;

  // replay whichever register currently backs this instance into the target
  if (encoding == EncodingType.SPARSE) {
    sparseRegister.extractLowBitsTo(target);
  } else if (encoding == EncodingType.DENSE) {
    denseRegister.extractLowBitsTo(target);
  }
  return squashed;
}
public void add(long hashcode) { if (encoding.equals(EncodingType.SPARSE)) { if (sparseRegister.add(hashcode)) { invalidateCount = true; } // if size of sparse map excess the threshold convert the sparse map to // dense register and switch to DENSE encoding if (sparseRegister.getSize() > encodingSwitchThreshold) { encoding = EncodingType.DENSE; denseRegister = sparseToDenseRegister(sparseRegister); sparseRegister = null; invalidateCount = true; } } else { if (denseRegister.add(hashcode)) { invalidateCount = true; } } }