@Test
public void testHLLSparseSerialization() throws IOException {
  // Build a sparse-encoded HLL and feed it deterministic pseudo-random longs.
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
  Random rand = new Random(SEED);
  for (int i = 0; i < size; i++) {
    hll.addLong(rand.nextLong());
  }
  // Round-trip through the on-disk format; try-with-resources guarantees the
  // streams are closed even on failure (the original code leaked them).
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  // The deserialized sketch must be indistinguishable from the original.
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
}
@Test
public void testHLLDenseSerialization() throws IOException {
  // Build a dense-encoded HLL and feed it deterministic pseudo-random longs.
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
  Random rand = new Random(SEED);
  for (int i = 0; i < size; i++) {
    hll.addLong(rand.nextLong());
  }
  // Round-trip through the on-disk format; try-with-resources guarantees the
  // streams are closed even on failure (the original code leaked them).
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  // The deserialized sketch must be indistinguishable from the original.
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
}
/**
 * Reduces the accuracy of the HLL provided to a smaller size.
 *
 * @param p0
 *          - new p size for the new HyperLogLog (smaller or no change)
 * @return reduced (or same) HyperLogLog instance
 * @throws IllegalArgumentException if {@code p0} is larger than the current p
 */
public HyperLogLog squash(final int p0) {
  if (p0 > p) {
    // Fixed typo in message: "cannot be be squashed" -> "cannot be squashed".
    throw new IllegalArgumentException(
        "HyperLogLog cannot be squashed to be bigger. Current: "
            + toString() + " Provided: " + p0);
  }
  if (p0 == p) {
    // Already at the requested precision; no copy needed.
    return this;
  }
  // The squashed sketch is always built dense; low register bits of the
  // source (sparse or dense) are folded into the smaller register array.
  final HyperLogLog hll = new HyperLogLogBuilder()
      .setNumRegisterIndexBits(p0).setEncoding(EncodingType.DENSE)
      .enableNoBias(noBias).build();
  final HLLDenseRegister result = hll.denseRegister;
  if (encoding == EncodingType.SPARSE) {
    sparseRegister.extractLowBitsTo(result);
  } else if (encoding == EncodingType.DENSE) {
    denseRegister.extractLowBitsTo(result);
  }
  return hll;
}
@Test
public void testHLLDenseSerializationHalfDistinct() throws IOException {
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).build();
  Random rand = new Random(SEED);
  // Draw from [0, size/2) so roughly half the inserts are duplicates; the
  // hashset tracks the exact distinct count for the accuracy check below.
  Set<Integer> hashset = new HashSet<>();
  for (int i = 0; i < size; i++) {
    int val = rand.nextInt(size / 2);
    hll.addLong(val);
    hashset.add(val);
  }
  // Serialize then deserialize; try-with-resources closes the streams the
  // original code leaked.
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  // Allowed estimation error, expressed as a percentage of the true
  // distinct count.
  double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
  double delta = threshold * hashset.size() / 100;
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
  // Both sketches must estimate the true cardinality within tolerance.
  assertEquals(hashset.size(), hll.count(), delta);
  assertEquals(hashset.size(), deserializedHLL.count(), delta);
}
throw new IllegalArgumentException( "HyperLogLog cannot be merged as either p or hashbits are different. Current: " + toString() + " Provided: " + hll.toString()); throw new IllegalArgumentException( "HyperLogLog cannot merge a smaller p into a larger one : " + toString() + " Provided: " + hll.toString());
@Test
public void testHLLDenseNoBitPacking() throws IOException {
  // Dense encoding with bit packing disabled: registers serialize unpacked.
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).enableBitPacking(false)
      .build();
  Random rand = new Random(SEED);
  for (int i = 0; i < size; i++) {
    hll.addLong(rand.nextLong());
  }
  // Round-trip through the on-disk format; try-with-resources guarantees the
  // streams are closed even on failure (the original code leaked them).
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  // The deserialized sketch must be indistinguishable from the original.
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
}
@Test
public void testHLLSparseNoBitPacking() throws IOException {
  // Sparse encoding with bit packing disabled: registers serialize unpacked.
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE)
      .enableBitPacking(false).build();
  Random rand = new Random(SEED);
  for (int i = 0; i < size; i++) {
    hll.addLong(rand.nextLong());
  }
  // Round-trip through the on-disk format; try-with-resources guarantees the
  // streams are closed even on failure (the original code leaked them).
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  // The deserialized sketch must be indistinguishable from the original.
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
}
/**
 * Reduces the accuracy of the HLL provided to a smaller size.
 *
 * @param p0
 *          - new p size for the new HyperLogLog (smaller or no change)
 * @return reduced (or same) HyperLogLog instance
 * @throws IllegalArgumentException if {@code p0} is larger than the current p
 */
public HyperLogLog squash(final int p0) {
  if (p0 > p) {
    // Fixed typo in message: "cannot be be squashed" -> "cannot be squashed".
    throw new IllegalArgumentException(
        "HyperLogLog cannot be squashed to be bigger. Current: "
            + toString() + " Provided: " + p0);
  }
  if (p0 == p) {
    // Already at the requested precision; no copy needed.
    return this;
  }
  // The squashed sketch is always built dense; low register bits of the
  // source (sparse or dense) are folded into the smaller register array.
  final HyperLogLog hll = new HyperLogLogBuilder()
      .setNumRegisterIndexBits(p0).setEncoding(EncodingType.DENSE)
      .enableNoBias(noBias).build();
  final HLLDenseRegister result = hll.denseRegister;
  if (encoding == EncodingType.SPARSE) {
    sparseRegister.extractLowBitsTo(result);
  } else if (encoding == EncodingType.DENSE) {
    denseRegister.extractLowBitsTo(result);
  }
  return hll;
}
@Test
public void testHLLSparseSerializationHalfDistinct() throws IOException {
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE).build();
  Random rand = new Random(SEED);
  // Draw from [0, size/2) so roughly half the inserts are duplicates; the
  // hashset tracks the exact distinct count for the accuracy check below.
  Set<Integer> hashset = new HashSet<>();
  for (int i = 0; i < size; i++) {
    int val = rand.nextInt(size / 2);
    hll.addLong(val);
    hashset.add(val);
  }
  // Serialize then deserialize; try-with-resources closes the streams the
  // original code leaked.
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  // Allowed estimation error, expressed as a percentage of the true
  // distinct count.
  double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
  double delta = threshold * hashset.size() / 100;
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
  // Both sketches must estimate the true cardinality within tolerance.
  assertEquals(hashset.size(), hll.count(), delta);
  assertEquals(hashset.size(), deserializedHLL.count(), delta);
}
throw new IllegalArgumentException( "HyperLogLog cannot be merged as either p or hashbits are different. Current: " + toString() + " Provided: " + hll.toString()); throw new IllegalArgumentException( "HyperLogLog cannot merge a smaller p into a larger one : " + toString() + " Provided: " + hll.toString());
@Test
public void testHLLSparseNoBitPackingHalfDistinct() throws IOException {
  // Sparse encoding with bit packing disabled, duplicate-heavy input.
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.SPARSE)
      .enableBitPacking(false).build();
  Random rand = new Random(SEED);
  // Draw from [0, size/2) so roughly half the inserts are duplicates; the
  // hashset tracks the exact distinct count for the accuracy check below.
  Set<Integer> hashset = new HashSet<>();
  for (int i = 0; i < size; i++) {
    int val = rand.nextInt(size / 2);
    hll.addLong(val);
    hashset.add(val);
  }
  // Serialize then deserialize; try-with-resources closes the streams the
  // original code leaked.
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  // Allowed estimation error, expressed as a percentage of the true
  // distinct count.
  double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
  double delta = threshold * hashset.size() / 100;
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
  // Both sketches must estimate the true cardinality within tolerance.
  assertEquals(hashset.size(), hll.count(), delta);
  assertEquals(hashset.size(), deserializedHLL.count(), delta);
}
@Test
public void testHLLDenseNoBitPackingHalfDistinct() throws IOException {
  // Dense encoding with bit packing disabled, duplicate-heavy input.
  HyperLogLog hll = HyperLogLog.builder().setEncoding(EncodingType.DENSE).enableBitPacking(false)
      .build();
  Random rand = new Random(SEED);
  // Draw from [0, size/2) so roughly half the inserts are duplicates; the
  // hashset tracks the exact distinct count for the accuracy check below.
  Set<Integer> hashset = new HashSet<>();
  for (int i = 0; i < size; i++) {
    int val = rand.nextInt(size / 2);
    hll.addLong(val);
    hashset.add(val);
  }
  // Serialize then deserialize; try-with-resources closes the streams the
  // original code leaked.
  try (DataOutputStream out = new DataOutputStream(new FileOutputStream(testFile))) {
    HyperLogLogUtils.serializeHLL(out, hll);
  }
  // Allowed estimation error, expressed as a percentage of the true
  // distinct count.
  double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
  double delta = threshold * hashset.size() / 100;
  HyperLogLog deserializedHLL;
  try (DataInputStream in = new DataInputStream(new FileInputStream(testFile))) {
    deserializedHLL = HyperLogLogUtils.deserializeHLL(in);
  }
  assertEquals(hll, deserializedHLL);
  assertEquals(hll.toString(), deserializedHLL.toString());
  assertEquals(hll.toStringExtended(), deserializedHLL.toStringExtended());
  assertEquals(hll.hashCode(), deserializedHLL.hashCode());
  assertEquals(hll.count(), deserializedHLL.count());
  // Both sketches must estimate the true cardinality within tolerance.
  assertEquals(hashset.size(), hll.count(), delta);
  assertEquals(hashset.size(), deserializedHLL.count(), delta);
}
}