/**
 * Checks {@code standardErrorToBuckets} on both sides of each error value at which the
 * expected bucket count changes, from 1024 up to 65536 buckets.
 */
@Test
public void testStandardErrorToBuckets()
{
    // Each row is {maxStandardError, expectedBuckets}; rows are evaluated in order and the
    // first mismatch fails the test. The bucket counts are small whole numbers, so they are
    // represented exactly as doubles and the narrowing cast below is lossless.
    double[][] expectations = {
            {0.0326, 1024},
            {0.0325, 1024},
            {0.0324, 2048},
            {0.0231, 2048},
            {0.0230, 2048},
            {0.0229, 4096},
            {0.0164, 4096},
            {0.0163, 4096},
            {0.0162, 8192},
            {0.0116, 8192},
            {0.0115, 8192},
            {0.0114, 16384},
            {0.008126, 16384},
            {0.008125, 16384},
            {0.008124, 32768},
            {0.00576, 32768},
            {0.00575, 32768},
            {0.00574, 65536},
            {0.0040626, 65536},
            {0.0040625, 65536},
    };

    for (double[] expectation : expectations) {
        double maxStandardError = expectation[0];
        int expectedBuckets = (int) expectation[1];
        assertEquals(standardErrorToBuckets(maxStandardError), expectedBuckets);
    }
}
/**
 * Combine function for the boolean distinct-count state.
 *
 * <p>Contains no logic of its own: it forwards both states, in the same order, to
 * {@code ApproximateCountDistinctAggregation.combineState}.
 *
 * @param state the first state passed to the delegate (presumably the one that receives the merged result; confirm against the delegate)
 * @param otherState the second state passed to the delegate
 */
@CombineFunction public static void combineState(BooleanDistinctState state, BooleanDistinctState otherState) { ApproximateCountDistinctAggregation.combineState(state, otherState); }
/**
 * Output function for the boolean distinct-count state; the declared output type is BIGINT.
 *
 * <p>Forwards to {@code ApproximateCountDistinctAggregation.evaluateFinal}, which is
 * responsible for writing the result into {@code out}.
 *
 * @param state the aggregation state to read the result from
 * @param out the block builder handed to the delegate
 */
@OutputFunction(StandardTypes.BIGINT) public static void evaluateFinal(BooleanDistinctState state, BlockBuilder out) { ApproximateCountDistinctAggregation.evaluateFinal(state, out); } }
/**
 * Input function for a BOOLEAN value when the caller supplies no maximum standard error.
 *
 * <p>Forwards to the three-argument {@code ApproximateCountDistinctAggregation.input},
 * passing {@code DEFAULT_STANDARD_ERROR} as the error argument.
 *
 * @param state the aggregation state to update
 * @param value the boolean input value
 */
@InputFunction public static void input(BooleanDistinctState state, @SqlType(StandardTypes.BOOLEAN) boolean value) { ApproximateCountDistinctAggregation.input(state, value, DEFAULT_STANDARD_ERROR); }
/**
 * Adds one {@code Slice} value to the HyperLogLog sketch held in {@code state}.
 *
 * <p>The sketch is obtained through {@code getOrCreateHyperLogLog}, so
 * {@code maxStandardError} is only consulted when the state has no sketch yet. The value is
 * hashed by invoking the injected {@code XX_HASH_64} operator handle, and the hash is then
 * added to the sketch.
 *
 * <p>The hash is computed before the memory accounting is touched. If the handle throws,
 * the state's reported memory usage is therefore left unchanged, rather than being
 * decremented by the sketch size with no matching re-add.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}; invoked exactly with {@code value} and must return {@code long}
 * @param state aggregation state holding the sketch and its memory accounting
 * @param value the value to hash and add
 * @param maxStandardError maximum standard error forwarded to {@code getOrCreateHyperLogLog}
 */
@InputFunction
@TypeParameter("T")
public static void input(
        @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle,
        @AggregationState HyperLogLogState state,
        @SqlType("T") Slice value,
        @SqlType(StandardTypes.DOUBLE) double maxStandardError)
{
    HyperLogLog hll = getOrCreateHyperLogLog(state, maxStandardError);

    // Hash first: this is the only step expected to fail, and it must not run between the
    // paired memory-usage adjustments below.
    long hash;
    try {
        hash = (long) methodHandle.invokeExact(value);
    }
    catch (Throwable t) {
        // invokeExact is declared to throw Throwable; rethrow through the internalError helper.
        throw internalError(t);
    }

    // Replace the sketch's previous footprint with its footprint after the insert.
    state.addMemoryUsage(-hll.estimatedInMemorySize());
    hll.addHash(hash);
    state.addMemoryUsage(hll.estimatedInMemorySize());
}
/**
 * Translates a requested maximum standard error into a bucket count.
 *
 * <p>The argument must lie in
 * {@code [LOWEST_MAX_STANDARD_ERROR, HIGHEST_MAX_STANDARD_ERROR]}; otherwise
 * {@code checkCondition} is invoked with {@code INVALID_FUNCTION_ARGUMENT}. The count is
 * {@code ceil(1.0816 / maxStandardError^2)} passed through {@code log2Ceiling}.
 *
 * <p>NOTE(review): 1.0816 equals 1.04 squared, so this presumably inverts the usual
 * HyperLogLog error bound of 1.04 / sqrt(buckets); confirm.
 *
 * @param maxStandardError the requested maximum standard error
 * @return the value produced by {@code log2Ceiling} for the required bucket count
 */
@VisibleForTesting
static int standardErrorToBuckets(double maxStandardError)
{
    // Written as two positive comparisons so that NaN fails the range check.
    boolean withinBounds = maxStandardError >= LOWEST_MAX_STANDARD_ERROR
            && maxStandardError <= HIGHEST_MAX_STANDARD_ERROR;
    checkCondition(
            withinBounds,
            INVALID_FUNCTION_ARGUMENT,
            "Max standard error must be in [%s, %s]: %s",
            LOWEST_MAX_STANDARD_ERROR,
            HIGHEST_MAX_STANDARD_ERROR,
            maxStandardError);

    double errorSquared = maxStandardError * maxStandardError;
    int requiredBuckets = (int) Math.ceil(1.0816 / errorSquared);
    return log2Ceiling(requiredBuckets);
}
/**
 * Input function for a {@code double}-represented value of type {@code T} when the caller
 * supplies no maximum standard error.
 *
 * <p>Forwards to the four-argument {@code ApproximateCountDistinctAggregation.input},
 * passing {@code DEFAULT_STANDARD_ERROR} as the error argument.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}, passed through unchanged
 * @param state the aggregation state to update
 * @param value the input value
 */
@InputFunction @TypeParameter("T") public static void input( @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle, @AggregationState HyperLogLogState state, @SqlType("T") double value) { ApproximateCountDistinctAggregation.input(methodHandle, state, value, DEFAULT_STANDARD_ERROR); }
/**
 * Adds one {@code Slice} value to the HyperLogLog sketch held in {@code state}.
 *
 * <p>The sketch is obtained through {@code getOrCreateHyperLogLog}, so
 * {@code maxStandardError} is only consulted when the state has no sketch yet. The value is
 * hashed by invoking the injected {@code XX_HASH_64} operator handle, and the hash is then
 * added to the sketch.
 *
 * <p>The hash is computed before the memory accounting is touched. If the handle throws,
 * the state's reported memory usage is therefore left unchanged, rather than being
 * decremented by the sketch size with no matching re-add.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}; invoked exactly with {@code value} and must return {@code long}
 * @param state aggregation state holding the sketch and its memory accounting
 * @param value the value to hash and add
 * @param maxStandardError maximum standard error forwarded to {@code getOrCreateHyperLogLog}
 */
@InputFunction
@TypeParameter("T")
public static void input(
        @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle,
        @AggregationState HyperLogLogState state,
        @SqlType("T") Slice value,
        @SqlType(StandardTypes.DOUBLE) double maxStandardError)
{
    HyperLogLog hll = getOrCreateHyperLogLog(state, maxStandardError);

    // Hash first: this is the only step expected to fail, and it must not run between the
    // paired memory-usage adjustments below.
    long hash;
    try {
        hash = (long) methodHandle.invokeExact(value);
    }
    catch (Throwable t) {
        // invokeExact is declared to throw Throwable; rethrow through the internalError helper.
        throw internalError(t);
    }

    // Replace the sketch's previous footprint with its footprint after the insert.
    state.addMemoryUsage(-hll.estimatedInMemorySize());
    hll.addHash(hash);
    state.addMemoryUsage(hll.estimatedInMemorySize());
}
/**
 * Translates a requested maximum standard error into a bucket count.
 *
 * <p>The argument must lie in
 * {@code [LOWEST_MAX_STANDARD_ERROR, HIGHEST_MAX_STANDARD_ERROR]}; otherwise
 * {@code checkCondition} is invoked with {@code INVALID_FUNCTION_ARGUMENT}. The count is
 * {@code ceil(1.0816 / maxStandardError^2)} passed through {@code log2Ceiling}.
 *
 * <p>NOTE(review): 1.0816 equals 1.04 squared, so this presumably inverts the usual
 * HyperLogLog error bound of 1.04 / sqrt(buckets); confirm.
 *
 * @param maxStandardError the requested maximum standard error
 * @return the value produced by {@code log2Ceiling} for the required bucket count
 */
@VisibleForTesting
static int standardErrorToBuckets(double maxStandardError)
{
    // Written as two positive comparisons so that NaN fails the range check.
    boolean withinBounds = maxStandardError >= LOWEST_MAX_STANDARD_ERROR
            && maxStandardError <= HIGHEST_MAX_STANDARD_ERROR;
    checkCondition(
            withinBounds,
            INVALID_FUNCTION_ARGUMENT,
            "Max standard error must be in [%s, %s]: %s",
            LOWEST_MAX_STANDARD_ERROR,
            HIGHEST_MAX_STANDARD_ERROR,
            maxStandardError);

    double errorSquared = maxStandardError * maxStandardError;
    int requiredBuckets = (int) Math.ceil(1.0816 / errorSquared);
    return log2Ceiling(requiredBuckets);
}
/**
 * Checks {@code standardErrorToBuckets} on both sides of each error value at which the
 * expected bucket count changes, from 1024 up to 65536 buckets.
 */
@Test
public void testStandardErrorToBuckets()
{
    // Each row is {maxStandardError, expectedBuckets}; rows are evaluated in order and the
    // first mismatch fails the test. The bucket counts are small whole numbers, so they are
    // represented exactly as doubles and the narrowing cast below is lossless.
    double[][] expectations = {
            {0.0326, 1024},
            {0.0325, 1024},
            {0.0324, 2048},
            {0.0231, 2048},
            {0.0230, 2048},
            {0.0229, 4096},
            {0.0164, 4096},
            {0.0163, 4096},
            {0.0162, 8192},
            {0.0116, 8192},
            {0.0115, 8192},
            {0.0114, 16384},
            {0.008126, 16384},
            {0.008125, 16384},
            {0.008124, 32768},
            {0.00576, 32768},
            {0.00575, 32768},
            {0.00574, 65536},
            {0.0040626, 65536},
            {0.0040625, 65536},
    };

    for (double[] expectation : expectations) {
        double maxStandardError = expectation[0];
        int expectedBuckets = (int) expectation[1];
        assertEquals(standardErrorToBuckets(maxStandardError), expectedBuckets);
    }
}
/**
 * Input function for a {@code Slice}-represented value of type {@code T} when the caller
 * supplies no maximum standard error.
 *
 * <p>Forwards to the four-argument {@code ApproximateCountDistinctAggregation.input},
 * passing {@code DEFAULT_STANDARD_ERROR} as the error argument.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}, passed through unchanged
 * @param state the aggregation state to update
 * @param value the input value
 */
@InputFunction @TypeParameter("T") public static void input( @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle, @AggregationState HyperLogLogState state, @SqlType("T") Slice value) { ApproximateCountDistinctAggregation.input(methodHandle, state, value, DEFAULT_STANDARD_ERROR); }
/**
 * Adds one {@code double}-represented value to the HyperLogLog sketch held in {@code state}.
 *
 * <p>The sketch is obtained through {@code getOrCreateHyperLogLog}, so
 * {@code maxStandardError} is only consulted when the state has no sketch yet. The value is
 * hashed by invoking the injected {@code XX_HASH_64} operator handle, and the hash is then
 * added to the sketch.
 *
 * <p>The hash is computed before the memory accounting is touched. If the handle throws,
 * the state's reported memory usage is therefore left unchanged, rather than being
 * decremented by the sketch size with no matching re-add.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}; invoked exactly with {@code value} and must return {@code long}
 * @param state aggregation state holding the sketch and its memory accounting
 * @param value the value to hash and add
 * @param maxStandardError maximum standard error forwarded to {@code getOrCreateHyperLogLog}
 */
@InputFunction
@TypeParameter("T")
public static void input(
        @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle,
        @AggregationState HyperLogLogState state,
        @SqlType("T") double value,
        @SqlType(StandardTypes.DOUBLE) double maxStandardError)
{
    HyperLogLog hll = getOrCreateHyperLogLog(state, maxStandardError);

    // Hash first: this is the only step expected to fail, and it must not run between the
    // paired memory-usage adjustments below.
    long hash;
    try {
        hash = (long) methodHandle.invokeExact(value);
    }
    catch (Throwable t) {
        // invokeExact is declared to throw Throwable; rethrow through the internalError helper.
        throw internalError(t);
    }

    // Replace the sketch's previous footprint with its footprint after the insert.
    state.addMemoryUsage(-hll.estimatedInMemorySize());
    hll.addHash(hash);
    state.addMemoryUsage(hll.estimatedInMemorySize());
}
/**
 * Output function for the HyperLogLog-backed state; the declared output type is BIGINT.
 *
 * <p>Forwards to {@code ApproximateCountDistinctAggregation.evaluateFinal}, which is
 * responsible for writing the result into {@code out}.
 *
 * @param state the aggregation state to read the result from
 * @param out the block builder handed to the delegate
 */
@OutputFunction(StandardTypes.BIGINT) public static void evaluateFinal(@AggregationState HyperLogLogState state, BlockBuilder out) { ApproximateCountDistinctAggregation.evaluateFinal(state, out); }
/**
 * Combine function for the HyperLogLog-backed state.
 *
 * <p>Contains no logic of its own: it forwards both states, in the same order, to
 * {@code ApproximateCountDistinctAggregation.combineState}.
 *
 * @param state the first state passed to the delegate (presumably the one that receives the merged result; confirm against the delegate)
 * @param otherState the second state passed to the delegate
 */
@CombineFunction public static void combineState(@AggregationState HyperLogLogState state, @AggregationState HyperLogLogState otherState) { ApproximateCountDistinctAggregation.combineState(state, otherState); }
/**
 * Returns the sketch stored in {@code state}, creating and registering one if none exists.
 *
 * <p>A new sketch is sized with {@code standardErrorToBuckets(maxStandardError)}, stored
 * on the state, and its estimated in-memory size is added to the state's memory usage.
 * When the state already holds a sketch, {@code maxStandardError} is not consulted.
 *
 * @param state aggregation state that owns the sketch
 * @param maxStandardError error bound used only when a new sketch has to be created
 * @return the existing or newly created sketch; never {@code null} unless {@code HyperLogLog.newInstance} returns {@code null}
 */
private static HyperLogLog getOrCreateHyperLogLog(HyperLogLogState state, double maxStandardError)
{
    HyperLogLog existing = state.getHyperLogLog();
    if (existing != null) {
        return existing;
    }

    int buckets = standardErrorToBuckets(maxStandardError);
    HyperLogLog created = HyperLogLog.newInstance(buckets);
    state.setHyperLogLog(created);
    state.addMemoryUsage(created.estimatedInMemorySize());
    return created;
}
/**
 * Input function for a {@code double}-represented value of type {@code T} when the caller
 * supplies no maximum standard error.
 *
 * <p>Forwards to the four-argument {@code ApproximateCountDistinctAggregation.input},
 * passing {@code DEFAULT_STANDARD_ERROR} as the error argument.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}, passed through unchanged
 * @param state the aggregation state to update
 * @param value the input value
 */
@InputFunction @TypeParameter("T") public static void input( @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle, @AggregationState HyperLogLogState state, @SqlType("T") double value) { ApproximateCountDistinctAggregation.input(methodHandle, state, value, DEFAULT_STANDARD_ERROR); }
/**
 * Adds one {@code double}-represented value to the HyperLogLog sketch held in {@code state}.
 *
 * <p>The sketch is obtained through {@code getOrCreateHyperLogLog}, so
 * {@code maxStandardError} is only consulted when the state has no sketch yet. The value is
 * hashed by invoking the injected {@code XX_HASH_64} operator handle, and the hash is then
 * added to the sketch.
 *
 * <p>The hash is computed before the memory accounting is touched. If the handle throws,
 * the state's reported memory usage is therefore left unchanged, rather than being
 * decremented by the sketch size with no matching re-add.
 *
 * @param methodHandle handle for the {@code XX_HASH_64} operator on {@code T}; invoked exactly with {@code value} and must return {@code long}
 * @param state aggregation state holding the sketch and its memory accounting
 * @param value the value to hash and add
 * @param maxStandardError maximum standard error forwarded to {@code getOrCreateHyperLogLog}
 */
@InputFunction
@TypeParameter("T")
public static void input(
        @OperatorDependency(operator = XX_HASH_64, returnType = StandardTypes.BIGINT, argumentTypes = {"T"}) MethodHandle methodHandle,
        @AggregationState HyperLogLogState state,
        @SqlType("T") double value,
        @SqlType(StandardTypes.DOUBLE) double maxStandardError)
{
    HyperLogLog hll = getOrCreateHyperLogLog(state, maxStandardError);

    // Hash first: this is the only step expected to fail, and it must not run between the
    // paired memory-usage adjustments below.
    long hash;
    try {
        hash = (long) methodHandle.invokeExact(value);
    }
    catch (Throwable t) {
        // invokeExact is declared to throw Throwable; rethrow through the internalError helper.
        throw internalError(t);
    }

    // Replace the sketch's previous footprint with its footprint after the insert.
    state.addMemoryUsage(-hll.estimatedInMemorySize());
    hll.addHash(hash);
    state.addMemoryUsage(hll.estimatedInMemorySize());
}
/**
 * Output function for the HyperLogLog-backed state; the declared output type is BIGINT.
 *
 * <p>Forwards to {@code ApproximateCountDistinctAggregation.evaluateFinal}, which is
 * responsible for writing the result into {@code out}.
 *
 * @param state the aggregation state to read the result from
 * @param out the block builder handed to the delegate
 */
@OutputFunction(StandardTypes.BIGINT) public static void evaluateFinal(@AggregationState HyperLogLogState state, BlockBuilder out) { ApproximateCountDistinctAggregation.evaluateFinal(state, out); } }
/**
 * Combine function for the HyperLogLog-backed state.
 *
 * <p>Contains no logic of its own: it forwards both states, in the same order, to
 * {@code ApproximateCountDistinctAggregation.combineState}.
 *
 * @param state the first state passed to the delegate (presumably the one that receives the merged result; confirm against the delegate)
 * @param otherState the second state passed to the delegate
 */
@CombineFunction public static void combineState(@AggregationState HyperLogLogState state, @AggregationState HyperLogLogState otherState) { ApproximateCountDistinctAggregation.combineState(state, otherState); }
/**
 * Returns the sketch stored in {@code state}, creating and registering one if none exists.
 *
 * <p>A new sketch is sized with {@code standardErrorToBuckets(maxStandardError)}, stored
 * on the state, and its estimated in-memory size is added to the state's memory usage.
 * When the state already holds a sketch, {@code maxStandardError} is not consulted.
 *
 * @param state aggregation state that owns the sketch
 * @param maxStandardError error bound used only when a new sketch has to be created
 * @return the existing or newly created sketch; never {@code null} unless {@code HyperLogLog.newInstance} returns {@code null}
 */
private static HyperLogLog getOrCreateHyperLogLog(HyperLogLogState state, double maxStandardError)
{
    HyperLogLog existing = state.getHyperLogLog();
    if (existing != null) {
        return existing;
    }

    int buckets = standardErrorToBuckets(maxStandardError);
    HyperLogLog created = HyperLogLog.newInstance(buckets);
    state.setHyperLogLog(created);
    state.addMemoryUsage(created.estimatedInMemorySize());
    return created;
}