/** * Determines & initializes bloom filter meta data from user config. Call * {@link #allocBloom()} to allocate bloom filter data. * * @param maxKeys Maximum expected number of keys that will be stored in this * bloom * @param errorRate Desired false positive error rate. Lower rate = more * storage required * @param hashType Type of hash function to use * @param foldFactor When finished adding entries, you may be able to 'fold' * this bloom to save space. Tradeoff potentially excess bytes in * bloom for ability to fold if keyCount is exponentially greater * than maxKeys. * @throws IllegalArgumentException */ // Used only in testcases public BloomFilterChunk(int maxKeys, double errorRate, int hashType, int foldFactor) throws IllegalArgumentException { this(hashType, BloomType.ROW); long bitSize = BloomFilterUtil.computeBitSize(maxKeys, errorRate); hashCount = BloomFilterUtil.optimalFunctionCount(maxKeys, bitSize); this.maxKeys = maxKeys; // increase byteSize so folding is possible byteSize = BloomFilterUtil.computeFoldableByteSize(bitSize, foldFactor); sanityCheck(); }
public void testSizing() { int bitSize = 8 * 128 * 1024; // 128 KB double errorRate = 0.025; // target false positive rate // How many keys can we store in a Bloom filter of this size maintaining // the given false positive rate, not taking into account that the n long maxKeys = BloomFilterUtil.idealMaxKeys(bitSize, errorRate); assertEquals(136570, maxKeys); // A reverse operation: how many bits would we need to store this many keys // and keep the same low false positive rate? long bitSize2 = BloomFilterUtil.computeBitSize(maxKeys, errorRate); // The bit size comes out a little different due to rounding. assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5); }
/**
 * Checks that a single 4096-byte bloom block is sized consistently: the bit
 * size computed for the block's ideal key count must be within 0.0001 of
 * 0.9999 times the block's actual bit size.
 */
@Test
public void testCompoundBloomSizing() {
  final int blockBytes = 4096;
  final int blockBits = blockBytes * 8;
  final double errorRate = 0.01;

  // Key capacity of one block at the target error rate.
  final long keysPerChunk = BloomFilterUtil.idealMaxKeys(blockBits, errorRate);

  // Actual block size in bits versus the size recomputed from the capacity.
  final long actualBits = blockBytes * 8;
  final long recomputedBits = BloomFilterUtil.computeBitSize(keysPerChunk, errorRate);

  final double sizeRatio = recomputedBits * 1.0 / actualBits;
  assertTrue(Math.abs(sizeRatio - 0.9999) < 0.0001);
}
public void testSizing() { int bitSize = 8 * 128 * 1024; // 128 KB double errorRate = 0.025; // target false positive rate // How many keys can we store in a Bloom filter of this size maintaining // the given false positive rate, not taking into account that the n long maxKeys = BloomFilterUtil.idealMaxKeys(bitSize, errorRate); assertEquals(136570, maxKeys); // A reverse operation: how many bits would we need to store this many keys // and keep the same low false positive rate? long bitSize2 = BloomFilterUtil.computeBitSize(maxKeys, errorRate); // The bit size comes out a little different due to rounding. assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5); }
/**
 * Checks that a single 4096-byte bloom block is sized consistently: the bit
 * size computed for the block's ideal key count must be within 0.0001 of
 * 0.9999 times the block's actual bit size.
 */
@Test
public void testCompoundBloomSizing() {
  final int blockBytes = 4096;
  final int blockBits = blockBytes * 8;
  final double errorRate = 0.01;

  // Key capacity of one block at the target error rate.
  final long keysPerChunk = BloomFilterUtil.idealMaxKeys(blockBits, errorRate);

  // Actual block size in bits versus the size recomputed from the capacity.
  final long actualBits = blockBytes * 8;
  final long recomputedBits = BloomFilterUtil.computeBitSize(keysPerChunk, errorRate);

  final double sizeRatio = recomputedBits * 1.0 / actualBits;
  assertTrue(Math.abs(sizeRatio - 0.9999) < 0.0001);
}