public void addBytes(byte[] val, int offset, int length) { // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter" // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively // implement a Bloom filter without any loss in the asymptotic false positive probability' // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned // in the above paper long hash64 = val == null ? Murmur3.NULL_HASHCODE : Murmur3.hash64(val, offset, length); addHash(hash64); }
private boolean offerImpl(T t) { long oldState = reserveArrayIndex(EMPTY, OBJECTS); if (oldState == NO_INDEX) return false; // For whatever reason, reserve failed. long originalMarker = EMPTY.getMarker(oldState), delta = EMPTY.getDelta(oldState); int arrayIndex = (int)getArrayIndex(originalMarker, delta); if (pool[arrayIndex] != null) { throwError(oldState, arrayIndex, "non-null"); } pool[arrayIndex] = t; commitArrayIndex(EMPTY, OBJECTS, originalMarker); return true; }
/**
 * Creates a filter from an existing {@code BloomFilter}.
 *
 * <p>The bit size and hash-function count are taken from the source filter, and the backing
 * {@code BitSet} is built from a clone of {@code bloomFilter.getBitSet()}, so the new instance
 * does not wrap the very object returned by that getter.
 *
 * @param bloomFilter filter to copy from
 */
public HiveBloomFilter(BloomFilter bloomFilter) {
  // Scalar parameters first, then the cloned bit data.
  this.numBits = bloomFilter.getBitSize();
  this.numHashFunctions = bloomFilter.getNumHashFunctions();
  this.bitSet = new BitSet(bloomFilter.getBitSet().clone());
}
/**
 * Hashes a range of {@code val} to 64 bits and returns the result of {@code testHash} for it.
 *
 * @param val    bytes to look up; {@code null} is represented by {@code Murmur3.NULL_HASHCODE}
 * @param offset start position handed to {@code Murmur3.hash64}
 * @param length number of bytes handed to {@code Murmur3.hash64}
 * @return whatever {@code testHash} reports for the computed hash
 */
public boolean testBytes(byte[] val, int offset, int length) {
  final long hash64;
  if (val == null) {
    hash64 = Murmur3.NULL_HASHCODE;
  } else {
    hash64 = Murmur3.hash64(val, offset, length);
  }
  return testHash(hash64);
}
/**
 * Creates a filter sized for the given entry count and false positive probability.
 *
 * <p>The bit count returned by {@code optimalNumOfBits} is padded up to a multiple of
 * {@code Long.SIZE}. A count that is already a multiple of {@code Long.SIZE} still grows by a
 * full {@code Long.SIZE} bits, because the remainder is then zero.
 *
 * @param expectedEntries expected number of entries; must be {@code > 0}
 * @param fpp             desired false positive probability; must be strictly between 0.0 and 1.0
 */
public BloomFilter(long expectedEntries, double fpp) {
  checkArgument(expectedEntries > 0, "expectedEntries should be > 0");
  checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0");

  final int rawBits = optimalNumOfBits(expectedEntries, fpp);
  // make 'm' multiple of 64
  final int remainder = rawBits % Long.SIZE;
  this.numBits = rawBits + (Long.SIZE - remainder);

  this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, numBits);
  this.bitSet = new BitSet(numBits);
}
public void addBytes(byte[] val, int offset, int length) { // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter" // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively // implement a Bloom filter without any loss in the asymptotic false positive probability' // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned // in the above paper long hash64 = val == null ? Murmur3.NULL_HASHCODE : Murmur3.hash64(val, offset, length); addHash(hash64); }
public HiveBloomFilter(List<Long> bits, int numBits, int numHashFunctions) { this.bitSet = new BitSet(Longs.toArray(bits)); this.numBits = numBits; this.numHashFunctions = numHashFunctions; }
/**
 * Computes a hash code from the bit count, the hash-function count and the contents of the
 * bit set's backing data.
 *
 * <p>The backing data is hashed with {@code Arrays.hashCode} so that the result depends on
 * the array's contents. Passing the array straight to {@code Objects.hash} would mix in its
 * identity hash code, giving different results for filters holding equal data in distinct
 * array instances.
 *
 * <p>NOTE(review): assumes {@code bitSet.getData()} returns a {@code long[]} -- confirm
 * against the {@code BitSet} definition.
 *
 * @return content-based hash code of this filter
 */
@Override
public int hashCode() {
  // Fully qualified so the fix does not depend on an additional import.
  return Objects.hash(numBits, numHashFunctions, java.util.Arrays.hashCode(bitSet.getData()));
}
private long applyDeltaToMarker(long marker, long markerLimit, long delta) { if (delta == NO_DELTA) return marker; // List was recreated while we were exhausting it. if (delta == pool.length) { assert markerLimit == NO_MARKER; // If we had the entire pool, other list couldn't exist. return NO_MARKER; // We exhausted the entire-pool-sized list. } marker = getArrayIndex(marker, delta); // Just move the marker according to delta. if (marker == markerLimit) return NO_MARKER; // We hit the limit - the list was exhausted. return marker; }
public void addLong(long val) { // puts long in little endian order addHash(Murmur3.hash64(val)); }
private T takeImpl() { long oldState = reserveArrayIndex(OBJECTS, EMPTY); if (oldState == NO_INDEX) return null; // For whatever reason, reserve failed. long originalMarker = OBJECTS.getMarker(oldState), delta = OBJECTS.getDelta(oldState); int arrayIndex = (int)getArrayIndex(originalMarker, delta); T result = pool[arrayIndex]; if (result == null) { throwError(oldState, arrayIndex, "null"); } pool[arrayIndex] = null; commitArrayIndex(OBJECTS, EMPTY, originalMarker); return result; }
/**
 * Feeds this filter's state into the given hasher: first {@code numBits}, then
 * {@code numHashFunctions}, then the data returned by {@code bitSet.getData()}.
 *
 * @param hasher hasher that receives the values through its chained
 *               {@code putInt}/{@code putLongs} calls
 */
@Override
public void addHash(StatisticsHasher hasher) {
  hasher.putInt(numBits)
      .putInt(numHashFunctions)
      .putLongs(bitSet.getData());
}
// Closes the enclosing scope (presumably the class body; its opening is not shown here).
}
/**
 * ByteBuffer based copy of {@link BloomKFilter#addBytes(byte[], int, int)} that adds a value to the ByteBuffer
 * in place.
 *
 * @param buffer buffer passed on to {@code addHash(ByteBuffer, long)}
 * @param val    bytes to add; {@code null} is represented by {@code Murmur3.NULL_HASHCODE}
 * @param offset start position handed to {@code Murmur3.hash64}
 * @param length number of bytes handed to {@code Murmur3.hash64}
 */
public static void addBytes(ByteBuffer buffer, byte[] val, int offset, int length) {
  final long hash64;
  if (val == null) {
    hash64 = Murmur3.NULL_HASHCODE;
  } else {
    hash64 = Murmur3.hash64(val, offset, length);
  }
  addHash(buffer, hash64);
}
/**
 * ByteBuffer based copy of {@link BloomKFilter#addLong(long)} that adds a value to the ByteBuffer in place.
 *
 * @param buffer buffer passed on to {@code addHash(ByteBuffer, long)}
 * @param val    value to hash with {@code Murmur3.hash64} and add
 */
public static void addLong(ByteBuffer buffer, long val) {
  final long hash64 = Murmur3.hash64(val);
  addHash(buffer, hash64);
}
public void addBytes(byte[] val, int offset, int length) { // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter" // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively // implement a Bloom filter without any loss in the asymptotic false positive probability' // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned // in the above paper long hash64 = val == null ? Murmur3.NULL_HASHCODE : Murmur3.hash64(val, offset, length); addHash(hash64); }