/**
 * Writes the serialized form of a newly constructed {@link BloomKFilter}, sized for
 * {@code maxNumEntries}, into {@code buf} starting at {@code position}.
 *
 * @param buf      buffer that receives the serialized filter
 * @param position offset within {@code buf} at which the serialized filter starts
 */
@Override
public void init(ByteBuffer buf, int position)
{
  // Operate on a duplicate so the position of the caller's buffer is not moved.
  final ByteBuffer target = buf.duplicate();
  target.position(position);
  BloomKFilter.serialize(target, new BloomKFilter(maxNumEntries));
}
/**
 * ByteBuffer based counterpart of {@link BloomKFilter#addInt(int)}: converts the value with
 * {@code intToByteArrayLE} and adds the resulting bytes to the filter held in the buffer.
 *
 * @param buffer buffer holding the filter to update
 * @param val    value to add
 */
public static void addInt(ByteBuffer buffer, int val)
{
  final byte[] encoded = intToByteArrayLE(val);
  addBytes(buffer, encoded);
}
/**
 * ByteBuffer based counterpart of {@link BloomKFilter#addFloat(float)}: the float is added as
 * the int produced by {@link Float#floatToIntBits(float)}.
 *
 * @param buffer buffer holding the filter to update
 * @param val    value to add
 */
public static void addFloat(ByteBuffer buffer, float val)
{
  final int bits = Float.floatToIntBits(val);
  addInt(buffer, bits);
}
/** * Calculate size in bytes of a BloomKFilter for a given number of entries */ public static int computeSizeBytes(long maxNumEntries) { // copied from constructor checkArgument(maxNumEntries > 0, "expectedEntries should be > 0"); long numBits = optimalNumOfBits(maxNumEntries, DEFAULT_FPP); int nLongs = (int) Math.ceil((double) numBits / (double) Long.SIZE); int padLongs = DEFAULT_BLOCK_SIZE - nLongs % DEFAULT_BLOCK_SIZE; return START_OF_SERIALIZED_LONGS + ((nLongs + padLongs) * Long.BYTES); }
/**
 * Creates a filter sized for the given number of entries, using {@code DEFAULT_FPP} to pick
 * the number of bits and hash functions.
 *
 * @param maxNumEntries expected number of entries, must be greater than zero
 */
public BloomKFilter(long maxNumEntries)
{
  checkArgument(maxNumEntries > 0, "expectedEntries should be > 0");
  final long bitCount = optimalNumOfBits(maxNumEntries, DEFAULT_FPP);
  this.k = optimalNumOfHashFunctions(maxNumEntries, bitCount);

  final int dataLongs = (int) Math.ceil((double) bitCount / (double) Long.SIZE);
  // Extra longs that pad the array to a block boundary; an already aligned count still
  // gains one whole block.
  final int paddedLongs = dataLongs + (DEFAULT_BLOCK_SIZE - dataLongs % DEFAULT_BLOCK_SIZE);
  this.m = paddedLongs * Long.SIZE;
  this.bitSet = new BitSet(this.m);

  final int backingLongs = bitSet.data.length;
  checkArgument((backingLongs % DEFAULT_BLOCK_SIZE) == 0, "bitSet has to be block aligned");
  this.totalBlockCount = bitSet.data.length / DEFAULT_BLOCK_SIZE;
}
/**
 * ByteBuffer based copy of {@link BloomKFilter#addBytes(byte[])} that adds a value to the
 * ByteBuffer in place.
 *
 * <p>A {@code null} value is forwarded as {@code (null, 0, 0)} instead of failing on
 * {@code val.length}. This is the same argument shape other callers use when handing a null
 * value to the four-argument overload (NOTE: assumes that overload accepts a null array, as
 * those callers imply; confirm against its implementation).
 *
 * @param buffer buffer holding the filter to update
 * @param val    bytes to add in full, or {@code null}
 */
public static void addBytes(ByteBuffer buffer, byte[] val)
{
  if (val == null) {
    // Avoid the NullPointerException from val.length; let the ranged overload handle null.
    addBytes(buffer, null, 0, 0);
  } else {
    addBytes(buffer, val, 0, val.length);
  }
}
/**
 * Adds the current value(s) of {@code selector} to the filter held in {@code buf}.
 *
 * <p>When the current row holds more than one entry, every entry is resolved with
 * {@code selector.lookupName} and added individually; otherwise the selector's object is cast
 * to {@code String} and added. A null value is added through
 * {@code BloomKFilter.addBytes(buf, null, 0, 0)}.
 *
 * @param buf buffer holding the filter to update
 */
@Override
public void bufferAdd(ByteBuffer buf)
{
  if (selector.getRow().size() <= 1) {
    // Zero or one entry: take the value straight from the selector.
    final String single = (String) selector.getObject();
    if (single != null) {
      BloomKFilter.addString(buf, single);
    } else {
      BloomKFilter.addBytes(buf, null, 0, 0);
    }
    return;
  }

  // Multi-value row: add each entry after resolving it to its name.
  selector.getRow().forEach(entry -> {
    final String name = selector.lookupName(entry);
    if (name != null) {
      BloomKFilter.addString(buf, name);
    } else {
      BloomKFilter.addBytes(buf, null, 0, 0);
    }
  });
}
}
/**
 * Merges the filter supplied by {@code selector} into {@code combined}. A null filter is
 * ignored; {@code combined} is created on first use with {@code maxNumEntries}.
 *
 * @param selector selector whose object is the {@link BloomKFilter} to merge
 */
@Override
public void fold(ColumnValueSelector selector)
{
  final BloomKFilter incoming = (BloomKFilter) selector.getObject();
  if (incoming != null) {
    if (combined == null) {
      combined = new BloomKFilter(maxNumEntries);
    }
    combined.merge(incoming);
  }
}
public void addLong(long val) { // puts long in little endian order addHash(Murmur3.hash64(val)); }
/**
 * Deserialize a bloom filter.
 * Reads a byte stream, which was written by {@linkplain #serialize(OutputStream, BloomKFilter)},
 * into a {@code BloomKFilter}: one byte with the number of hash functions, an int with the
 * number of longs, then that many longs.
 *
 * @param in input bytestream; it is not closed by this method
 *
 * @return deserialized BloomKFilter
 *
 * @throws IOException if {@code in} is null, if reading fails, or if a runtime failure occurs
 *                     while rebuilding the filter (the failure is attached as the cause)
 */
public static BloomKFilter deserialize(InputStream in) throws IOException
{
  if (in == null) {
    throw new IOException("Input stream is null");
  }

  try {
    final DataInputStream reader = new DataInputStream(in);
    final int hashFunctionCount = reader.readByte();
    final long[] words = new long[reader.readInt()];
    for (int idx = 0; idx < words.length; idx++) {
      words[idx] = reader.readLong();
    }
    return new BloomKFilter(words, hashFunctionCount);
  }
  catch (RuntimeException e) {
    // Surface runtime failures (e.g. a negative length) as IOException, keeping the cause.
    throw new IOException("Unable to deserialize BloomKFilter", e);
  }
}
/**
 * ByteBuffer based counterpart of {@link BloomKFilter#addDouble(double)} that adds a value to
 * the ByteBuffer in place: the double is added as the long produced by
 * {@link Double#doubleToLongBits(double)}.
 *
 * @param buffer buffer holding the filter to update
 * @param val    value to add
 */
public static void addDouble(ByteBuffer buffer, double val)
{
  final long bits = Double.doubleToLongBits(val);
  addLong(buffer, bits);
}

// custom Druid ByteBuffer methods end here
/**
 * Reports the intermediate size as the value {@link BloomKFilter#computeSizeBytes(long)}
 * returns for {@code maxNumEntries}.
 *
 * @return size in bytes
 */
@Override
public int getMaxIntermediateSize()
{
  final int sizeBytes = BloomKFilter.computeSizeBytes(maxNumEntries);
  return sizeBytes;
}
/** * A constructor to support rebuilding the BloomFilter from a serialized representation. * * @param bits * @param numFuncs */ public BloomKFilter(long[] bits, int numFuncs) { super(); bitSet = new BitSet(bits); this.m = bits.length * Long.SIZE; this.k = numFuncs; checkArgument((bitSet.data.length % DEFAULT_BLOCK_SIZE) == 0, "bitSet has to be block aligned"); this.totalBlockCount = bitSet.data.length / DEFAULT_BLOCK_SIZE; }
/**
 * ByteBuffer based counterpart of {@link BloomKFilter#add(byte[])}: adds the value to the
 * filter held in the buffer by delegating to {@code addBytes(buffer, val)}.
 *
 * @param buffer buffer holding the filter to update
 * @param val    bytes to add
 */
public static void add(ByteBuffer buffer, byte[] val)
{
  addBytes(buffer, val);
}