/**
 *  Create a new listener that delegates to a future.
 *  Both arguments are handed unchanged to the superclass constructor, and
 *  the listener starts with a freshly default-constructed BloomFilter as
 *  its duplicate filter.
 *  @param future The delegation target.
 *  @param undone Flag passed through to the superclass; per the original
 *    documentation it selects the "undone" method variants (exact semantics
 *    are defined by the superclass, which is not visible here).
 */
public DuplicateRemovalIntermediateResultListener(IntermediateFuture<E> future, boolean undone)
{
    super(future, undone);
    // Each listener instance gets its own filter.
    this.filter = new BloomFilter();
}
/**
 *  Create a new BloomFilter from a target error rate and expected size.
 *  Delegates to the three-argument constructor with
 *  computeOptimalM(n, p), computeOptimalK(n, computeOptimalM(n, p)) and n.
 *  (Presumably these are the bit count m and the hash count k -
 *  confirm against the helper methods.)
 *  @param p The acceptable false positive rate.
 *  @param n The expected number of entries.
 */
public BloomFilter(double p, int n)
{
    // computeOptimalM(n, p) is evaluated twice: the value cannot be stored
    // in a local variable ahead of the this(...) delegation call.
    this(computeOptimalM(n, p), computeOptimalK(n, computeOptimalM(n, p)), n);
}
/** * Main for testing. */ public static void main(String[] args) { byte[] value = new byte[]{9}; // BloomFilter bf = new BloomFilter(10, 2); BloomFilter bf = new BloomFilter(0.05, 1000); System.out.println(bf); System.out.println("Query for 9: " + bf.mightContain(value)); System.out.println("Adding 9"); bf.add(value); System.out.println("Query for 9: " + bf.mightContain(value)); } }
/** * Test if a value is contained in the filter. * @param value The value. * @return True, if value might be contained. */ public boolean mightContain(byte[] value) { boolean ret = true; int[] hashes = hashK(value, m, k); for(int i=0; i<k && ret; ++i) { // System.out.println("hash is: "+hashes[i]); ret = bs.get(hashes[i]); } return ret; }
/**
 *  Compute k hashes based on two hashes using
 *  (hash1+i*hash2)%m.
 *  The first base hash is murmur3 with seed 0, the second is murmur3
 *  seeded with the first (already reduced) hash. Both base hashes are
 *  reduced to a non-negative remainder before they are combined, so for
 *  a positive m every returned value lies in [0, m).
 *  @param value The byte array.
 *  @param m The maximum number of values (modulus for all hashes).
 *  @param k The number of hash values to compute.
 *  @return K hashes.
 */
public static int[] hashK(byte[] value, int m, int k)
{
    int[] ret = new int[k];
    long hash1 = nonNegativeMod(murmur3(value, 0), m);
    // hash1 is smaller than m here, so narrowing it to int loses nothing.
    long hash2 = nonNegativeMod(murmur3(value, (int)hash1), m);
    for(int i = 0; i < k; i++)
    {
        // hash2 is a long, so i*hash2 is computed in 64 bit and cannot overflow.
        ret[i] = (int)((hash1 + i * hash2) % m);
    }
    return ret;
}

/**
 *  Reduce a raw hash to abs(hash) % m without ever producing a negative result.
 *  Math.abs returns a negative number for the MIN_VALUE of its argument type;
 *  taking the hash as long avoids that for int hashes, and the final
 *  correction covers Long.MIN_VALUE. All other inputs give exactly
 *  Math.abs(hash) % m.
 *  @param hash The raw hash value.
 *  @param m The modulus.
 *  @return The non-negative remainder (for positive m).
 */
private static long nonNegativeMod(long hash, int m)
{
    long rem = Math.abs(hash) % m;
    return rem < 0 ? rem + m : rem;
}
/** * Check results before sending them further. */ public void customIntermediateResultAvailable(E result) { if(filter.add(objectToByteArray(result))) { System.out.println("addfil: "+result+" "+filter.hashCode()); super.customIntermediateResultAvailable(result); } // else // { // System.out.println("Filtered out duplicate: "+result); // } }
/** * Add a value to the filter. * @param value The value. */ public boolean add(byte[] value) { boolean ret = false; int[] hashes = hashK(value, m, k); for(int i=0; i<k; ++i) { // System.out.println("hash is: "+hashes[i]); if(!bs.get(hashes[i])) { bs.set(hashes[i]); ret = true; } } return ret; }
/**
 *  Create a new listener that delegates to another listener.
 *  Both arguments are handed unchanged to the superclass constructor, and
 *  the listener starts with a freshly default-constructed BloomFilter as
 *  its duplicate filter.
 *  @param delegate The delegation target.
 *  @param undone Flag passed through to the superclass; per the original
 *    documentation it selects the "undone" method variants (exact semantics
 *    are defined by the superclass, which is not visible here).
 */
public DuplicateRemovalIntermediateResultListener(IIntermediateResultListener<E> delegate, boolean undone)
{
    super(delegate, undone);
    // Each listener instance gets its own filter.
    this.filter = new BloomFilter();
}