private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt( IO_MAPFILE_BLOOM_SIZE_KEY, IO_MAPFILE_BLOOM_SIZE_DEFAULT); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat( IO_MAPFILE_BLOOM_ERROR_RATE_KEY, IO_MAPFILE_BLOOM_ERROR_RATE_DEFAULT); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Appends the pair through the superclass and then registers the key with
 * the bloom filter.
 *
 * @param key key to append and to add to the bloom filter
 * @param val value handed to the superclass together with the key
 * @throws IOException declared by this method; see the calls below
 */
@Override
public synchronized void append(WritableComparable key, Writable val)
    throws IOException {
  super.append(key, val);

  // Re-use the shared buffer: clear it, then let the key write itself into it.
  buf.reset();
  key.write(buf);

  // The reusable bloom key is refreshed from the buffer contents; the
  // second argument is the constant 1.0 (presumably the key weight).
  final byte[] bloomKeyBytes = byteArrayForBloomKey(buf);
  bloomKey.set(bloomKeyBytes, 1.0);
  bloomFilter.add(bloomKey);
}
/**
 * Adds a key to the currently active standard filter, appending a new row
 * to the matrix first when no active filter is returned.
 *
 * @param key key to add; must not be null
 * @throws NullPointerException if {@code key} is null
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  BloomFilter target = getActiveStandardBF();
  if (target == null) {
    // No active filter was returned: add a row, use the last matrix entry
    // and restart the record counter.
    addRow();
    final int lastRow = matrix.length - 1;
    target = matrix[lastRow];
    currentNbRecord = 0;
  }

  target.add(key);
  currentNbRecord++;
}
/**
 * Loads the bloom filter stored under {@code dirName}. On any
 * {@link IOException} a warning is logged and {@code bloomFilter} is set to
 * null.
 *
 * @param dirName directory containing the bloom filter file
 * @param conf    configuration used to resolve the file system of
 *                {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream bloomIn = null;
  try {
    final FileSystem fileSys = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    bloomIn = fileSys.open(bloomPath);

    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(bloomIn);

    // Close here so that an IOException from close() reaches the catch
    // block; the reference is cleared once the stream is closed.
    bloomIn.close();
    bloomIn = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Cleanup for the paths on which the stream was not closed above.
    IOUtils.closeStream(bloomIn);
  }
}
/**
 * Closes the superclass and then writes the bloom filter to
 * {@code BLOOM_FILE_NAME} under {@code dir}.
 *
 * @throws IOException if closing the superclass, creating the file, or
 *                     writing, flushing or closing the stream fails
 */
@Override
public synchronized void close() throws IOException {
  super.close();

  final Path bloomPath = new Path(dir, BLOOM_FILE_NAME);
  DataOutputStream bloomOut = fs.create(bloomPath, true);
  try {
    bloomFilter.write(bloomOut);
    bloomOut.flush();
    // Explicit close so that a failure here propagates to the caller.
    bloomOut.close();
    bloomOut = null;
  } finally {
    // Cleanup for the paths on which the stream was not closed above.
    IOUtils.closeStream(bloomOut);
  }
}
/**
 * Tests whether this MapFile may contain the given key. The check is done
 * with a Bloom filter, so a {@code true} answer can be a false positive.
 * When no Bloom filter is loaded ({@code bloomFilter} is null) the method
 * answers {@code true} without testing.
 *
 * @param key key to check
 * @return false iff key doesn't exist, true if key probably exists.
 * @throws IOException declared by this method; may be raised while the key
 *                     is written to the internal buffer
 */
public boolean probablyHasKey(WritableComparable key) throws IOException {
  if (bloomFilter != null) {
    // Serialize the key into the reusable buffer and refresh the bloom key.
    buf.reset();
    key.write(buf);
    bloomKey.set(byteArrayForBloomKey(buf), 1.0);

    final boolean mayContain = bloomFilter.membershipTest(bloomKey);
    return mayContain;
  }
  // No filter available: every key is reported as possibly present.
  return true;
}
/**
 * Loads the bloom filter stored under {@code dirName}. On any
 * {@link IOException} a warning is logged and {@code bloomFilter} is set to
 * null. The input stream is always released, including when reading the
 * filter fails part-way.
 *
 * @param fs      file system used to open the bloom filter file
 * @param dirName directory containing the bloom filter file
 * @param conf    configuration (not used by this method)
 */
private void initBloomFilter(FileSystem fs, String dirName,
    Configuration conf) {
  DataInputStream in = null;
  try {
    in = fs.open(new Path(dirName, BLOOM_FILE_NAME));
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(in);
    // Close here so that an IOException from close() reaches the catch
    // block, exactly as before.
    in.close();
    in = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Previously the stream leaked whenever readFields() threw. Release it
    // here on every path where it was not closed above.
    if (in != null) {
      try {
        in.close();
      } catch (IOException ignored) {
        // Best-effort cleanup: the primary failure was already handled.
      }
    }
  }
}
/**
 * Closes the superclass and then writes the bloom filter to
 * {@code BLOOM_FILE_NAME} under {@code dir}. The output stream is always
 * released, including when writing the filter fails.
 *
 * @throws IOException if closing the superclass, creating the file, or
 *                     writing, flushing or closing the stream fails
 */
@Override
public synchronized void close() throws IOException {
  super.close();
  DataOutputStream out = fs.create(new Path(dir, BLOOM_FILE_NAME), true);
  try {
    bloomFilter.write(out);
    out.flush();
    // Explicit close so that a failure here still propagates to the caller.
    out.close();
    out = null;
  } finally {
    // Previously the stream leaked whenever write() or flush() threw.
    // Release it here on every path where it was not closed above.
    if (out != null) {
      try {
        out.close();
      } catch (IOException ignored) {
        // Best-effort cleanup: the original exception is the one that
        // should reach the caller.
      }
    }
  }
}
/**
 * Tests whether this MapFile may contain the given key. The check is done
 * with a Bloom filter, so a {@code true} answer can be a false positive.
 * When no Bloom filter is loaded ({@code bloomFilter} is null) the method
 * answers {@code true} without testing.
 *
 * @param key key to check
 * @return false iff key doesn't exist, true if key probably exists.
 * @throws IOException declared by this method; may be raised while the key
 *                     is written to the internal buffer
 */
public boolean probablyHasKey(WritableComparable key) throws IOException {
  if (bloomFilter != null) {
    // Serialize the key into the reusable buffer and refresh the bloom key.
    buf.reset();
    key.write(buf);
    bloomKey.set(byteArrayForBloomKey(buf), 1.0);

    final boolean mayContain = bloomFilter.membershipTest(bloomKey);
    return mayContain;
  }
  // No filter available: every key is reported as possibly present.
  return true;
}
/**
 * Creates a {@link DynamicBloomFilter} whose vector size is computed as
 * <code>-kn / ln(1 - c^(1/k))</code>, where <code>k</code> is
 * {@code nbHash}, <code>n</code> is {@code expectedNumberOfElements} and
 * <code>c</code> is {@code errorRate}.
 *
 * @param expectedNumberOfElements number of elements used for sizing
 * @param errorRate                error rate used for sizing
 * @param nbHash                   number of hash functions
 * @return a new filter using {@code Hash.MURMUR_HASH}
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
    @Nonnegative final int expectedNumberOfElements,
    @Nonnegative final float errorRate, @Nonnegative final int nbHash) {
  // Widen to double BEFORE multiplying: -k*n does not fit in an int for
  // large element counts and would silently wrap around.
  final double numerator = -nbHash * (double) expectedNumberOfElements;
  final double denominator = Math.log(1.d - Math.pow(errorRate, 1.d / nbHash));
  final int vectorSize = (int) Math.ceil(numerator / denominator);
  return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
      expectedNumberOfElements);
}
/**
 * Loads the bloom filter stored under {@code dirName}. On any
 * {@link IOException} a warning is logged and {@code bloomFilter} is set to
 * null.
 *
 * @param dirName directory containing the bloom filter file
 * @param conf    configuration used to resolve the file system of
 *                {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream bloomIn = null;
  try {
    final FileSystem fileSys = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    bloomIn = fileSys.open(bloomPath);

    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(bloomIn);

    // Close here so that an IOException from close() reaches the catch
    // block; the reference is cleared once the stream is closed.
    bloomIn.close();
    bloomIn = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Cleanup for the paths on which the stream was not closed above.
    IOUtils.closeStream(bloomIn);
  }
}
/**
 * Adds a key to the currently active standard filter, appending a new row
 * to the matrix first when no active filter is returned.
 *
 * @param key key to add; must not be null
 * @throws NullPointerException if {@code key} is null
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  BloomFilter target = getActiveStandardBF();
  if (target == null) {
    // No active filter was returned: add a row, use the last matrix entry
    // and restart the record counter.
    addRow();
    final int lastRow = matrix.length - 1;
    target = matrix[lastRow];
    currentNbRecord = 0;
  }

  target.add(key);
  currentNbRecord++;
}
/**
 * Appends the pair through the superclass and then registers the key with
 * the bloom filter.
 *
 * @param key key to append and to add to the bloom filter
 * @param val value handed to the superclass together with the key
 * @throws IOException declared by this method; see the calls below
 */
@Override
public synchronized void append(WritableComparable key, Writable val)
    throws IOException {
  super.append(key, val);

  // Re-use the shared buffer: clear it, then let the key write itself into it.
  buf.reset();
  key.write(buf);

  // The reusable bloom key is refreshed from the buffer contents; the
  // second argument is the constant 1.0 (presumably the key weight).
  final byte[] bloomKeyBytes = byteArrayForBloomKey(buf);
  bloomKey.set(bloomKeyBytes, 1.0);
  bloomFilter.add(bloomKey);
}
/**
 * Closes the superclass and then writes the bloom filter to
 * {@code BLOOM_FILE_NAME} under {@code dir}.
 *
 * @throws IOException if closing the superclass, creating the file, or
 *                     writing, flushing or closing the stream fails
 */
@Override
public synchronized void close() throws IOException {
  super.close();

  final Path bloomPath = new Path(dir, BLOOM_FILE_NAME);
  DataOutputStream bloomOut = fs.create(bloomPath, true);
  try {
    bloomFilter.write(bloomOut);
    bloomOut.flush();
    // Explicit close so that a failure here propagates to the caller.
    bloomOut.close();
    bloomOut = null;
  } finally {
    // Cleanup for the paths on which the stream was not closed above.
    IOUtils.closeStream(bloomOut);
  }
}
/**
 * Tests whether this MapFile may contain the given key. The check is done
 * with a Bloom filter, so a {@code true} answer can be a false positive.
 * When no Bloom filter is loaded ({@code bloomFilter} is null) the method
 * answers {@code true} without testing.
 *
 * @param key key to check
 * @return false iff key doesn't exist, true if key probably exists.
 * @throws IOException declared by this method; may be raised while the key
 *                     is written to the internal buffer
 */
public boolean probablyHasKey(WritableComparable key) throws IOException {
  if (bloomFilter != null) {
    // Serialize the key into the reusable buffer and refresh the bloom key.
    buf.reset();
    key.write(buf);
    bloomKey.set(byteArrayForBloomKey(buf), 1.0);

    final boolean mayContain = bloomFilter.membershipTest(bloomKey);
    return mayContain;
  }
  // No filter available: every key is reported as possibly present.
  return true;
}
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Loads the bloom filter stored under {@code dirName}. On any
 * {@link IOException} a warning is logged and {@code bloomFilter} is set to
 * null.
 *
 * @param dirName directory containing the bloom filter file
 * @param conf    configuration used to resolve the file system of
 *                {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream bloomIn = null;
  try {
    final FileSystem fileSys = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    bloomIn = fileSys.open(bloomPath);

    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(bloomIn);

    // Close here so that an IOException from close() reaches the catch
    // block; the reference is cleared once the stream is closed.
    bloomIn.close();
    bloomIn = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Cleanup for the paths on which the stream was not closed above.
    IOUtils.closeStream(bloomIn);
  }
}
/**
 * Adds a key to the currently active standard filter, appending a new row
 * to the matrix first when no active filter is returned.
 *
 * @param key key to add; must not be null
 * @throws NullPointerException if {@code key} is null
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  BloomFilter target = getActiveStandardBF();
  if (target == null) {
    // No active filter was returned: add a row, use the last matrix entry
    // and restart the record counter.
    addRow();
    final int lastRow = matrix.length - 1;
    target = matrix[lastRow];
    currentNbRecord = 0;
  }

  target.add(key);
  currentNbRecord++;
}
/**
 * Appends the pair through the superclass and then registers the key with
 * the bloom filter.
 *
 * @param key key to append and to add to the bloom filter
 * @param val value handed to the superclass together with the key
 * @throws IOException declared by this method; see the calls below
 */
@Override
public synchronized void append(WritableComparable key, Writable val)
    throws IOException {
  super.append(key, val);

  // Re-use the shared buffer: clear it, then let the key write itself into it.
  buf.reset();
  key.write(buf);

  // The reusable bloom key is refreshed from the buffer contents; the
  // second argument is the constant 1.0 (presumably the key weight).
  final byte[] bloomKeyBytes = byteArrayForBloomKey(buf);
  bloomKey.set(bloomKeyBytes, 1.0);
  bloomFilter.add(bloomKey);
}
/**
 * Closes the superclass and then writes the bloom filter to
 * {@code BLOOM_FILE_NAME} under {@code dir}.
 *
 * @throws IOException if closing the superclass, creating the file, or
 *                     writing, flushing or closing the stream fails
 */
@Override
public synchronized void close() throws IOException {
  super.close();

  final Path bloomPath = new Path(dir, BLOOM_FILE_NAME);
  DataOutputStream bloomOut = fs.create(bloomPath, true);
  try {
    bloomFilter.write(bloomOut);
    bloomOut.flush();
    // Explicit close so that a failure here propagates to the caller.
    bloomOut.close();
    bloomOut = null;
  } finally {
    // Cleanup for the paths on which the stream was not closed above.
    IOUtils.closeStream(bloomOut);
  }
}