Refine search
/**
 * Deserializes this filter: superclass state first, then the
 * false-positive key vector, the key vector, and finally the ratio table.
 *
 * NOTE(review): the read order must mirror {@code write(DataOutput)} exactly.
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);
  // Allocate the (empty) per-slot lists before filling them.
  createVector();
  readKeyLists(in, fpVector);
  readKeyLists(in, keyVector);
  for (int i = 0; i < ratio.length; i++) {
    ratio[i] = in.readDouble();
  }
}

/**
 * Reads one serialized array of key lists: each slot is an int count
 * followed by that many {@link Key} records, appended to the existing list.
 */
private void readKeyLists(DataInput in, List<Key>[] vector) throws IOException {
  for (int i = 0; i < vector.length; i++) {
    List<Key> list = vector[i];
    int size = in.readInt();
    for (int j = 0; j < size; j++) {
      Key k = new Key();
      k.readFields(in);
      list.add(k);
    }
  }
}
}
/**
 * Creates a key holding the given byte value with an explicit weight.
 *
 * @param value the byte content of this key
 * @param weight the weight assigned to this key
 */
public Key(byte[] value, double weight) {
  this.set(value, weight);
}
/** Sums the weights of all keys in the given list. */
private double getWeight(List<Key> keyList) {
  double total = 0.0;
  for (int i = 0; i < keyList.size(); i++) {
    total += keyList.get(i).getWeight();
  }
  return total;
}
// Verifies Range-to-bloom-key transformation: only a key carrying both a
// row and a column family should produce a bloom filter key; partial keys
// must yield null so the bloom filter is bypassed for that range.
// NOTE(review): this fragment appears truncated — the method's closing
// brace is not visible in this view.
@Test
public void testTransformRange() {
  org.apache.accumulo.core.data.Key cbKey = new org.apache.accumulo.core.data.Key();
  // Completely empty key: not transformable.
  Assert.assertNull("empty key should not be in bloom filter", functor.transform(new Range(cbKey, cbKey)));
  cbKey = new org.apache.accumulo.core.data.Key("row only");
  Assert.assertNull("row only key should not be in bloom filter", functor.transform(new Range(cbKey, cbKey)));
  cbKey = new org.apache.accumulo.core.data.Key("", "cf only");
  Assert.assertNull("cf only key should not be in bloom filter", functor.transform(new Range(cbKey, cbKey)));
  // Row plus column family: the expected bloom key is the cf bytes
  // ("and a cf") followed by the row bytes ("row").
  cbKey = new org.apache.accumulo.core.data.Key("row", "and a cf");
  org.apache.hadoop.util.bloom.Key bfKey = new org.apache.hadoop.util.bloom.Key(new byte[] {'a', 'n', 'd', ' ', 'a', ' ', 'c', 'f', 'r', 'o', 'w'}, 1.0);
  Assert.assertEquals("row and cf only key should be in bloom filter", bfKey, functor.transform(new Range(cbKey, cbKey)));
/**
 * Builds a bloom-filter key from the row, column family and column
 * qualifier of the given Accumulo key, concatenated in that order.
 * The resulting key always carries a weight of 1.0.
 */
@Override
public org.apache.hadoop.util.bloom.Key transform(org.apache.accumulo.core.data.Key acuKey) {
  ByteSequence row = acuKey.getRowData();
  ByteSequence cf = acuKey.getColumnFamilyData();
  ByteSequence cq = acuKey.getColumnQualifierData();
  byte[] buffer = new byte[row.length() + cf.length() + cq.length()];
  // Append each section at the running offset, honouring each sequence's
  // own offset into its backing array.
  int offset = 0;
  for (ByteSequence part : new ByteSequence[] {row, cf, cq}) {
    System.arraycopy(part.getBackingArray(), part.offset(), buffer, offset, part.length());
    offset += part.length();
  }
  return new org.apache.hadoop.util.bloom.Key(buffer, 1.0);
}
/**
 * Serializes the key using the configured key type and records it in the
 * appropriate bloom filter, creating that filter lazily on first use.
 */
private void addKeyToBloomFilter(Object key) throws ExecException {
  Key k = new Key(DataType.toBytes(key, keyType));
  // A single filter always uses slot 0; otherwise hash the key (masked
  // non-negative) across the configured number of partitions.
  int slot = (bloomFilters.length == 1)
      ? 0
      : (key.hashCode() & Integer.MAX_VALUE) % numBloomFilters;
  if (bloomFilters[slot] == null) {
    bloomFilters[slot] = new BloomFilter(vectorSizeBytes * 8, numHash, hashType);
  }
  bloomFilters[slot].add(k);
}
/**
 * Builds a bloom-filter key (weight 1.0) from the row bytes followed by
 * the column family bytes of the given Accumulo key.
 */
@Override
public Key transform(org.apache.accumulo.core.data.Key acuKey) {
  ByteSequence row = acuKey.getRowData();
  ByteSequence cf = acuKey.getColumnFamilyData();
  int rowLen = row.length();
  int cfLen = cf.length();
  byte[] combined = new byte[rowLen + cfLen];
  System.arraycopy(row.getBackingArray(), row.offset(), combined, 0, rowLen);
  System.arraycopy(cf.getBackingArray(), cf.offset(), combined, rowLen, cfLen);
  return new Key(combined, 1.0);
}
@Test public void testTransformKey() { // key should only be in bloom filter if it contains both the field name (cf) and field value (row) org.apache.accumulo.core.data.Key cbKey = new org.apache.accumulo.core.data.Key(); Assert.assertEquals("empty key should not be in bloom filter", EMPTY_BF_KEY, functor.transform(cbKey)); cbKey = new org.apache.accumulo.core.data.Key("row only"); Assert.assertEquals("row only key should not be in bloom filter", EMPTY_BF_KEY, functor.transform(cbKey)); cbKey = new org.apache.accumulo.core.data.Key("", "cf only"); Assert.assertEquals("cf only key should not be in bloom filter", EMPTY_BF_KEY, functor.transform(cbKey)); cbKey = new org.apache.accumulo.core.data.Key("", "", "cq only"); Assert.assertEquals("cq only key should not be in bloom filter", EMPTY_BF_KEY, functor.transform(cbKey)); cbKey = new org.apache.accumulo.core.data.Key("row", "", "and cq"); Assert.assertEquals("row and cq only key should not be in bloom filter", EMPTY_BF_KEY, functor.transform(cbKey)); cbKey = new org.apache.accumulo.core.data.Key("row", "and cf"); org.apache.hadoop.util.bloom.Key bfKey = new org.apache.hadoop.util.bloom.Key(new byte[] {'a', 'n', 'd', ' ', 'c', 'f', 'r', 'o', 'w'}, 1.0); Assert.assertEquals("row and cf only key should be in bloom filter", bfKey, functor.transform(cbKey)); } }
/**
 * Builds a bloom-filter key (weight 1.0) from only the row portion of the
 * given Accumulo key.
 */
@Override
public Key transform(org.apache.accumulo.core.data.Key acuKey) {
  ByteSequence row = acuKey.getRowData();
  byte keyData[];
  keyData = new byte[row.length()];
  // BUGFIX: copy starting at row.offset(), not 0 — a ByteSequence's backing
  // array may contain more than this sequence, so ignoring the offset reads
  // the wrong bytes. The sibling transforms (row+cf, row+cf+cq) already use
  // the offset; this one now matches them.
  System.arraycopy(row.getBackingArray(), row.offset(), keyData, 0, row.length());
  return new Key(keyData, 1.0);
}
/**
 * Tests the given string key for membership in the underlying bloom filter.
 *
 * @param key the key to test; must not be null
 * @return true if the key may have been added (false positives are
 *         possible), false if it definitely was not added
 * @throws NullPointerException if {@code key} is null
 */
public boolean mightContain(String key) {
  if (key == null) {
    // Fixed message typo: "cannot by null" -> "cannot be null".
    throw new NullPointerException("Key cannot be null");
  }
  return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
// Scans the row backwards for the last delimiter and tests the segment
// after it against the bloom filter.
// NOTE(review): the scan starts at length-3 and the copied range stops at
// length-2 (exclusive), so the last two bytes of the row are always
// excluded — presumably a fixed-width suffix in the row serialisation;
// confirm against the row format before changing these bounds.
@Override
public boolean accept(final Key key, final Value value) {
  byte[] vertices = key.getRowData().getBackingArray();
  // Position of the delimiter separating the row's parts, or -1 if absent.
  int pos = -1;
  for (int i = vertices.length - 3; i > 0; --i) {
    if (vertices[i] == ByteArrayEscapeUtils.DELIMITER) {
      pos = i;
      break;
    }
  }
  if (pos == -1) {
    // No delimiter found: accept unconditionally (the filter only applies
    // when a candidate segment can be extracted).
    return true;
  }
  return filter.membershipTest(new org.apache.hadoop.util.bloom.Key(Arrays.copyOfRange(vertices, pos + 1, vertices.length - 2)));
}
/**
 * Transforms an Accumulo key into a bloom filter key. This is expected to
 * always return a value; a zero-length bloom filter key is equivalent to
 * ignoring the bloom filters for this key.
 */
@Override
public org.apache.hadoop.util.bloom.Key transform(org.apache.accumulo.core.data.Key cbKey) {
  if (isKeyInBloomFilter(cbKey)) {
    byte keyData[];
    // The column qualifier contains the field value; only the bytes before
    // the first null byte are used.
    ByteSequence cq = cbKey.getColumnQualifierData();
    int index = getIndexOf(cq, (byte) 0);
    // The column family is the field name.
    // NOTE(review): the first 3 bytes of the column family are skipped —
    // presumably a fixed-length prefix in this table's schema; confirm.
    // Also note both copies read from absolute indexes (3 and 0) rather
    // than cf.offset()/cq.offset(); verify the backing arrays are always
    // exact-sized here, otherwise this is an offset bug.
    ByteSequence cf = cbKey.getColumnFamilyData();
    keyData = new byte[index + cf.length() - 3];
    System.arraycopy(cf.getBackingArray(), 3, keyData, 0, cf.length() - 3);
    System.arraycopy(cq.getBackingArray(), 0, keyData, cf.length() - 3, index);
    return new org.apache.hadoop.util.bloom.Key(keyData, 1.0);
  }
  // Key type not covered by the bloom filter: return the empty key.
  return new org.apache.hadoop.util.bloom.Key(EMPTY_BYTES, 1.0);
}
/**
 * Exercises Key construction, set/equals/hashCode consistency, and weight
 * increments.
 */
private void checkOnKeyMethods() {
  String payload = "werabsdbe";
  Key baseKey = new Key(payload.getBytes());
  // A key built without an explicit weight defaults to 1.
  assertTrue("default key weight error ", baseKey.getWeight() == 1d);
  baseKey.set(payload.getBytes(), 2d);
  assertTrue(" setted key weight error ", baseKey.getWeight() == 2d);
  // Same bytes and same weight: equal, with matching hash codes.
  Key otherKey = new Key(payload.getBytes(), 2d);
  assertTrue("equals error", baseKey.equals(otherKey));
  assertTrue("hashcode error", baseKey.hashCode() == otherKey.hashCode());
  // Different bytes: not equal, different hash codes.
  otherKey = new Key(payload.concat("a").getBytes(), 2d);
  assertFalse("equals error", baseKey.equals(otherKey));
  assertFalse("hashcode error", baseKey.hashCode() == otherKey.hashCode());
  // Same bytes, different weight: not equal, different hash codes.
  otherKey = new Key(payload.getBytes(), 3d);
  assertFalse("equals error", baseKey.equals(otherKey));
  assertFalse("hashcode error", baseKey.hashCode() == otherKey.hashCode());
  // Weight increments: default step of 1, then an explicit step of 2.
  baseKey.incrementWeight();
  assertTrue("weight error", baseKey.getWeight() == 3d);
  baseKey.incrementWeight(2d);
  assertTrue("weight error", baseKey.getWeight() == 5d);
}
/**
 * Round-trips a Key through write/readFields and checks equality is
 * restored from the serialized form.
 */
private void checkOnReadWrite() {
  String line = "qryqeb354645rghdfvbaq23312fg";
  DataOutputBuffer out = new DataOutputBuffer();
  DataInputBuffer in = new DataInputBuffer();
  Key originKey = new Key(line.getBytes(), 100d);
  try {
    originKey.write(out);
    in.reset(out.getData(), out.getData().length);
    // Start from a deliberately different key so the equality below can
    // only come from readFields.
    Key restoredKey = new Key(new byte[] { 0 });
    assertFalse("checkOnReadWrite equals error", restoredKey.equals(originKey));
    restoredKey.readFields(in);
    assertTrue("checkOnReadWrite equals error", restoredKey.equals(originKey));
    out.reset();
  } catch (Exception e) {
    // BUGFIX: previously failed with a bare message, swallowing the cause;
    // include the exception so a failure is diagnosable. (Also renamed the
    // misleading "ioe" — the catch is for any Exception, not IOException.)
    Assert.fail("checkOnReadWrite ex error: " + e);
  }
}
/**
 * Adds the given key to the bloom filter, lazily initialising the filter
 * on first use. Null keys are ignored.
 *
 * @param key the key to add; may be null
 * @return always true
 */
public boolean iterate(String key) {
  if (key != null) {
    if (bloomFilter == null) {
      init();
    }
    // NOTE(review): getBytes() uses the platform default charset; consider
    // StandardCharsets.UTF_8 for deterministic membership across JVMs
    // (behavior change — not applied here).
    bloomFilter.add(new Key(key.getBytes()));
  }
  return true;
}
/**
 * Serializes this filter: superclass state first, then the false-positive
 * key vector, the key vector, and finally the ratio table.
 *
 * NOTE(review): the write order must mirror {@code readFields(DataInput)}.
 */
@Override
public void write(DataOutput out) throws IOException {
  super.write(out);
  writeKeyLists(out, fpVector);
  writeKeyLists(out, keyVector);
  for (int i = 0; i < ratio.length; i++) {
    out.writeDouble(ratio[i]);
  }
}

/**
 * Writes one array of key lists: each slot as its size followed by its
 * serialized {@link Key} records.
 */
private void writeKeyLists(DataOutput out, List<Key>[] vector) throws IOException {
  for (int i = 0; i < vector.length; i++) {
    List<Key> list = vector[i];
    out.writeInt(list.size());
    for (Key k : list) {
      k.write(out);
    }
  }
}
/**
 * Orders keys by byte-array length, then lexicographically by content,
 * then by weight.
 */
@Override
public int compareTo(Key other) {
  int result = this.bytes.length - other.bytes.length;
  for (int i = 0; result == 0 && i < bytes.length; i++) {
    result = this.bytes[i] - other.bytes[i];
  }
  if (result == 0) {
    // BUGFIX: the previous (int)(this.weight - other.weight) truncated
    // fractional differences to 0, so keys with identical bytes but
    // weights e.g. 2.0 and 2.5 compared equal — breaking the equals()
    // contract that delegates to this method. Double.compare orders them
    // correctly.
    result = Double.compare(this.weight, other.weight);
  }
  return result;
}
}
/**
 * Two keys are equal exactly when {@link #compareTo(Key)} reports zero,
 * i.e. they have the same bytes and the same weight.
 */
@Override
public boolean equals(Object o) {
  return (o instanceof Key) && this.compareTo((Key) o) == 0;
}