/**
 * Counts the keys that appear in the minhash key sets of both digests.
 *
 * <p>Both digests must report {@code isExact()}. Note that {@code a} is validated with
 * {@code checkState} while {@code b} is validated with {@code checkArgument}; the two
 * checks therefore fail through different precondition helpers.
 *
 * @param a first digest; must be exact
 * @param b second digest; must be exact
 * @return size of the intersection of the two minhash key sets
 */
public static long exactIntersectionCardinality(SetDigest a, SetDigest b) {
    final String notExactMessage = "exact intersection cannot operate on approximate sets";
    checkState(a.isExact(), notExactMessage);
    checkArgument(b.isExact(), notExactMessage);

    return Sets.intersection(a.minhash.keySet(), b.minhash.keySet()).size();
}
/**
 * Estimates the Jaccard index of two digests from their minhash keys.
 *
 * <p>The keys of both digests are merged into one sorted set. That set is walked in
 * iteration order, and keys present in both digests are counted. The walk stops once as
 * many keys as the smaller minhash holds have been examined. The count is then divided by
 * the size of the smaller minhash.
 *
 * <p>If either minhash is empty the divisor is zero and the result is {@code NaN}
 * (0 / 0.0).
 *
 * @param a first digest
 * @param b second digest
 * @return shared-key count divided by the size of the smaller minhash
 */
public static double jaccardIndex(SetDigest a, SetDigest b) {
    int sizeA = a.minhash.size();
    int sizeB = b.minhash.size();
    int limit = Math.min(sizeA, sizeB);

    LongSortedSet union = new LongRBTreeSet(a.minhash.keySet());
    union.addAll(b.minhash.keySet());

    int shared = 0;
    int examined = 0;
    for (long hash : union) {
        boolean inBoth = a.minhash.containsKey(hash) && b.minhash.containsKey(hash);
        if (inBoth) {
            shared++;
        }
        // The bound is tested after the key is examined, so a non-empty union
        // always has at least one key inspected.
        if (++examined >= limit) {
            break;
        }
    }
    return shared / (double) limit;
}
/**
 * Folds another digest into this one.
 *
 * <p>The HyperLogLog sketches are merged first. Then, for every key in the other digest's
 * minhash, the counts from both digests are added and stored back with a saturating cast
 * to {@code short}. Finally the largest keys are dropped until at most {@code maxHashes}
 * entries remain.
 *
 * @param other digest whose contents are merged into this digest
 */
public void mergeWith(SetDigest other) {
    hll.mergeWith(other.hll);

    for (LongBidirectionalIterator keys = other.minhash.keySet().iterator(); keys.hasNext(); ) {
        long hash = keys.nextLong();
        // For a key absent from this digest, get() supplies the map's value for a missing
        // key (presumably the default return value of 0 - confirm against the map setup).
        int mine = minhash.get(hash);
        int theirs = other.minhash.get(hash);
        minhash.put(hash, Shorts.saturatedCast(mine + theirs));
    }

    // Trim from the high end so only the smallest keys are retained.
    while (minhash.size() > maxHashes) {
        long largest = minhash.lastLongKey();
        minhash.remove(largest);
    }
}
/**
 * Serializes this digest into a {@code Slice}.
 *
 * <p>Layout, in write order:
 * <ol>
 *   <li>one byte: {@code UNCOMPRESSED_FORMAT}</li>
 *   <li>int: length of the serialized HyperLogLog</li>
 *   <li>the serialized HyperLogLog bytes</li>
 *   <li>int: {@code maxHashes}</li>
 *   <li>int: number of minhash entries</li>
 *   <li>every minhash key as a long</li>
 *   <li>every minhash value as a short</li>
 * </ol>
 *
 * @return the serialized form
 * @throws UncheckedIOException wrapping any {@code IOException} raised inside the
 *         try-with-resources block
 */
public Slice serialize() {
    try (SliceOutput out = new DynamicSliceOutput(estimatedSerializedSize())) {
        out.appendByte(UNCOMPRESSED_FORMAT);

        Slice hllBytes = hll.serialize();
        out.appendInt(hllBytes.length());
        out.appendBytes(hllBytes);

        out.appendInt(maxHashes);
        out.appendInt(minhash.size());

        // Keys and values are written as two separate runs, not interleaved.
        for (long hash : minhash.keySet()) {
            out.appendLong(hash);
        }
        for (short count : minhash.values()) {
            out.appendShort(count);
        }

        return out.slice();
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
/**
 * Returns an unmodifiable view of the wrapped sorted map's key set.
 *
 * <p>The view is created on first use and cached in {@code keys}; later calls return the
 * cached instance.
 *
 * @return the cached unmodifiable key set
 */
@Override
public LongSortedSet keySet() {
    if (keys == null) {
        keys = LongSortedSets.unmodifiable(sortedMap.keySet());
    }
    // The cast narrows the declared type of the field (presumably a broader set type
    // declared outside this view - confirm).
    return (LongSortedSet) keys;
}
@Override
/**
 * Returns the key set of {@code tailMap(from)}.
 *
 * <p>NOTE(review): presumably this covers keys greater than or equal to {@code from},
 * following the usual sorted-set convention - confirm against {@code tailMap}.
 *
 * @param from bound passed through to {@code tailMap}
 * @return the key set of the resulting tail map
 */
@Override public LongSortedSet tailSet(final long from) { return tailMap(from).keySet(); } @Override
/**
 * Returns a synchronized view of the wrapped sorted map's key set.
 *
 * <p>The view is built with {@code LongSortedSets.synchronize} using this wrapper's
 * {@code sync} object, created on first use and cached in {@code keys}.
 *
 * @return the cached synchronized key set
 */
@Override
public LongSortedSet keySet() {
    if (keys != null) {
        return (LongSortedSet) keys;
    }
    keys = LongSortedSets.synchronize(sortedMap.keySet(), sync);
    return (LongSortedSet) keys;
}
@Override
/**
 * Returns the key set of {@code headMap(to)}.
 *
 * <p>NOTE(review): presumably this covers keys strictly less than {@code to}, following
 * the usual sorted-set convention - confirm against {@code headMap}.
 *
 * @param to bound passed through to {@code headMap}
 * @return the key set of the resulting head map
 */
@Override public LongSortedSet headSet(final long to) { return headMap(to).keySet(); } @Override
/**
 * Returns the key set of {@code subMap(from, to)}.
 *
 * <p>NOTE(review): presumably this covers keys from {@code from} (inclusive) to
 * {@code to} (exclusive), following the usual sorted-set convention - confirm against
 * {@code subMap}.
 *
 * @param from lower bound passed through to {@code subMap}
 * @param to upper bound passed through to {@code subMap}
 * @return the key set of the resulting sub map
 */
@Override public LongSortedSet subSet(final long from, final long to) { return subMap(from, to).keySet(); } @Override
/**
 * Computes the exact number of keys shared by two digests.
 *
 * <p>Requires both digests to be exact. The first digest is checked with
 * {@code checkState} and the second with {@code checkArgument}, both using the same
 * message.
 *
 * @param a first digest; must be exact
 * @param b second digest; must be exact
 * @return number of keys present in both minhash key sets
 */
public static long exactIntersectionCardinality(SetDigest a, SetDigest b) {
    checkState(a.isExact(), "exact intersection cannot operate on approximate sets");
    checkArgument(b.isExact(), "exact intersection cannot operate on approximate sets");

    // size() yields an int; it is widened to long on return.
    int sharedKeys = Sets.intersection(a.minhash.keySet(), b.minhash.keySet()).size();
    return sharedKeys;
}
/**
 * Returns how many minhash keys the two digests have in common.
 *
 * <p>Each digest must satisfy {@code isExact()}; otherwise the corresponding
 * precondition ({@code checkState} for {@code a}, {@code checkArgument} for {@code b})
 * fails.
 *
 * @param a first digest; must be exact
 * @param b second digest; must be exact
 * @return size of the intersection view over both key sets
 */
public static long exactIntersectionCardinality(SetDigest a, SetDigest b) {
    boolean firstIsExact = a.isExact();
    checkState(firstIsExact, "exact intersection cannot operate on approximate sets");

    boolean secondIsExact = b.isExact();
    checkArgument(secondIsExact, "exact intersection cannot operate on approximate sets");

    return Sets.intersection(a.minhash.keySet(), b.minhash.keySet()).size();
}
/**
 * Computes a minhash-based Jaccard similarity for two digests.
 *
 * <p>Builds the sorted union of both minhash key sets, inspects keys in iteration order
 * until the number inspected reaches the size of the smaller minhash, and returns the
 * fraction of that size made up of keys found in both digests.
 *
 * <p>When either minhash is empty the division is 0 / 0.0, so {@code NaN} is returned.
 *
 * @param a first digest
 * @param b second digest
 * @return count of inspected keys present in both digests, divided by the smaller
 *         minhash size
 */
public static double jaccardIndex(SetDigest a, SetDigest b) {
    int smallerSize = Math.min(a.minhash.size(), b.minhash.size());

    LongSortedSet mergedKeys = new LongRBTreeSet(a.minhash.keySet());
    mergedKeys.addAll(b.minhash.keySet());

    int common = 0;
    int remaining = smallerSize;
    for (long candidate : mergedKeys) {
        if (a.minhash.containsKey(candidate)) {
            if (b.minhash.containsKey(candidate)) {
                common++;
            }
        }
        // Count down after inspecting the key; stop once the budget is used up.
        remaining--;
        if (remaining <= 0) {
            break;
        }
    }
    return common / (double) smallerSize;
}
/**
 * Approximates the Jaccard index of two digests using their minhash keys.
 *
 * <p>The keys of both digests are combined into one sorted set. Walking that set in
 * iteration order, the method counts keys that both digests contain and stops after
 * examining as many keys as the smaller minhash holds. The count is divided by that
 * smaller size.
 *
 * <p>An empty minhash on either side makes the divisor zero, which yields {@code NaN}.
 *
 * @param a first digest
 * @param b second digest
 * @return the ratio of shared keys to the smaller minhash size
 */
public static double jaccardIndex(SetDigest a, SetDigest b) {
    int firstSize = a.minhash.size();
    int secondSize = b.minhash.size();
    int sampleSize = Math.min(firstSize, secondSize);

    LongSortedSet combined = new LongRBTreeSet(a.minhash.keySet());
    combined.addAll(b.minhash.keySet());

    int matches = 0;
    int seen = 0;
    for (long hash : combined) {
        if (a.minhash.containsKey(hash) && b.minhash.containsKey(hash)) {
            matches += 1;
        }
        seen += 1;
        if (seen >= sampleSize) {
            break;
        }
    }

    double denominator = sampleSize;
    return matches / denominator;
}
/**
 * Merges the contents of {@code other} into this digest.
 *
 * <p>Steps: merge the HyperLogLog sketches; add, per key of the other minhash, the two
 * digests' counts and store the sum clamped to the {@code short} range; then remove the
 * last (largest) key repeatedly while the minhash holds more than {@code maxHashes}
 * entries.
 *
 * @param other digest to merge from
 */
public void mergeWith(SetDigest other) {
    hll.mergeWith(other.hll);

    LongBidirectionalIterator otherKeys = other.minhash.keySet().iterator();
    while (otherKeys.hasNext()) {
        long hash = otherKeys.nextLong();
        // Sum in int so two short counts cannot overflow before the saturating cast.
        int combined = minhash.get(hash) + other.minhash.get(hash);
        short clamped = Shorts.saturatedCast(combined);
        minhash.put(hash, clamped);
    }

    while (minhash.size() > maxHashes) {
        minhash.remove(minhash.lastLongKey());
    }
}
/**
 * Absorbs another digest into this one.
 *
 * <p>The other digest's HyperLogLog is merged into this digest's. Each key in the other
 * minhash then has its count added to this digest's count for the same key, saturating at
 * the {@code short} limits. Afterwards the minhash is cut back to {@code maxHashes}
 * entries by discarding keys from the high end.
 *
 * @param other digest whose HyperLogLog and minhash entries are merged in
 */
public void mergeWith(SetDigest other) {
    hll.mergeWith(other.hll);

    for (LongBidirectionalIterator it = other.minhash.keySet().iterator(); it.hasNext(); ) {
        long key = it.nextLong();
        int total = minhash.get(key) + other.minhash.get(key);
        minhash.put(key, Shorts.saturatedCast(total));
    }

    // Evict the largest key one at a time until the size limit is met.
    while (minhash.size() > maxHashes) {
        long evicted = minhash.lastLongKey();
        minhash.remove(evicted);
    }
}
/**
 * Writes this digest to a new {@code Slice}.
 *
 * <p>The output starts with the {@code UNCOMPRESSED_FORMAT} byte, followed by the
 * length-prefixed serialized HyperLogLog, then {@code maxHashes}, the minhash entry
 * count, all minhash keys (longs), and finally all minhash values (shorts).
 *
 * @return slice containing the serialized digest
 * @throws UncheckedIOException if an {@code IOException} is raised while writing or
 *         closing the output
 */
public Slice serialize() {
    try (SliceOutput sink = new DynamicSliceOutput(estimatedSerializedSize())) {
        // Header: format tag, then the length-prefixed HyperLogLog.
        sink.appendByte(UNCOMPRESSED_FORMAT);
        Slice hllSlice = hll.serialize();
        sink.appendInt(hllSlice.length());
        sink.appendBytes(hllSlice);

        // Minhash section: capacity, entry count, key run, value run.
        sink.appendInt(maxHashes);
        int entryCount = minhash.size();
        sink.appendInt(entryCount);
        for (long key : minhash.keySet()) {
            sink.appendLong(key);
        }
        for (short value : minhash.values()) {
            sink.appendShort(value);
        }

        return sink.slice();
    }
    catch (IOException ioFailure) {
        throw new UncheckedIOException(ioFailure);
    }
}
/**
 * Produces the binary representation of this digest.
 *
 * <p>Fields are appended in this order: format byte ({@code UNCOMPRESSED_FORMAT}),
 * serialized HyperLogLog length, serialized HyperLogLog bytes, {@code maxHashes}, number
 * of minhash entries, each minhash key as a long, each minhash value as a short.
 *
 * @return the slice backing the written output
 * @throws UncheckedIOException wrapping an {@code IOException} from the output
 */
public Slice serialize() {
    try (SliceOutput buffer = new DynamicSliceOutput(estimatedSerializedSize())) {
        buffer.appendByte(UNCOMPRESSED_FORMAT);

        Slice sketch = hll.serialize();
        int sketchLength = sketch.length();
        buffer.appendInt(sketchLength);
        buffer.appendBytes(sketch);

        buffer.appendInt(maxHashes);
        buffer.appendInt(minhash.size());

        // All keys first, then all values, each in the map's own iteration order.
        for (long hashKey : minhash.keySet()) {
            buffer.appendLong(hashKey);
        }
        for (short occurrences : minhash.values()) {
            buffer.appendShort(occurrences);
        }

        return buffer.slice();
    }
    catch (IOException cause) {
        throw new UncheckedIOException(cause);
    }
}