/**
 * Returns the estimated number of unique values presented to this sketch.
 *
 * <p>In estimation mode the retained-entry count is divided by theta; otherwise the
 * retained-entry count is returned as is.
 *
 * @return best estimate of the number of unique values
 */
public double getEstimate() {
  if (isEstimationMode()) {
    // int / double: the division is carried out in floating point
    return getRetainedEntries() / getTheta();
  }
  return getRetainedEntries();
}
/**
 * Gets the approximate upper error bound for the given number of Standard Deviations.
 *
 * <p>When the sketch is not in estimation mode the retained-entry count is returned
 * directly; otherwise the computation is delegated to
 * {@code BinomialBoundsN.getUpperBound}, which also receives the empty flag.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the upper bound.
 */
public double getUpperBound(final int numStdDev) {
  if (isEstimationMode()) {
    return BinomialBoundsN.getUpperBound(
        getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }
  return getRetainedEntries();
}
/**
 * Builds a fresh hash table containing the keys of the given sketch.
 *
 * <p>The table length is the larger of {@code 1 << MIN_LG_NOM_LONGS} and
 * {@code ceilingPowerOf2} applied to the retained-entry count divided by
 * {@code REBUILD_THRESHOLD} (rounded up). Keys are inserted through
 * {@code HashOperations.hashArrayInsert} together with this object's {@code theta_}.
 *
 * @param sketch the sketch whose {@code keys_} array is inserted into the table
 * @return the newly allocated hash table
 */
private long[] convertToHashTable(final Sketch<S> sketch) {
  final int sizeForEntries =
      ceilingPowerOf2((int) Math.ceil(sketch.getRetainedEntries() / REBUILD_THRESHOLD));
  final int minimumSize = 1 << MIN_LG_NOM_LONGS;
  final int size = Math.max(sizeForEntries, minimumSize);
  final long[] hashTable = new long[size];
  // trailing-zero count of the length; presumably log2(size), assuming size is a power of two
  final int lgSize = Integer.numberOfTrailingZeros(size);
  HashOperations.hashArrayInsert(sketch.keys_, hashTable, lgSize, theta_);
  return hashTable;
}
/**
 * Gets the approximate lower error bound for the given number of Standard Deviations.
 *
 * <p>When the sketch is not in estimation mode the retained-entry count is returned
 * directly; otherwise the computation is delegated to
 * {@code BinomialBoundsN.getLowerBound}, which also receives the empty flag.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the lower bound.
 */
public double getLowerBound(final int numStdDev) {
  if (isEstimationMode()) {
    return BinomialBoundsN.getLowerBound(
        getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }
  return getRetainedEntries();
}
/**
 * Produces a multi-line, human-readable summary of this sketch: estimate, bounds at two
 * standard deviations, theta in both forms, mode flags and the retained-entry count.
 * When this object is an {@code UpdatableSketch}, its nominal entries, current capacity,
 * resize factor and sampling probability are appended as well.
 *
 * @return the summary text, each line terminated by {@code LS}
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder();
  text.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS)
      .append(" Estimate : ").append(getEstimate()).append(LS)
      .append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS)
      .append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS)
      .append(" Theta (double) : ").append(this.getTheta()).append(LS)
      .append(" Theta (long) : ").append(this.getThetaLong()).append(LS)
      .append(" EstMode? : ").append(isEstimationMode()).append(LS)
      .append(" Empty? : ").append(isEmpty()).append(LS)
      .append(" Retained Entries : ").append(this.getRetainedEntries()).append(LS);
  if (this instanceof UpdatableSketch) {
    // raw type on purpose: only non-generic accessors are used below
    @SuppressWarnings("rawtypes")
    final UpdatableSketch self = (UpdatableSketch) this;
    text.append(" Nominal Entries (k) : ").append(self.getNominalEntries()).append(LS)
        .append(" Current Capacity : ").append(self.getCurrentCapacity()).append(LS)
        .append(" Resize Factor : ").append(self.getResizeFactor().getValue()).append(LS)
        .append(" Sampling Probability (p): ").append(self.getSamplingProbability()).append(LS);
  }
  text.append("### END SKETCH SUMMARY").append(LS);
  return text.toString();
}
private void getNoMatchSetFromSketch(final Sketch<S> sketch) { if (sketch instanceof CompactSketch) { keys_ = sketch.keys_.clone(); summaries_ = sketch.summaries_.clone(); } else { // assuming only two types: CompactSketch and QuickSelectSketch final CompactSketch<S> compact = ((QuickSelectSketch<S>)sketch).compact(); keys_ = compact.keys_; summaries_ = compact.summaries_; } count_ = sketch.getRetainedEntries(); } }
final long thetaB = b == null ? Long.MAX_VALUE : b.getThetaLong(); theta_ = Math.min(thetaA, thetaB); if (a == null || a.getRetainedEntries() == 0) { return; } if (b == null || b.getRetainedEntries() == 0) { getNoMatchSetFromSketch(a); } else { final int noMatchSize = a.getRetainedEntries(); keys_ = new long[noMatchSize]; summaries_ = (S[]) Array.newInstance(a.summaries_.getClass().getComponentType(), noMatchSize);
/**
 * Heapifies a sketch stored in the bundled serial-version-1 resource file and checks
 * that it is in estimation mode, retains 4096 entries, and that every summary value
 * is 1.0.
 */
@Test
public void serialVersion1Compatibility() throws Exception {
  final String resourcePath = getClass().getClassLoader()
      .getResource("CompactSketchWithDoubleSummary4K_serialVersion1.bin").getFile();
  final byte[] serialized = TestUtil.readBytesFromFile(resourcePath);
  final Sketch<DoubleSummary> sketch =
      Sketches.heapifySketch(Memory.wrap(serialized), new DoubleSummaryDeserializer());

  Assert.assertTrue(sketch.isEstimationMode());
  // NOTE(review): the tolerance is 99% of the expected value, so this check is very loose
  Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99);
  Assert.assertEquals(sketch.getRetainedEntries(), 4096);

  int visited = 0;
  for (final SketchIterator<DoubleSummary> it = sketch.iterator(); it.next(); visited++) {
    Assert.assertEquals(it.getSummary().getValue(), 1.0);
  }
  Assert.assertEquals(visited, 4096);
}
if (isEmpty_ || (sketchIn.getRetainedEntries() == 0)) { sketch_ = null; return; sketch_ = new QuickSelectSketch<>(sketchIn.getRetainedEntries(), ResizeFactor.X1.lg(), null); final SketchIterator<S> it = sketchIn.iterator(); while (it.next()) { return; final int matchSize = min(sketch_.getRetainedEntries(), sketchIn.getRetainedEntries()); final long[] matchKeys = new long[matchSize]; S[] matchSummaries = null;
/**
 * Round-trips an updatable sketch built with sampling probability 0.5 through its byte
 * array form and checks the estimate, the retained fraction and theta of the restored
 * sketch.
 */
@Test
public void serializeDeserializeSampling() throws Exception {
  final int sketchSize = 16384;
  final int numberOfUniques = sketchSize;
  final UpdatableSketch<Double, DoubleSummary> source =
      new UpdatableSketchBuilder<>(new DoubleSummaryFactory())
          .setNominalEntries(sketchSize)
          .setSamplingProbability(0.5f)
          .build();
  int key = 0;
  while (key < numberOfUniques) {
    source.update(key, 1.0);
    key++;
  }

  final Sketch<DoubleSummary> restored = Sketches.heapifySketch(
      Memory.wrap(source.toByteArray()), new DoubleSummaryDeserializer());

  Assert.assertTrue(restored.isEstimationMode());
  // estimate within 1% of the true count
  Assert.assertEquals(restored.getEstimate() / numberOfUniques, 1.0, 0.01);
  // about half of the inputs are retained
  Assert.assertEquals(restored.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01);
  Assert.assertEquals(source.getTheta(), restored.getTheta());
}
/**
 * Round-trips a compact sketch holding three string keys through its byte array form
 * and checks that the restored sketch is non-empty, in exact mode, reports 3 for the
 * estimate and both bounds, and iterates over exactly three summaries of value 1.0.
 */
@Test
public void serializeDeserializeSmallExact() {
  final UpdatableSketch<Double, DoubleSummary> updatable =
      new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
  for (final String key : new String[] {"a", "b", "c"}) {
    updatable.update(key, 1.0);
  }
  final CompactSketch<DoubleSummary> compact = updatable.compact();
  final Sketch<DoubleSummary> restored = Sketches.heapifySketch(
      Memory.wrap(compact.toByteArray()), new DoubleSummaryDeserializer());

  Assert.assertFalse(restored.isEmpty());
  Assert.assertFalse(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate(), 3.0);
  Assert.assertEquals(restored.getLowerBound(1), 3.0);
  Assert.assertEquals(restored.getUpperBound(1), 3.0);
  Assert.assertEquals(restored.getRetainedEntries(), 3);
  Assert.assertEquals(restored.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(restored.getTheta(), 1.0);

  int visited = 0;
  for (final SketchIterator<DoubleSummary> it = restored.iterator(); it.next(); visited++) {
    Assert.assertEquals(it.getSummary().getValue(), 1.0);
  }
  Assert.assertEquals(visited, 3);
}
/**
 * Filters the entries of the provided {@link Sketch} using this object's {@link Predicate}.
 *
 * <p>Entries whose summary passes the predicate are inserted, with a copy of the summary,
 * into a new sketch that takes over the input's theta and, if the input is not empty,
 * is marked not empty. A {@code null} input produces a compact sketch constructed with
 * null arrays, {@code Long.MAX_VALUE} as theta and {@code true} as its last argument.
 *
 * @param sketchIn The sketch against which apply the {@link Predicate}
 * @return A new Sketch with some of the entries filtered out based on the {@link Predicate}
 */
public CompactSketch<T> filter(final Sketch<T> sketchIn) {
  if (sketchIn == null) {
    return new CompactSketch<>(null, null, Long.MAX_VALUE, true);
  }

  final QuickSelectSketch<T> filtered =
      new QuickSelectSketch<>(sketchIn.getRetainedEntries(), ResizeFactor.X1.lg(), null);
  for (final SketchIterator<T> entries = sketchIn.iterator(); entries.next(); ) {
    final T candidate = entries.getSummary();
    if (!predicate.test(candidate)) {
      continue;
    }
    filtered.insert(entries.getKey(), candidate.copy());
  }

  filtered.setThetaLong(sketchIn.getThetaLong());
  if (!sketchIn.isEmpty()) {
    filtered.setNotEmpty();
  }
  return filtered.compact();
}
}
/**
 * Returns the estimated number of unique values presented to this sketch.
 *
 * <p>In estimation mode the retained-entry count is divided by theta; otherwise the
 * retained-entry count is returned as is.
 *
 * @return best estimate of the number of unique values
 */
public double getEstimate() {
  if (isEstimationMode()) {
    // int / double: the division is carried out in floating point
    return getRetainedEntries() / getTheta();
  }
  return getRetainedEntries();
}
/**
 * Builds a fresh hash table containing the keys of the given sketch.
 *
 * <p>The table length is the larger of {@code 1 << MIN_LG_NOM_LONGS} and
 * {@code ceilingPowerOf2} applied to the retained-entry count divided by
 * {@code REBUILD_THRESHOLD} (rounded up). Keys are inserted through
 * {@code HashOperations.hashArrayInsert} together with this object's {@code theta_}.
 *
 * @param sketch the sketch whose {@code keys_} array is inserted into the table
 * @return the newly allocated hash table
 */
private long[] convertToHashTable(final Sketch<S> sketch) {
  final int sizeForEntries =
      ceilingPowerOf2((int) Math.ceil(sketch.getRetainedEntries() / REBUILD_THRESHOLD));
  final int minimumSize = 1 << MIN_LG_NOM_LONGS;
  final int size = Math.max(sizeForEntries, minimumSize);
  final long[] hashTable = new long[size];
  // trailing-zero count of the length; presumably log2(size), assuming size is a power of two
  final int lgSize = Integer.numberOfTrailingZeros(size);
  HashOperations.hashArrayInsert(sketch.keys_, hashTable, lgSize, theta_);
  return hashTable;
}
/**
 * Gets the approximate upper error bound for the given number of Standard Deviations.
 *
 * <p>When the sketch is not in estimation mode the retained-entry count is returned
 * directly; otherwise the computation is delegated to
 * {@code BinomialBoundsN.getUpperBound}, which also receives the empty flag.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the upper bound.
 */
public double getUpperBound(final int numStdDev) {
  if (isEstimationMode()) {
    return BinomialBoundsN.getUpperBound(
        getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }
  return getRetainedEntries();
}
/**
 * Gets the approximate lower error bound for the given number of Standard Deviations.
 *
 * <p>When the sketch is not in estimation mode the retained-entry count is returned
 * directly; otherwise the computation is delegated to
 * {@code BinomialBoundsN.getLowerBound}, which also receives the empty flag.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the lower bound.
 */
public double getLowerBound(final int numStdDev) {
  if (isEstimationMode()) {
    return BinomialBoundsN.getLowerBound(
        getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }
  return getRetainedEntries();
}
/**
 * Produces a multi-line, human-readable summary of this sketch: estimate, bounds at two
 * standard deviations, theta in both forms, mode flags and the retained-entry count.
 * When this object is an {@code UpdatableSketch}, its nominal entries, current capacity,
 * resize factor and sampling probability are appended as well.
 *
 * @return the summary text, each line terminated by {@code LS}
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder();
  text.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS)
      .append(" Estimate : ").append(getEstimate()).append(LS)
      .append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS)
      .append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS)
      .append(" Theta (double) : ").append(this.getTheta()).append(LS)
      .append(" Theta (long) : ").append(this.getThetaLong()).append(LS)
      .append(" EstMode? : ").append(isEstimationMode()).append(LS)
      .append(" Empty? : ").append(isEmpty()).append(LS)
      .append(" Retained Entries : ").append(this.getRetainedEntries()).append(LS);
  if (this instanceof UpdatableSketch) {
    // raw type on purpose: only non-generic accessors are used below
    @SuppressWarnings("rawtypes")
    final UpdatableSketch self = (UpdatableSketch) this;
    text.append(" Nominal Entries (k) : ").append(self.getNominalEntries()).append(LS)
        .append(" Current Capacity : ").append(self.getCurrentCapacity()).append(LS)
        .append(" Resize Factor : ").append(self.getResizeFactor().getValue()).append(LS)
        .append(" Sampling Probability (p): ").append(self.getSamplingProbability()).append(LS);
  }
  text.append("### END SKETCH SUMMARY").append(LS);
  return text.toString();
}
final long thetaB = b == null ? Long.MAX_VALUE : b.getThetaLong(); theta_ = Math.min(thetaA, thetaB); if (a == null || a.getRetainedEntries() == 0) { return; } if (b == null || b.getRetainedEntries() == 0) { getNoMatchSetFromSketch(a); } else { final int noMatchSize = a.getRetainedEntries(); keys_ = new long[noMatchSize]; summaries_ = (S[]) Array.newInstance(a.summaries_.getClass().getComponentType(), noMatchSize);
private void getNoMatchSetFromSketch(final Sketch<S> sketch) { if (sketch instanceof CompactSketch) { keys_ = sketch.keys_.clone(); summaries_ = sketch.summaries_.clone(); } else { // assuming only two types: CompactSketch and QuickSelectSketch final CompactSketch<S> compact = ((QuickSelectSketch<S>)sketch).compact(); keys_ = compact.keys_; summaries_ = compact.summaries_; } count_ = sketch.getRetainedEntries(); } }
/**
 * Filters the entries of the provided {@link Sketch} using this object's {@link Predicate}.
 *
 * <p>Entries whose summary passes the predicate are inserted, with a copy of the summary,
 * into a new sketch that takes over the input's theta and, if the input is not empty,
 * is marked not empty. A {@code null} input produces a compact sketch constructed with
 * null arrays, {@code Long.MAX_VALUE} as theta and {@code true} as its last argument.
 *
 * @param sketchIn The sketch against which apply the {@link Predicate}
 * @return A new Sketch with some of the entries filtered out based on the {@link Predicate}
 */
public CompactSketch<T> filter(final Sketch<T> sketchIn) {
  if (sketchIn == null) {
    return new CompactSketch<>(null, null, Long.MAX_VALUE, true);
  }

  final QuickSelectSketch<T> filtered =
      new QuickSelectSketch<>(sketchIn.getRetainedEntries(), ResizeFactor.X1.lg(), null);
  for (final SketchIterator<T> entries = sketchIn.iterator(); entries.next(); ) {
    final T candidate = entries.getSummary();
    if (!predicate.test(candidate)) {
      continue;
    }
    filtered.insert(entries.getKey(), candidate.copy());
  }

  filtered.setThetaLong(sketchIn.getThetaLong());
  if (!sketchIn.isEmpty()) {
    filtered.setNotEmpty();
  }
  return filtered.compact();
}
}