/**
 * Writes 20 tab-separated rows to {@code array_of_doubles_sketch_data.tsv}. Each row holds the
 * fixed timestamp {@code 2015010101}, a product name from {@code product_1} to
 * {@code product_10} picked at random, and the Base64 encoding of a compact sketch that was
 * updated with two integer keys.
 *
 * @throws Exception if creating or writing the file fails
 */
private static void generateSketches() throws Exception {
  final Path outputFile = FileSystems.getDefault().getPath("array_of_doubles_sketch_data.tsv");
  try (BufferedWriter writer = Files.newBufferedWriter(outputFile, StandardCharsets.UTF_8)) {
    final Random random = ThreadLocalRandom.current();
    // Keys keep increasing across rows, so no key is ever reused between sketches.
    int nextKey = 0;
    int row = 0;
    while (row < 20) {
      final ArrayOfDoublesUpdatableSketch sketch =
          new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(1024).build();
      sketch.update(nextKey++, new double[] {1});
      sketch.update(nextKey++, new double[] {1});

      writer.write("2015010101");
      writer.write('\t');
      final String productName = "product_" + (random.nextInt(10) + 1);
      writer.write(productName);
      writer.write('\t');
      final byte[] serialized = sketch.compact().toByteArray();
      writer.write(StringUtils.encodeBase64String(serialized));
      writer.newLine();
      row++;
    }
  }
}
/**
 * Reads the current value from every value selector, then updates the sketch once per key of
 * the current row with those values.
 *
 * <p>The sketch updates run inside {@code synchronized (this)} because this aggregator can be
 * used during indexing, and Druid can call aggregate() and get() concurrently.
 * https://github.com/apache/incubator-druid/pull/3956
 */
@Override
public void aggregate() {
  final IndexedInts row = keySelector.getRow();
  int column = 0;
  while (column < valueSelectors.length) {
    values[column] = valueSelectors[column].getDouble();
    column++;
  }
  synchronized (this) {
    final int keyCount = row.size();
    int k = 0;
    while (k < keyCount) {
      sketch.update(keySelector.lookupName(row.get(k)), values);
      k++;
    }
  }
}
/**
 * Updates the sketch stored in {@code buf} at {@code position} with the current row: each key
 * of the row is added to the sketch together with the current values of the value selectors.
 *
 * <p>This method uses locks because it can be used during indexing,
 * and Druid can call aggregate() and get() concurrently
 * https://github.com/apache/incubator-druid/pull/3956
 *
 * @param buf buffer that holds the serialized sketch
 * @param position offset in {@code buf} at which the sketch region starts
 */
@Override
public void aggregate(final ByteBuffer buf, final int position) {
  // Selector values are read into the shared values array before the lock is taken.
  for (int i = 0; i < valueSelectors.length; i++) {
    values[i] = valueSelectors[i].getDouble();
  }
  final IndexedInts keys = keySelector.getRow();
  // Wrapping memory and ArrayOfDoublesSketch is inexpensive compared to sketch operations.
  // Maintaining a cache of wrapped objects per buffer position like in Theta sketch aggregator
  // might be considered, but it would increase complexity including relocate() support.
  final WritableMemory mem = WritableMemory.wrap(buf, ByteOrder.LITTLE_ENDIAN);
  // Only the maxIntermediateSize bytes starting at position are exposed to the sketch.
  final WritableMemory region = mem.writableRegion(position, maxIntermediateSize);
  // The write lock is picked from the striped lock set by lockIndex(position).
  final Lock lock = stripedLock.getAt(lockIndex(position)).writeLock();
  lock.lock();
  try {
    final ArrayOfDoublesUpdatableSketch sketch = ArrayOfDoublesSketches.wrapUpdatableSketch(region);
    for (int i = 0, keysSize = keys.size(); i < keysSize; i++) {
      final String key = keySelector.lookupName(keys.get(i));
      sketch.update(key, values);
    }
  } finally {
    // Always release the lock, even if wrapping or updating the sketch throws.
    lock.unlock();
  }
}
/**
 * Updates this sketch with a long key and double values.
 * The values will be stored or added to the ones associated with the key.
 * The key is wrapped in a single-element long array and passed to the long-array overload.
 *
 * @param key The given long key
 * @param values The given values
 */
public void update(final long key, final double[] values) {
  final long[] keyAsArray = {key};
  update(keyAsArray, values);
}
/**
 * Checks the combiner obtained from the combining factory: after reset() on a one-key sketch
 * the estimate is 1, and after folding in a two-key sketch the estimate is 3.
 */
@Test
public void makeAggregateCombiner() {
  // Inputs: one sketch holding key "a", another holding keys "b" and "c".
  final ArrayOfDoublesUpdatableSketch first = new ArrayOfDoublesUpdatableSketchBuilder().build();
  first.update("a", new double[] {1});
  final ArrayOfDoublesUpdatableSketch second = new ArrayOfDoublesUpdatableSketchBuilder().build();
  second.update("b", new double[] {1});
  second.update("c", new double[] {1});
  final ArrayOfDoublesSketch[] inputs = {first, second};
  final TestObjectColumnSelector<ArrayOfDoublesSketch> selector =
      new TestObjectColumnSelector<ArrayOfDoublesSketch>(inputs);

  final AggregatorFactory factory = new ArrayOfDoublesSketchAggregatorFactory("", "", null, null, null);
  final AggregateCombiner<ArrayOfDoublesSketch> combiner =
      factory.getCombiningFactory().makeAggregateCombiner();

  combiner.reset(selector);
  Assert.assertEquals(1, combiner.getObject().getEstimate(), 0);

  // Advance the selector to the second sketch before folding it in.
  selector.increment();
  combiner.fold(selector);
  Assert.assertEquals(3, combiner.getObject().getEstimate(), 0);
}
/**
 * Updates this sketch with a String key and double values.
 * The values will be stored or added to the ones associated with the key.
 * The key is converted with {@code Util.stringToByteArray} and passed to the byte-array overload.
 *
 * @param key The given String key
 * @param values The given values
 */
public void update(final String key, final double[] values) {
  final byte[] keyBytes = Util.stringToByteArray(key);
  update(keyBytes, values);
}
/**
 * Updates this sketch with a double key and double values.
 * The values will be stored or added to the ones associated with the key.
 * The key is converted with {@code Util.doubleToLongArray} and passed to the long-array overload.
 *
 * @param key The given double key
 * @param values The given values
 */
public void update(final double key, final double[] values) {
  final long[] keyAsLongs = Util.doubleToLongArray(key);
  update(keyAsLongs, values);
}
/**
 * Calls the byte[], int[] and long[] key overloads of update(), each first with a null key and
 * then with an empty key. Presumably these are expected to leave the sketch unchanged, as the
 * method name suggests.
 *
 * @param sketch the sketch to call update() on
 * @param valuesArr the values passed with every key
 */
private static void noopUpdates(ArrayOfDoublesUpdatableSketch sketch, double[] valuesArr) {
  // The casts on null select the intended overload.
  sketch.update((byte[]) null, valuesArr);
  sketch.update(new byte[0], valuesArr);

  sketch.update((int[]) null, valuesArr);
  sketch.update(new int[0], valuesArr);

  sketch.update((long[]) null, valuesArr);
  sketch.update(new long[0], valuesArr);
}
/**
 * Updates a sketch built without a Memory argument through every key overload (long, double,
 * byte[], int[], long[], String) and expects an estimate of 6.0.
 */
@Test
public void updatesOfAllKeyTypes() {
  final ArrayOfDoublesUpdatableSketchBuilder builder = new ArrayOfDoublesUpdatableSketchBuilder();
  final ArrayOfDoublesUpdatableSketch sk = builder.build();

  // primitive keys
  sk.update(1L, new double[] {1.0});
  sk.update(2.0, new double[] {1.0});
  // array keys
  sk.update(new byte[] {3}, new double[] {1.0});
  sk.update(new int[] {4}, new double[] {1.0});
  sk.update(new long[] {5L}, new double[] {1.0});
  // string key
  sk.update("a", new double[] {1.0});

  final double estimate = sk.getEstimate();
  Assert.assertEquals(estimate, 6.0);
}
/**
 * Updates a sketch built over a wrapped 1,000,000-byte array through every key overload (long,
 * double, byte[], int[], long[], String) and expects an estimate of 6.0.
 */
@Test
public void updatesOfAllKeyTypes() {
  final WritableMemory backing = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().build(backing);

  // primitive keys
  sk.update(1L, new double[] {1.0});
  sk.update(2.0, new double[] {1.0});
  // array keys
  sk.update(new byte[] {3}, new double[] {1.0});
  sk.update(new int[] {4}, new double[] {1.0});
  sk.update(new long[] {5L}, new double[] {1.0});
  // string key
  sk.update("a", new double[] {1.0});

  final double estimate = sk.getEstimate();
  Assert.assertEquals(estimate, 6.0);
}
/**
 * Updates the same key three times (1.0, 0.7, 0.8) and checks after each update that the sketch
 * still retains one entry whose value is the running sum (1.0, 1.7, 2.5).
 */
@Test
public void doubleSum() {
  final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();
  final double[] increments = {1.0, 0.7, 0.8};
  final double[] expectedSums = {1.0, 1.7, 2.5};

  for (int step = 0; step < increments.length; step++) {
    sketch.update(1, new double[] {increments[step]});
    Assert.assertEquals(sketch.getRetainedEntries(), 1);
    Assert.assertEquals(sketch.getValues()[0][0], expectedSums[step]);
  }
}
@Test public void heapifyAndUpdateSketch() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update(1, new double[] {1}); // downcasting is not recommended, for testing only ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); sketch2.update(2, new double[] {1}); Assert.assertEquals(sketch2.getEstimate(), 2.0); }
/**
 * Builds a sketch configured with two values per key and updates it with a single-value array;
 * a SketchesArgumentException is expected.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkInsertExceptions() {
  final ArrayOfDoublesUpdatableSketchBuilder builder =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2);
  final ArrayOfDoublesUpdatableSketch twoValueSketch = builder.build();
  final double[] singleValue = {1.0};
  twoValueSketch.update("a", singleValue);
}
/**
 * Builds a sketch with sampling probability 0.001, updates it with one key and checks that it
 * is non-empty and in estimation mode, that the estimate and lower bound are 0, that the upper
 * bound is positive, and that theta equals the sampling probability.
 */
@Test
public void sampling() {
  final float samplingProbability = 0.001f;
  final ArrayOfDoublesUpdatableSketchBuilder builder =
      new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability);
  final ArrayOfDoublesUpdatableSketch sk = builder.build();
  sk.update("a", new double[] {1.0});

  // state flags
  Assert.assertFalse(sk.isEmpty());
  Assert.assertTrue(sk.isEstimationMode());

  // estimate and bounds
  Assert.assertEquals(sk.getEstimate(), 0.0);
  Assert.assertTrue(sk.getUpperBound(1) > 0.0);
  Assert.assertEquals(sk.getLowerBound(1), 0.0, 0.0000001);

  // theta, both as a long fraction of Long.MAX_VALUE and as a double
  Assert.assertEquals(
      sk.getThetaLong() / (double) Long.MAX_VALUE,
      (double) samplingProbability
  );
  Assert.assertEquals(sk.getTheta(), (double) samplingProbability);
}
/**
 * Compacts a one-entry, Memory-backed sketch into a wrapped 39-byte array; a
 * SketchesArgumentException is expected.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void fromQuickSelectSketchNotEnoughMemory() {
  final WritableMemory sourceMem = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().build(sourceMem);
  updatable.update(1, new double[] {1.0});

  final WritableMemory tooSmall = WritableMemory.wrap(new byte[39]);
  updatable.compact(tooSmall);
}
@Test public void notEmptyNoEntries() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); sketch1.update("a", new double[] {1}); // this happens to get rejected because of sampling with low probability ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); intersection.update(sketch1, null); ArrayOfDoublesCompactSketch result = intersection.getResult(); Assert.assertTrue(result.isEmpty()); Assert.assertEquals(result.getRetainedEntries(), 0); Assert.assertEquals(result.getEstimate(), 0.0); Assert.assertEquals(result.getLowerBound(1), 0.0); Assert.assertEquals(result.getUpperBound(1), 0.0); Assert.assertEquals(result.getValues().length, 0); }
/**
 * Fills a sketch (16384 nominal entries, sampling probability 0.5) with 16384 unique keys,
 * serializes and heapifies it, then checks that the copy is in estimation mode, that its
 * estimate and retained-entry ratio are within 0.01 of 1.0 and 0.5, and that theta matches.
 */
@Test
public void serializeDeserializeSampling() {
  final int sketchSize = 16384;
  final int numberOfUniques = sketchSize;
  final ArrayOfDoublesUpdatableSketch source = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(sketchSize)
      .setSamplingProbability(0.5f)
      .build();
  for (int key = 0; key < numberOfUniques; key++) {
    source.update(key, new double[] {1.0});
  }

  final byte[] serialized = source.toByteArray();
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketch.heapify(Memory.wrap(serialized));

  Assert.assertTrue(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate() / numberOfUniques, 1.0, 0.01);
  Assert.assertEquals(restored.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01);
  Assert.assertEquals(source.getTheta(), restored.getTheta());
}
/**
 * Compacts a sketch holding 8192 keys, serializes it, and heapifies the bytes with a second
 * argument of 123 (the seed, per the test name); a SketchesArgumentException is expected.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void deserializeWithWrongSeed() {
  final ArrayOfDoublesUpdatableSketch updatable = new ArrayOfDoublesUpdatableSketchBuilder().build();
  int key = 0;
  while (key < 8192) {
    updatable.update(key, new double[] {1.0});
    key++;
  }
  final ArrayOfDoublesCompactSketch compact = updatable.compact();
  final byte[] serialized = compact.toByteArray();
  ArrayOfDoublesSketches.heapifySketch(Memory.wrap(serialized), 123);
}
}
/**
 * Fills a Memory-backed sketch with 8192 keys, compacts it into another wrapped array,
 * serializes the compact sketch and wraps the bytes again. The wrapped copy must be non-empty,
 * in estimation mode, and report the same estimate and theta as the compact sketch.
 */
@Test
public void serializeDeserializeEstimation() {
  final WritableMemory updatableMem = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().build(updatableMem);
  int key = 0;
  while (key < 8192) {
    updatable.update(key, new double[] {1.0});
    key++;
  }

  final WritableMemory compactMem = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesCompactSketch compact = updatable.compact(compactMem);
  final byte[] serialized = compact.toByteArray();
  final ArrayOfDoublesSketch wrapped = ArrayOfDoublesSketches.wrapSketch(WritableMemory.wrap(serialized));

  Assert.assertFalse(wrapped.isEmpty());
  Assert.assertTrue(wrapped.isEstimationMode());
  Assert.assertEquals(wrapped.getEstimate(), compact.getEstimate());
  Assert.assertEquals(wrapped.getThetaLong(), compact.getThetaLong());
}
/**
 * Fills a Memory-backed sketch with 8192 keys, compacts and serializes it, and wraps the bytes
 * with a second argument of 123 (the seed, per the test name); a SketchesArgumentException is
 * expected.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void deserializeWithWrongSeed() {
  final WritableMemory updatableMem = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().build(updatableMem);
  int key = 0;
  while (key < 8192) {
    updatable.update(key, new double[] {1.0});
    key++;
  }

  final WritableMemory compactMem = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesCompactSketch compact = updatable.compact(compactMem);
  final byte[] serialized = compact.toByteArray();
  ArrayOfDoublesSketches.wrapSketch(WritableMemory.wrap(serialized), 123);
}