/**
 * Runs {@code TABLESAMPLE BERNOULLI (50)} against {@code orders} 100 times and checks that
 * no run returns duplicate rows and that the average fraction of rows returned is close to 0.5.
 */
@Test
public void testTableSampleBernoulli()
{
    DescriptiveStatistics stats = new DescriptiveStatistics();

    int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().size();
    // Every sample size is divided by total below; fail with a clear message rather than a NaN rate.
    assertTrue(total > 0, "Expected orders table to contain at least one row");

    for (int i = 0; i < 100; i++) {
        List<MaterializedRow> values = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)").getMaterializedRows();

        assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
        stats.addValue(values.size() * 1.0 / total);
    }

    // The expected sampling rate is an arithmetic average of the per-run rates. The geometric mean
    // is never larger than the arithmetic mean and is pulled to zero by a single empty sample,
    // so it is not the right statistic to compare against 0.5.
    double mean = stats.getMean();
    assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
/**
 * Returns the geometric mean as reported by the wrapped {@code descStats} object.
 *
 * @return the result of {@code descStats.getGeometricMean()}
 */
@Override
public double getGeometricMean() {
    final double geometricMean = descStats.getGeometricMean();
    return geometricMean;
}
/**
 * Returns the geometric mean as reported by the underlying {@code stats} object.
 *
 * @return the result of {@code stats.getGeometricMean()}
 */
public double getGeometricMean() {
    final double geometricMean = stats.getGeometricMean();
    return geometricMean;
}
/**
 * Runs {@code TABLESAMPLE BERNOULLI (50)} against {@code orders} 100 times and checks that
 * no run returns duplicate rows and that the average fraction of rows returned is close to 0.5.
 */
@Test
public void testTableSampleBernoulli()
{
    DescriptiveStatistics stats = new DescriptiveStatistics();

    int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().size();
    // Every sample size is divided by total below; fail with a clear message rather than a NaN rate.
    assertTrue(total > 0, "Expected orders table to contain at least one row");

    for (int i = 0; i < 100; i++) {
        List<MaterializedRow> values = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)").getMaterializedRows();

        assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
        stats.addValue(values.size() * 1.0 / total);
    }

    // The expected sampling rate is an arithmetic average of the per-run rates. The geometric mean
    // is never larger than the arithmetic mean and is pulled to zero by a single empty sample,
    // so it is not the right statistic to compare against 0.5.
    double mean = stats.getMean();
    assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
/**
 * Runs {@code TABLESAMPLE BERNOULLI (50)} against {@code orders} 100 times and checks that
 * no run returns duplicate rows and that the average fraction of rows returned is close to 0.5.
 */
@Test
public void testTableSampleBernoulli()
{
    DescriptiveStatistics stats = new DescriptiveStatistics();

    int total = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT)).getMaterializedRows().size();
    // Every sample size is divided by total below; fail with a clear message rather than a NaN rate.
    assertTrue(total > 0, "Expected orders table to contain at least one row");

    for (int i = 0; i < 100; i++) {
        List<MaterializedRow> values = computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)").getMaterializedRows();

        assertEquals(values.size(), ImmutableSet.copyOf(values).size(), "TABLESAMPLE produced duplicate rows");
        stats.addValue(values.size() * 1.0 / total);
    }

    // The expected sampling rate is an arithmetic average of the per-run rates. The geometric mean
    // is never larger than the arithmetic mean and is pulled to zero by a single empty sample,
    // so it is not the right statistic to compare against 0.5.
    double mean = stats.getMean();
    assertTrue(mean > 0.45 && mean < 0.55, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
@Override public Object doWork(Object value) throws IOException { if(!(value instanceof List<?>)){ throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a numeric list but found %s", toExpression(constructingFactory), value.getClass().getSimpleName())); } // we know each value is a BigDecimal or a list of BigDecimals DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics(); ((List<?>)value).stream().mapToDouble(innerValue -> ((Number)innerValue).doubleValue()).forEach(innerValue -> descriptiveStatistics.addValue(innerValue)); Map<String,Number> map = new HashMap<>(); map.put("max", descriptiveStatistics.getMax()); map.put("mean", descriptiveStatistics.getMean()); map.put("min", descriptiveStatistics.getMin()); map.put("stdev", descriptiveStatistics.getStandardDeviation()); map.put("sum", descriptiveStatistics.getSum()); map.put("N", descriptiveStatistics.getN()); map.put("var", descriptiveStatistics.getVariance()); map.put("kurtosis", descriptiveStatistics.getKurtosis()); map.put("skewness", descriptiveStatistics.getSkewness()); map.put("popVar", descriptiveStatistics.getPopulationVariance()); map.put("geometricMean", descriptiveStatistics.getGeometricMean()); map.put("sumsq", descriptiveStatistics.getSumsq()); return new Tuple(map); } }
(long) size.getMin(), (long) size.getPercentile(25), (long) size.getPercentile(50), (long) size.getPercentile(75), (long) size.getMax(), size.getGeometricMean(), size.getMean(), size.getStandardDeviation())); } else { (long) time.getMin(), (long) time.getPercentile(25), (long) time.getPercentile(50), (long) time.getPercentile(75), (long) time.getMax(), time.getGeometricMean(), time.getMean(), time.getStandardDeviation()));