private History(int windowSize) { this.samples = new DescriptiveStatistics(windowSize); }
/** * Returns a copy of this DescriptiveStatistics instance with the same internal state. * * @return a copy of this */ public DescriptiveStatistics copy() { DescriptiveStatistics result = new DescriptiveStatistics(); // No try-catch or advertised exception because parms are guaranteed valid copy(this, result); return result; }
/**
 * Computes the quadratic mean of the column after {@code removeMissing}
 * has been applied to it.
 *
 * @param column the numeric column to summarize
 * @return the quadratic mean reported by {@link DescriptiveStatistics}
 */
@Override
public Double summarize(NumericColumn<?> column) {
    final DescriptiveStatistics presentValues = new DescriptiveStatistics(removeMissing(column));
    return presentValues.getQuadraticMean();
}
};
/**
 * Scales the given values to a per-second rate based on the time elapsed
 * since the previous call for the same key, and wraps them in a
 * {@link DescriptiveStatistics}.
 *
 * <p>The current time is always recorded as the last report time for the key.
 *
 * @param key    identifies the series whose last report time is tracked
 * @param values raw values accumulated since the previous report
 * @return statistics over {@code 1000 * value / elapsedMillis}, or {@code null}
 *         when no previous report exists for the key or no time has elapsed
 */
private @Nullable DescriptiveStatistics getNormalizedStatistics(String key, Collection<? extends Number> values) {
    final long now = System.currentTimeMillis();
    final long elapsedMillis = this.lastReportTimes.containsKey(key)
        ? now - this.lastReportTimes.get(key)
        : 0;
    this.lastReportTimes.put(key, now);

    // Without a positive interval there is nothing to normalize against.
    if (elapsedMillis == 0) {
        return null;
    }

    final double[] perSecond = values.stream()
        .mapToDouble(value -> 1000 * value.doubleValue() / elapsedMillis)
        .toArray();
    return new DescriptiveStatistics(perSecond);
}
}
/**
 * Runs a 50% Bernoulli table sample 100 times, checking that each run yields
 * no duplicate rows and that the geometric mean of the observed sampling
 * ratios lies strictly between 0.45 and 0.55.
 */
@Test
public void testTableSampleBernoulli() {
    final int totalRows = computeExpected("SELECT orderkey FROM orders", ImmutableList.of(BIGINT))
        .getMaterializedRows()
        .size();
    final DescriptiveStatistics samplingRatios = new DescriptiveStatistics();

    int run = 0;
    while (run < 100) {
        List<MaterializedRow> sampled =
            computeActual("SELECT orderkey FROM orders TABLESAMPLE BERNOULLI (50)").getMaterializedRows();
        int distinctCount = ImmutableSet.copyOf(sampled).size();
        assertEquals(sampled.size(), distinctCount, "TABLESAMPLE produced duplicate rows");
        samplingRatios.addValue(sampled.size() * 1.0 / totalRows);
        run++;
    }

    // The check is made against the geometric mean of the per-run ratios.
    final double mean = samplingRatios.getGeometricMean();
    final boolean withinTolerance = mean > 0.45 && mean < 0.55;
    assertTrue(withinTolerance, format("Expected mean sampling rate to be ~0.5, but was %s", mean));
}
/**
 * Collects the numeric value of every node into a {@link DescriptiveStatistics}.
 *
 * @param selectedLists nodes whose values are added, in iteration order
 * @return statistics over the double value of each node
 * @throws ClassCastException if a node's value is not a {@link Number}
 */
private DescriptiveStatistics getDescriptiveStatistics(
    final LinkedList<InMemoryHistoryNode> selectedLists) throws ClassCastException {
    final DescriptiveStatistics collected = new DescriptiveStatistics();
    selectedLists.forEach(historyNode -> {
        final Number numeric = (Number) historyNode.getValue();
        collected.addValue(numeric.doubleValue());
    });
    return collected;
}
DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < iterations; i++) { if (pre != null) {
/**
 * Get new statistics that exclude values outside the open interval
 * {@code (mean - |MARGIN * mean|, mean + |MARGIN * mean|)}.
 *
 * <p>The magnitude of the delta is used so that the bounds stay correctly
 * ordered when the mean is negative; otherwise the lower bound would exceed
 * the upper bound and every sample would be discarded.
 *
 * @param data Source data
 * @return new {@link DescriptiveStatistics} object with sanitized data
 */
private static DescriptiveStatistics sanitize(@NotNull DescriptiveStatistics data) {
    double meanValue = data.getMean();
    // Take the absolute value so that minVal <= maxVal regardless of the sign of the mean.
    double delta = Math.abs(MARGIN * meanValue);
    double minVal = meanValue - delta;
    double maxVal = meanValue + delta;
    return new DescriptiveStatistics(Arrays.stream(data.getValues())
        .filter(x -> x > minVal && x < maxVal)
        .toArray());
}
/**
 * Aggregates one per-column metric over all columns that are numeric and are
 * neither ignored nor the response.
 *
 * @param st which per-column metric to aggregate
 * @return {@code SimpleStats.na()} when {@code isAnyNumeric()} is false,
 *         otherwise the stats built from the collected values
 * @throws IllegalStateException if an eligible column is met and {@code st}
 *         is not one of Skewness, Kurtosis or Cardinality
 */
private SimpleStats calculateStats(StatsType st) {
    if (!isAnyNumeric()) {
        return SimpleStats.na();
    }

    final DescriptiveStatistics collected = new DescriptiveStatistics();
    for (ColMeta meta : _cols) {
        // Skip everything that is not a plain numeric predictor column.
        if (meta._ignored || meta._response || !meta._isNumeric) {
            continue;
        }
        final double metric;
        switch (st) {
            case Skewness:
                metric = meta._skew;
                break;
            case Kurtosis:
                metric = meta._kurtosis;
                break;
            case Cardinality:
                metric = meta._cardinality;
                break;
            default:
                throw new IllegalStateException("Unsupported type " + st);
        }
        collected.addValue(metric);
    }
    return SimpleStats.from(collected);
}
/**
 * Benchmarks {@code client.listPartitions} against a freshly created
 * partitioned table to which {@code howMany} partitions are added first.
 *
 * <p>The table is always dropped afterwards, whether or not the benchmark ran.
 *
 * @param bench   benchmark harness used to take the measurement
 * @param data    supplies the client, database name and table name
 * @param howMany number of partitions to add before measuring
 * @return the statistics returned by {@code bench.measure}, or an empty
 *         {@link DescriptiveStatistics} if a {@code TException} is caught
 */
static DescriptiveStatistics benchmarkListManyPartitions(@NotNull MicroBenchmark bench,
                                                         @NotNull BenchData data,
                                                         int howMany) {
    final HMSClient client = data.getClient();
    String dbName = data.dbName;
    String tableName = data.tableName;

    createPartitionedTable(client, dbName, tableName);
    try {
        addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany);
        LOG.debug("Created {} partitions", howMany);
        LOG.debug("started benchmark... ");
        return bench.measure(() ->
            throwingSupplierWrapper(() -> client.listPartitions(dbName, tableName)));
    } catch (TException e) {
        // Report through the logger already used by this method instead of
        // printing the stack trace straight to stderr.
        LOG.error("Benchmark of listPartitions failed", e);
        return new DescriptiveStatistics();
    } finally {
        throwingSupplierWrapper(() -> client.dropTable(dbName, tableName));
    }
}
/**
 * Benchmarks {@code client.getPartitions} against a freshly created
 * partitioned table to which {@code howMany} partitions are added first.
 *
 * <p>The table is always dropped afterwards, whether or not the benchmark ran.
 *
 * @param bench   benchmark harness used to take the measurement
 * @param data    supplies the client, database name and table name
 * @param howMany number of partitions to add before measuring
 * @return the statistics returned by {@code bench.measure}, or an empty
 *         {@link DescriptiveStatistics} if a {@code TException} is caught
 */
static DescriptiveStatistics benchmarkGetPartitions(@NotNull MicroBenchmark bench,
                                                    @NotNull BenchData data,
                                                    int howMany) {
    final HMSClient client = data.getClient();
    String dbName = data.dbName;
    String tableName = data.tableName;

    createPartitionedTable(client, dbName, tableName);
    try {
        addManyPartitions(client, dbName, tableName, null, Collections.singletonList("d"), howMany);
        LOG.debug("Created {} partitions", howMany);
        LOG.debug("started benchmark... ");
        return bench.measure(() ->
            throwingSupplierWrapper(() -> client.getPartitions(dbName, tableName)));
    } catch (TException e) {
        // Report through the logger already used by this method instead of
        // printing the stack trace straight to stderr.
        LOG.error("Benchmark of getPartitions failed", e);
        return new DescriptiveStatistics();
    } finally {
        throwingSupplierWrapper(() -> client.dropTable(dbName, tableName));
    }
}
/** * Normalize (standardize) the sample, so it is has a mean of 0 and a standard deviation of 1. * * @param sample Sample to normalize. * @return normalized (standardized) sample. * @since 2.2 */ public static double[] normalize(final double[] sample) { DescriptiveStatistics stats = new DescriptiveStatistics(); // Add the data from the series to stats for (int i = 0; i < sample.length; i++) { stats.addValue(sample[i]); } // Compute mean and standard deviation double mean = stats.getMean(); double standardDeviation = stats.getStandardDeviation(); // initialize the standardizedSample, which has the same length as the sample double[] standardizedSample = new double[sample.length]; for (int i = 0; i < sample.length; i++) { // z = (x- mean)/standardDeviation standardizedSample[i] = (sample[i] - mean) / standardDeviation; } return standardizedSample; }
/**
 * Benchmarks {@code client.listPartitions} against a freshly created
 * partitioned table; {@code addManyPartitions} is called with a count of 1.
 *
 * <p>The table is dropped in all cases once the method finishes.
 *
 * @param bench benchmark harness used to take the measurement
 * @param data  supplies the client, database name and table name
 * @return the statistics returned by {@code bench.measure}, or an empty
 *         {@link DescriptiveStatistics} if a {@code TException} is caught
 */
static DescriptiveStatistics benchmarkListPartition(@NotNull MicroBenchmark bench,
                                                    @NotNull BenchData data) {
    final HMSClient hmsClient = data.getClient();
    final String database = data.dbName;
    final String table = data.tableName;

    createPartitionedTable(hmsClient, database, table);
    try {
        addManyPartitions(hmsClient, database, table, null,
            Collections.singletonList("d"), 1);
        return bench.measure(
            () -> throwingSupplierWrapper(() -> hmsClient.listPartitions(database, table)));
    } catch (TException e) {
        e.printStackTrace();
        return new DescriptiveStatistics();
    } finally {
        throwingSupplierWrapper(() -> hmsClient.dropTable(database, table));
    }
}
System.out.println("Using Groovy " + GROOVY_VERSION); DescriptiveStatistics stats = new DescriptiveStatistics();
/**
 * Logs summary statistics of the raw measurements at info level, formatted
 * with {@code STAT_FORMAT} in the US locale.
 *
 * @param set the set the measurements refer to; its simple class name and
 *            size are included in the message
 * @param raw the raw measurements to summarize
 */
private void logStats(Set<?> set, double[] raw) {
    final DescriptiveStatistics summary = new DescriptiveStatistics(raw);

    final String setType = set.getClass().getSimpleName();
    final int setSize = set.size();
    final long sampleCount = summary.getN();
    // Min, max and mean are rounded to whole numbers before formatting.
    final long roundedMin = Math.round(summary.getMin());
    final long roundedMax = Math.round(summary.getMax());
    final long roundedMean = Math.round(summary.getMean());
    final double stdDev = summary.getStandardDeviation();
    final double variance = summary.getVariance();

    final String message = String.format(
        Locale.US,
        STAT_FORMAT,
        setType,
        setSize,
        sampleCount,
        roundedMin,
        roundedMax,
        roundedMean,
        stdDev,
        variance);
    Log.i(TAG, message);
}
}
/**
 * Runs 500 trials with a random number of unique values each, records the
 * relative error of the estimated group-by count, and asserts bounds on the
 * mean and standard deviation of those errors.
 *
 * @param maxStandardError standard error passed to the estimator; also added
 *                         to the allowed standard deviation of the errors
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError) {
    final DescriptiveStatistics relativeErrors = new DescriptiveStatistics();

    int trial = 0;
    while (trial < 500) {
        int uniques = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (uniques * 1.5);
        List<Object> sample = createRandomSample(uniques, sampleSize);

        long estimated = estimateGroupByCount(sample, maxStandardError);
        relativeErrors.addValue((estimated - uniques) * 1.0 / uniques);
        ++trial;
    }

    assertLessThan(relativeErrors.getMean(), 1.0e-2);
    assertLessThan(relativeErrors.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
/**
 * Benchmarks {@code client.addPartition} on a freshly created partitioned
 * table. The partition is dropped again in the third callback given to
 * {@code bench.measure}; the first callback is {@code null}.
 *
 * <p>The table is dropped in all cases once the method finishes.
 *
 * @param bench benchmark harness used to take the measurement
 * @param data  supplies the client, database name and table name
 * @return the statistics returned by {@code bench.measure}, or an empty
 *         {@link DescriptiveStatistics} if a {@code TException} is caught
 */
static DescriptiveStatistics benchmarkCreatePartition(@NotNull MicroBenchmark bench,
                                                      @NotNull BenchData data) {
    final HMSClient hmsClient = data.getClient();
    final String database = data.dbName;
    final String table = data.tableName;

    createPartitionedTable(hmsClient, database, table);
    final List<String> partitionValues = Collections.singletonList("d1");
    try {
        final Table tableMeta = hmsClient.getTable(database, table);
        final Partition toAdd = new Util.PartitionBuilder(tableMeta)
            .withValues(partitionValues)
            .build();

        return bench.measure(
            null,
            () -> throwingSupplierWrapper(() -> hmsClient.addPartition(toAdd)),
            () -> throwingSupplierWrapper(
                () -> hmsClient.dropPartition(database, table, partitionValues)));
    } catch (TException e) {
        e.printStackTrace();
        return new DescriptiveStatistics();
    } finally {
        throwingSupplierWrapper(() -> hmsClient.dropTable(database, table));
    }
}
/**
 * Benchmarks {@code client.dropPartition} on a freshly created partitioned
 * table. The partition is added in the first callback given to
 * {@code bench.measure}; the third callback is {@code null}.
 *
 * <p>The table is dropped in all cases once the method finishes.
 *
 * @param bench benchmark harness used to take the measurement
 * @param data  supplies the client, database name and table name
 * @return the statistics returned by {@code bench.measure}, or an empty
 *         {@link DescriptiveStatistics} if a {@code TException} is caught
 */
static DescriptiveStatistics benchmarkDropPartition(@NotNull MicroBenchmark bench,
                                                    @NotNull BenchData data) {
    final HMSClient hmsClient = data.getClient();
    final String database = data.dbName;
    final String table = data.tableName;

    createPartitionedTable(hmsClient, database, table);
    final List<String> partitionValues = Collections.singletonList("d1");
    try {
        final Table tableMeta = hmsClient.getTable(database, table);
        final Partition toDrop = new Util.PartitionBuilder(tableMeta)
            .withValues(partitionValues)
            .build();

        return bench.measure(
            () -> throwingSupplierWrapper(() -> hmsClient.addPartition(toDrop)),
            () -> throwingSupplierWrapper(
                () -> hmsClient.dropPartition(database, table, partitionValues)),
            null);
    } catch (TException e) {
        e.printStackTrace();
        return new DescriptiveStatistics();
    } finally {
        throwingSupplierWrapper(() -> hmsClient.dropTable(database, table));
    }
}
static DescriptiveStatistics benchmarkRenameTable(@NotNull MicroBenchmark bench, @NotNull BenchData data, int count) { final HMSClient client = data.getClient(); String dbName = data.dbName; String tableName = data.tableName; createPartitionedTable(client, dbName, tableName); try { addManyPartitionsNoException(client, dbName, tableName, null, Collections.singletonList("d"), count); Table oldTable = client.getTable(dbName, tableName); oldTable.getSd().setLocation(""); Table newTable = oldTable.deepCopy(); newTable.setTableName(tableName + "_renamed"); return bench.measure( () -> { // Measuring 2 renames, so the tests are idempotent throwingSupplierWrapper(() -> client.alterTable(oldTable.getDbName(), oldTable.getTableName(), newTable)); throwingSupplierWrapper(() -> client.alterTable(newTable.getDbName(), newTable.getTableName(), oldTable)); } ); } catch (TException e) { e.printStackTrace(); return new DescriptiveStatistics(); } finally { throwingSupplierWrapper(() -> client.dropTable(dbName, tableName)); } }
/**
 * Checks the summary statistics of an index column, and of a copy of it with
 * one empty cell appended, against reference values computed by
 * {@code StatUtils} and {@link DescriptiveStatistics} from the original
 * column's double array.
 */
@Test
public void testSummarize() {
    final double tolerance = 0.00001;

    IntColumn c = IntColumn.indexColumn("t", 99, 1);
    IntColumn c2 = c.copy();
    // c2 gets one extra empty cell; every assertion below still expects it
    // to match the statistics of c.
    c2.appendCell("");

    // Reference values, all derived from the original column's data.
    final double[] reference = c.asDoubleArray();
    final DescriptiveStatistics referenceStats = new DescriptiveStatistics(reference);

    // Same assertions for the copy with the empty cell first, then the original.
    for (IntColumn column : new IntColumn[] {c2, c}) {
        assertEquals(StatUtils.variance(reference), column.variance(), tolerance);
        assertEquals(StatUtils.sumLog(reference), column.sumOfLogs(), tolerance);
        assertEquals(StatUtils.sumSq(reference), column.sumOfSquares(), tolerance);
        assertEquals(StatUtils.geometricMean(reference), column.geometricMean(), tolerance);
        assertEquals(StatUtils.product(reference), column.product(), tolerance);
        assertEquals(StatUtils.populationVariance(reference), column.populationVariance(), tolerance);
        assertEquals(referenceStats.getQuadraticMean(), column.quadraticMean(), tolerance);
        assertEquals(referenceStats.getStandardDeviation(), column.standardDeviation(), tolerance);
        assertEquals(referenceStats.getKurtosis(), column.kurtosis(), tolerance);
        assertEquals(referenceStats.getSkewness(), column.skewness(), tolerance);
    }
}