/**
 * Creates a {@link FloatStatistics} and sets its bounds to the supplied values.
 *
 * @param minimum value passed as the lower bound to {@code setMinMax}
 * @param maximum value passed as the upper bound to {@code setMinMax}
 * @return the newly created statistics object
 */
private static FloatStatistics floatColumnStats(float minimum, float maximum)
{
    FloatStatistics floatStats = new FloatStatistics();
    floatStats.setMinMax(minimum, maximum);
    return floatStats;
}
/**
 * Creates a {@link DoubleStatistics} and sets its bounds to the supplied values.
 *
 * @param minimum value passed as the lower bound to {@code setMinMax}
 * @param maximum value passed as the upper bound to {@code setMinMax}
 * @return the newly created statistics object
 */
private static DoubleStatistics doubleColumnStats(double minimum, double maximum)
{
    DoubleStatistics doubleStats = new DoubleStatistics();
    doubleStats.setMinMax(minimum, maximum);
    return doubleStats;
}
/**
 * Creates an {@link IntStatistics} and sets its bounds via {@code setMinMax(minimum, maximum)}.
 *
 * @param minimum value passed as the lower bound
 * @param maximum value passed as the upper bound
 * @return the newly created statistics object
 */
// NOTE(review): the final '}' on this line closes an enclosing scope whose header is not visible here.
private static IntStatistics intColumnStats(int minimum, int maximum) { IntStatistics statistics = new IntStatistics(); statistics.setMinMax(minimum, maximum); return statistics; } }
if (statistics == null || statistics.isEmpty()) { return Domain.all(type); if (statistics.getNumNulls() == rowCount) { return Domain.onlyNull(type); boolean hasNullValue = statistics.getNumNulls() != 0L; if (statistics.genericGetMin() == null || statistics.genericGetMax() == null) { return Domain.create(ValueSet.all(type), hasNullValue); boolean hasTrueValues = !(booleanStatistics.getMax() == false && booleanStatistics.getMin() == false); boolean hasFalseValues = !(booleanStatistics.getMax() == true && booleanStatistics.getMin() == true); if (hasTrueValues && hasFalseValues) { return Domain.all(type); if (statistics instanceof LongStatistics) { LongStatistics longStatistics = (LongStatistics) statistics; if (longStatistics.genericGetMin() > longStatistics.genericGetMax()) { failWithCorruptionException(failOnCorruptedParquetStatistics, column, id, longStatistics); return Domain.create(ValueSet.all(type), hasNullValue); parquetIntegerStatistics = new ParquetIntegerStatistics(longStatistics.genericGetMin(), longStatistics.genericGetMax()); if (intStatistics.genericGetMin() > intStatistics.genericGetMax()) { failWithCorruptionException(failOnCorruptedParquetStatistics, column, id, intStatistics); return Domain.create(ValueSet.all(type), hasNullValue); parquetIntegerStatistics = new ParquetIntegerStatistics((long) intStatistics.getMin(), (long) intStatistics.getMax());
/**
 * Builds a {@code parquet.column.statistics.Statistics} for the given primitive type and
 * copies min/max and the null count from the supplied statistics when they are present.
 *
 * @param statistics source statistics; when {@code null} the freshly created,
 *        unpopulated statistics object is returned as-is
 * @param type primitive type used to pick the statistics implementation
 * @return the statistics object obtained from {@code getStatsBasedOnType(type)}
 */
public static parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type)
{
    parquet.column.statistics.Statistics<?> result = parquet.column.statistics.Statistics.getStatsBasedOnType(type);
    if (statistics == null) {
        return result;
    }

    // Min and max are only copied when both are set on the source.
    if (statistics.isSetMax() && statistics.isSetMin()) {
        result.setMinMaxFromBytes(statistics.min.array(), statistics.max.array());
    }
    result.setNumNulls(statistics.null_count);
    return result;
}
/**
 * Creates a {@link BooleanStatistics} and sets its bounds to the supplied values.
 *
 * @param minimum value passed as the lower bound to {@code setMinMax}
 * @param maximum value passed as the upper bound to {@code setMinMax}
 * @return the newly created statistics object
 */
private static BooleanStatistics booleanColumnStats(boolean minimum, boolean maximum)
{
    BooleanStatistics booleanStats = new BooleanStatistics();
    booleanStats.setMinMax(minimum, maximum);
    return booleanStats;
}
/**
 * Creates a {@link LongStatistics} and sets its bounds to the supplied values.
 *
 * @param minimum value passed as the lower bound to {@code setMinMax}
 * @param maximum value passed as the upper bound to {@code setMinMax}
 * @return the newly created statistics object
 */
private static LongStatistics longColumnStats(long minimum, long maximum)
{
    LongStatistics longStats = new LongStatistics();
    longStats.setMinMax(minimum, maximum);
    return longStats;
}
/**
 * Creates a {@link BinaryStatistics} whose bounds are the given strings converted
 * with {@code Binary.fromString}.
 *
 * @param minimum string converted and passed as the lower bound
 * @param maximum string converted and passed as the upper bound
 * @return the newly created statistics object
 */
private static BinaryStatistics stringColumnStats(String minimum, String maximum)
{
    BinaryStatistics binaryStats = new BinaryStatistics();
    // Convert in the same order as the argument list: minimum first, then maximum.
    Binary lowerBound = Binary.fromString(minimum);
    Binary upperBound = Binary.fromString(maximum);
    binaryStats.setMinMax(lowerBound, upperBound);
    return binaryStats;
}
/**
 * Merges the min/max of another statistics object into this one.
 *
 * <p>If this object already has a non-null value, the other object's bounds are passed
 * to {@code updateStats}; otherwise they are passed to {@code initializeStats}.
 *
 * @param stats statistics to merge; cast to {@link DoubleStatistics}
 */
@Override
public void mergeStatisticsMinMax(Statistics stats) {
  DoubleStatistics other = (DoubleStatistics) stats;
  if (this.hasNonNullValue()) {
    updateStats(other.getMin(), other.getMax());
    return;
  }
  initializeStats(other.getMin(), other.getMax());
}
@Override public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> statistics, ParquetDataSourceId id, boolean failOnCorruptedParquetStatistics) throws ParquetCorruptionException { if (numberOfRows == 0) { return false; } ImmutableMap.Builder<ColumnDescriptor, Domain> domains = ImmutableMap.builder(); for (RichColumnDescriptor column : columns) { Statistics<?> columnStatistics = statistics.get(column); Domain domain; Type type = getPrestoType(effectivePredicate, column); if (columnStatistics == null || columnStatistics.isEmpty()) { // no stats for column domain = Domain.all(type); } else { domain = getDomain(type, numberOfRows, columnStatistics, id, column.toString(), failOnCorruptedParquetStatistics); } domains.put(column, domain); } TupleDomain<ColumnDescriptor> stripeDomain = TupleDomain.withColumnDomains(domains.build()); return effectivePredicate.overlaps(stripeDomain); }
/**
 * Records a single value in these statistics.
 *
 * <p>When a non-null value has already been seen, the value is passed as both arguments
 * to the two-argument {@code updateStats}; otherwise it is passed as both arguments to
 * {@code initializeStats}.
 *
 * @param value the value to record
 */
@Override
public void updateStats(double value) {
  if (this.hasNonNullValue()) {
    updateStats(value, value);
  } else {
    initializeStats(value, value);
  }
}
/**
 * Records a single boolean value in these statistics.
 *
 * <p>When a non-null value has already been seen, the value is passed as both arguments
 * to the two-argument {@code updateStats}; otherwise it is passed as both arguments to
 * {@code initializeStats}.
 *
 * @param value the value to record
 */
@Override
public void updateStats(boolean value) {
  if (this.hasNonNullValue()) {
    updateStats(value, value);
    return;
  }
  initializeStats(value, value);
}
/**
 * Sets the stored minimum and maximum to the given values and marks these
 * statistics as not empty.
 *
 * @param min_value value assigned to {@code min}
 * @param max_value value assigned to {@code max}
 */
public void initializeStats(double min_value, double max_value) {
  this.min = min_value;
  this.max = max_value;
  markAsNotEmpty();
}
/**
 * Assigns the given bounds to the {@code max} and {@code min} fields and then calls
 * {@code markAsNotEmpty()}.
 *
 * @param min value stored in {@code this.min}
 * @param max value stored in {@code this.max}
 */
// NOTE(review): the final '}' on this line closes an enclosing scope whose header is not visible here.
public void setMinMax(int min, int max) { this.max = max; this.min = min; this.markAsNotEmpty(); } }
/**
 * Assigns the given bounds to the {@code max} and {@code min} fields and then calls
 * {@code markAsNotEmpty()}.
 *
 * @param min value stored in {@code this.min}
 * @param max value stored in {@code this.max}
 */
// NOTE(review): the final '}' on this line closes an enclosing scope whose header is not visible here.
public void setMinMax(float min, float max) { this.max = max; this.min = min; this.markAsNotEmpty(); } }
/**
 * Assigns the given bounds to the {@code max} and {@code min} fields and then calls
 * {@code markAsNotEmpty()}. The {@code Binary} references are stored as passed; no copy
 * is made in this method.
 *
 * @param min value stored in {@code this.min}
 * @param max value stored in {@code this.max}
 */
// NOTE(review): the final '}' on this line closes an enclosing scope whose header is not visible here.
public void setMinMax(Binary min, Binary max) { this.max = max; this.min = min; this.markAsNotEmpty(); } }
/**
 * Verifies that the predicate matches when the column statistics have min and max
 * equal to the predicate value and one null among two rows.
 *
 * <p>The statistics bytes are encoded explicitly as UTF-8 so they agree with the
 * predicate value built through {@code utf8Slice}, regardless of the platform's
 * default charset.
 */
@Test
public void testMatchesWithStatistics()
        throws ParquetCorruptionException
{
    String value = "Test";
    ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0);
    RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
    TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), utf8Slice(value));
    TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));

    Statistics stats = getStatsBasedOnType(column.getType());
    stats.setNumNulls(1L);
    // Fully-qualified charset avoids depending on an import that may not exist in this file.
    byte[] minBytes = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    byte[] maxBytes = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    stats.setMinMaxFromBytes(minBytes, maxBytes);

    assertTrue(parquetPredicate.matches(2, singletonMap(column, stats), ID, true));
}
/**
 * Creates a {@link LongStatistics} and sets its bounds to the supplied values.
 *
 * @param minimum value passed as the lower bound to {@code setMinMax}
 * @param maximum value passed as the upper bound to {@code setMinMax}
 * @return the newly created statistics object
 */
private static LongStatistics longColumnStats(long minimum, long maximum)
{
    LongStatistics result = new LongStatistics();
    result.setMinMax(minimum, maximum);
    return result;
}
/**
 * Assigns the given bounds to the {@code max} and {@code min} fields and then calls
 * {@code markAsNotEmpty()}.
 *
 * @param min value stored in {@code this.min}
 * @param max value stored in {@code this.max}
 */
// NOTE(review): the final '}' on this line closes an enclosing scope whose header is not visible here.
public void setMinMax(double min, double max) { this.max = max; this.min = min; this.markAsNotEmpty(); } }
/**
 * Sets the stored minimum and maximum to the given values and marks these
 * statistics as not empty.
 *
 * @param min_value value assigned to {@code min}
 * @param max_value value assigned to {@code max}
 */
public void initializeStats(int min_value, int max_value) {
  this.min = min_value;
  this.max = max_value;
  markAsNotEmpty();
}