/**
 * Builds a statistics object for the {@code BOOLEAN} primitive type and
 * assigns the supplied minimum and maximum to it.
 *
 * @param min value to record as the minimum
 * @param max value to record as the maximum
 * @return the statistics object obtained from {@code getStatsBasedOnType}, after min/max were set
 */
@SuppressWarnings("unchecked")
private Statistics<T> getStatistics(boolean min, boolean max) {
  final PrimitiveType.PrimitiveTypeName typeName = PrimitiveType.PrimitiveTypeName.BOOLEAN;
  Statistics<T> result = Statistics.getStatsBasedOnType(typeName);
  // The setter lives on the concrete subtype, hence the downcast.
  final BooleanStatistics booleanView = (BooleanStatistics) result;
  booleanView.setMinMax(min, max);
  return result;
}
/**
 * Builds a statistics object for the {@code FLOAT} primitive type and
 * assigns the supplied minimum and maximum to it.
 *
 * @param min value to record as the minimum
 * @param max value to record as the maximum
 * @return the statistics object obtained from {@code getStatsBasedOnType}, after min/max were set
 */
@SuppressWarnings("unchecked")
private Statistics<T> getStatistics(float min, float max) {
  final PrimitiveType.PrimitiveTypeName typeName = PrimitiveType.PrimitiveTypeName.FLOAT;
  final Statistics<T> result = Statistics.getStatsBasedOnType(typeName);
  // The setter lives on the concrete subtype, hence the downcast.
  final FloatStatistics floatView = (FloatStatistics) result;
  floatView.setMinMax(min, max);
  return result;
}
/**
 * Builds a statistics object for the {@code BINARY} primitive type and
 * assigns the supplied minimum and maximum to it, passing each string on as bytes.
 *
 * <p>The strings are encoded explicitly as UTF-8 rather than with the platform
 * default charset, so the stored bytes do not vary with the JVM's locale settings.
 *
 * @param min string to record as the minimum; must not be {@code null}
 * @param max string to record as the maximum; must not be {@code null}
 * @return the statistics object obtained from {@code getStatsBasedOnType}, after min/max were set
 */
@SuppressWarnings("unchecked")
private Statistics<T> getStatistics(String min, String max) {
  final Statistics<T> statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.BINARY);
  // Fully qualified so that this method does not depend on an extra import.
  final byte[] minBytes = min.getBytes(java.nio.charset.StandardCharsets.UTF_8);
  final byte[] maxBytes = max.getBytes(java.nio.charset.StandardCharsets.UTF_8);
  ((BinaryStatistics) statistics).setMinMaxFromBytes(minBytes, maxBytes);
  return statistics;
}
/**
 * Builds a statistics object for the {@code INT32} primitive type and
 * assigns the supplied minimum and maximum to it.
 *
 * @param min value to record as the minimum
 * @param max value to record as the maximum
 * @return the statistics object obtained from {@code getStatsBasedOnType}, after min/max were set
 */
@SuppressWarnings("unchecked")
private Statistics<T> getStatistics(int min, int max) {
  final PrimitiveType.PrimitiveTypeName typeName = PrimitiveType.PrimitiveTypeName.INT32;
  final Statistics<T> result = Statistics.getStatsBasedOnType(typeName);
  // The setter lives on the concrete subtype, hence the downcast.
  final IntStatistics intView = (IntStatistics) result;
  intView.setMinMax(min, max);
  return result;
}
/**
 * Builds a statistics object for the {@code INT64} primitive type and
 * assigns the supplied minimum and maximum to it.
 *
 * @param min value to record as the minimum
 * @param max value to record as the maximum
 * @return the statistics object obtained from {@code getStatsBasedOnType}, after min/max were set
 */
@SuppressWarnings("unchecked")
private Statistics<T> getStatistics(long min, long max) {
  // Declared as Statistics<T> (not the raw type) to match the sibling overloads.
  final Statistics<T> statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.INT64);
  ((LongStatistics) statistics).setMinMax(min, max);
  return statistics;
}
/**
 * Builds a statistics object for the {@code DOUBLE} primitive type and
 * assigns the supplied minimum and maximum to it.
 *
 * @param min value to record as the minimum
 * @param max value to record as the maximum
 * @return the statistics object obtained from {@code getStatsBasedOnType}, after min/max were set
 */
@SuppressWarnings("unchecked")
private Statistics<T> getStatistics(double min, double max) {
  final PrimitiveType.PrimitiveTypeName typeName = PrimitiveType.PrimitiveTypeName.DOUBLE;
  final Statistics<T> result = Statistics.getStatsBasedOnType(typeName);
  // The setter lives on the concrete subtype, hence the downcast.
  final DoubleStatistics doubleView = (DoubleStatistics) result;
  doubleView.setMinMax(min, max);
  return result;
}
/**
 * Converts a {@code Statistics} value into an
 * {@code org.apache.parquet.column.statistics.Statistics} for the given primitive type.
 *
 * <p>A statistics object is always created via {@code getStatsBasedOnType(type)}.
 * When {@code statistics} is non-null, its min/max are copied over only if both are
 * set, and its {@code null_count} is always copied.
 *
 * <p>The min/max bytes are taken from each buffer's current position up to its limit
 * instead of from {@code array()}, which would expose the entire backing array
 * (ignoring offset, position and limit) and fails for read-only or direct buffers.
 * NOTE(review): assumes {@code min} and {@code max} are {@code java.nio.ByteBuffer}
 * fields — confirm against the declaration of {@code Statistics}.
 *
 * @param statistics source statistics; may be {@code null}
 * @param type primitive type used to pick the statistics implementation
 * @return the populated statistics object, or the freshly created one when
 *         {@code statistics} is {@code null}
 */
public static org.apache.parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type) {
    org.apache.parquet.column.statistics.Statistics<?> stats =
            org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType(type);
    if (statistics == null) {
        return stats;
    }
    if (statistics.isSetMax() && statistics.isSetMin()) {
        // Work on duplicates so the position of the caller's buffers is left untouched.
        java.nio.ByteBuffer minBuffer = statistics.min.duplicate();
        byte[] minBytes = new byte[minBuffer.remaining()];
        minBuffer.get(minBytes);

        java.nio.ByteBuffer maxBuffer = statistics.max.duplicate();
        byte[] maxBytes = new byte[maxBuffer.remaining()];
        maxBuffer.get(maxBytes);

        stats.setMinMaxFromBytes(minBytes, maxBytes);
    }
    stats.setNumNulls(statistics.null_count);
    return stats;
}
/**
 * Converts a {@code Statistics} value into an
 * {@code org.apache.parquet.column.statistics.Statistics} for the given primitive type.
 *
 * <p>A statistics object is always created via {@code getStatsBasedOnType(type)}.
 * When {@code statistics} is non-null, its min/max are copied over only if both are
 * set, and its {@code null_count} is always copied.
 *
 * <p>The min/max bytes are taken from each buffer's current position up to its limit
 * instead of from {@code array()}, which would expose the entire backing array
 * (ignoring offset, position and limit) and fails for read-only or direct buffers.
 * NOTE(review): assumes {@code min} and {@code max} are {@code java.nio.ByteBuffer}
 * fields — confirm against the declaration of {@code Statistics}.
 *
 * @param statistics source statistics; may be {@code null}
 * @param type primitive type used to pick the statistics implementation
 * @return the populated statistics object, or the freshly created one when
 *         {@code statistics} is {@code null}
 */
public static org.apache.parquet.column.statistics.Statistics<?> readStats(Statistics statistics, PrimitiveTypeName type) {
    org.apache.parquet.column.statistics.Statistics<?> stats =
            org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType(type);
    if (statistics == null) {
        return stats;
    }
    if (statistics.isSetMax() && statistics.isSetMin()) {
        // Work on duplicates so the position of the caller's buffers is left untouched.
        java.nio.ByteBuffer minBuffer = statistics.min.duplicate();
        byte[] minBytes = new byte[minBuffer.remaining()];
        minBuffer.get(minBytes);

        java.nio.ByteBuffer maxBuffer = statistics.max.duplicate();
        byte[] maxBytes = new byte[maxBuffer.remaining()];
        maxBuffer.get(maxBytes);

        stats.setMinMaxFromBytes(minBytes, maxBytes);
    }
    stats.setNumNulls(statistics.null_count);
    return stats;
}
@SuppressWarnings("unchecked") @Override public Statistics<T> visitTypedFieldExpr(TypedFieldExpr typedFieldExpr, Void value) throws RuntimeException { final ColumnStatistics<T> columnStatistics = columnStatMap.get(typedFieldExpr.getPath()); if (columnStatistics != null) { return columnStatistics.getStatistics(); } else if (typedFieldExpr.getMajorType().equals(Types.OPTIONAL_INT)) { // field does not exist. Statistics<T> statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.INT32); statistics.setNumNulls(rowCount); // all values are nulls return statistics; } return null; }
// Obtain the Statistics instance that getStatsBasedOnType selects for primitiveType.
Statistics stat = Statistics.getStatsBasedOnType(primitiveType);
// convertedStat starts out referring to the very same object as stat (no copy is made).
// NOTE(review): presumably reassigned later when a conversion is required — confirm
// against the surrounding code, which is not visible here.
Statistics convertedStat = stat;
/**
 * Verifies that a {@code TupleDomainParquetPredicate} built for the value "Test" on an
 * optional BINARY column matches when the column statistics report that same value as
 * both minimum and maximum, with one null, over two rows.
 *
 * <p>The statistics bytes are encoded explicitly as UTF-8 so they agree with the
 * {@code utf8Slice(value)} used for the predicate regardless of the platform default
 * charset.
 *
 * @throws ParquetCorruptionException declared by the predicate API under test
 */
@Test
public void testMatchesWithStatistics() throws ParquetCorruptionException {
    String value = "Test";
    ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0);
    RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
    TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), utf8Slice(value));
    TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));

    // Two separate arrays, as in the original, so min and max never share storage.
    byte[] minBytes = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    byte[] maxBytes = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    Statistics stats = getStatsBasedOnType(column.getType());
    stats.setNumNulls(1L);
    stats.setMinMaxFromBytes(minBytes, maxBytes);

    assertTrue(parquetPredicate.matches(2, singletonMap(column, stats), ID, true));
}