/**
 * Returns a copy of {@code parameters} with all managed statistics keys replaced by the
 * values carried in {@code statistics}. Keys listed in {@code STATS_PROPERTIES} are dropped
 * first, then each present basic statistic is written back under its metastore key.
 */
public static Map<String, String> updateStatisticsParameters(Map<String, String> parameters, HiveBasicStatistics statistics)
{
    ImmutableMap.Builder<String, String> updated = ImmutableMap.builder();
    // Keep every parameter that is not one of the statistics properties we manage.
    for (Map.Entry<String, String> entry : parameters.entrySet()) {
        if (!STATS_PROPERTIES.contains(entry.getKey())) {
            updated.put(entry.getKey(), entry.getValue());
        }
    }
    // Append the current statistics values; absent values leave no entry.
    statistics.getFileCount().ifPresent(fileCount -> updated.put(NUM_FILES, String.valueOf(fileCount)));
    statistics.getRowCount().ifPresent(rowCount -> updated.put(NUM_ROWS, String.valueOf(rowCount)));
    statistics.getInMemoryDataSizeInBytes().ifPresent(bytes -> updated.put(RAW_DATA_SIZE, String.valueOf(bytes)));
    statistics.getOnDiskDataSizeInBytes().ifPresent(bytes -> updated.put(TOTAL_SIZE, String.valueOf(bytes)));
    return updated.build();
}
/**
 * Converts connector-level boolean column statistics into the metastore Thrift
 * {@code ColumnStatisticsObj} representation. Absent values are simply not set.
 */
private static ColumnStatisticsObj createBooleanStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    BooleanColumnStatsData metastoreData = new BooleanColumnStatsData();
    statistics.getNullsCount().ifPresent(metastoreData::setNumNulls);
    statistics.getBooleanStatistics().ifPresent(booleanStatistics -> {
        booleanStatistics.getTrueCount().ifPresent(metastoreData::setNumTrues);
        booleanStatistics.getFalseCount().ifPresent(metastoreData::setNumFalses);
    });
    return new ColumnStatisticsObj(columnName, columnType.toString(), booleanStats(metastoreData));
}
/**
 * Validates statistics for every partition of {@code table}: all basic counters must be
 * non-negative, and each column's statistics are checked by {@code validateColumnStatistics}.
 */
@VisibleForTesting
static void validatePartitionStatistics(SchemaTableName table, Map<String, PartitionStatistics> partitionStatistics)
{
    for (Map.Entry<String, PartitionStatistics> entry : partitionStatistics.entrySet()) {
        String partition = entry.getKey();
        PartitionStatistics statistics = entry.getValue();
        HiveBasicStatistics basic = statistics.getBasicStatistics();
        // Row count is captured separately because the column-level validation needs it.
        OptionalLong rowCount = basic.getRowCount();
        rowCount.ifPresent(value -> checkStatistics(value >= 0, table, partition, "rowCount must be greater than or equal to zero: %s", value));
        basic.getFileCount().ifPresent(value -> checkStatistics(value >= 0, table, partition, "fileCount must be greater than or equal to zero: %s", value));
        basic.getInMemoryDataSizeInBytes().ifPresent(value -> checkStatistics(value >= 0, table, partition, "inMemoryDataSizeInBytes must be greater than or equal to zero: %s", value));
        basic.getOnDiskDataSizeInBytes().ifPresent(value -> checkStatistics(value >= 0, table, partition, "onDiskDataSizeInBytes must be greater than or equal to zero: %s", value));
        statistics.getColumnStatistics().forEach((column, columnStatistics) -> validateColumnStatistics(table, partition, column, rowCount, columnStatistics));
    }
}
/**
 * Converts integer column statistics (min/max/null count/distinct count) into the
 * metastore Thrift long statistics representation.
 */
private static ColumnStatisticsObj createLongStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    LongColumnStatsData result = new LongColumnStatsData();
    statistics.getIntegerStatistics().ifPresent(integerStats -> {
        integerStats.getMin().ifPresent(result::setLowValue);
        integerStats.getMax().ifPresent(result::setHighValue);
    });
    statistics.getNullsCount().ifPresent(result::setNumNulls);
    // The metastore NDV convention differs from the connector's, hence the conversion helper.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(result::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), longStats(result));
}
/**
 * Converts decimal column statistics into the metastore Thrift decimal statistics
 * representation, translating min/max values via {@code toMetastoreDecimal}.
 */
private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DecimalColumnStatsData result = new DecimalColumnStatsData();
    statistics.getDecimalStatistics().ifPresent(decimalStats -> {
        decimalStats.getMin().ifPresent(min -> result.setLowValue(toMetastoreDecimal(min)));
        decimalStats.getMax().ifPresent(max -> result.setHighValue(toMetastoreDecimal(max)));
    });
    statistics.getNullsCount().ifPresent(result::setNumNulls);
    // The metastore NDV convention differs from the connector's, hence the conversion helper.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(result::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(result));
}
/**
 * Converts date column statistics into the metastore Thrift date statistics
 * representation, translating min/max values via {@code toMetastoreDate}.
 */
private static ColumnStatisticsObj createDateStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DateColumnStatsData result = new DateColumnStatsData();
    statistics.getDateStatistics().ifPresent(dateStats -> {
        dateStats.getMin().ifPresent(min -> result.setLowValue(toMetastoreDate(min)));
        dateStats.getMax().ifPresent(max -> result.setHighValue(toMetastoreDate(max)));
    });
    statistics.getNullsCount().ifPresent(result::setNumNulls);
    // The metastore NDV convention differs from the connector's, hence the conversion helper.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(result::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), dateStats(result));
}
/**
 * Converts double column statistics into the metastore Thrift double statistics
 * representation (min/max/null count/distinct count).
 */
private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DoubleColumnStatsData result = new DoubleColumnStatsData();
    statistics.getDoubleStatistics().ifPresent(doubleStats -> {
        doubleStats.getMin().ifPresent(result::setLowValue);
        doubleStats.getMax().ifPresent(result::setHighValue);
    });
    statistics.getNullsCount().ifPresent(result::setNumNulls);
    // The metastore NDV convention differs from the connector's, hence the conversion helper.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(result::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), doubleStats(result));
}
/**
 * Completes restoration of a spilled lookup source. No-op until the asynchronous unspill
 * future is done; then drains the recovered pages into the index (updating the local user
 * memory accounting as each page is handed over), rebuilds the lookup source, verifies its
 * checksum against the one recorded at spill time (if any), publishes it through
 * {@code spilledLookupSourceHandle}, and advances the state machine to
 * {@code INPUT_UNSPILLED_AND_BUILT}.
 */
private void finishLookupSourceUnspilling()
{
    checkState(state == State.INPUT_UNSPILLING);

    if (!unspillInProgress.get().isDone()) {
        // Pages have not be unspilled yet.
        return;
    }

    // Use Queue so that Pages already consumed by Index are not retained by us.
    Queue<Page> pages = new ArrayDeque<>(getDone(unspillInProgress.get()));
    long memoryRetainedByRemainingPages = pages.stream()
            .mapToLong(Page::getRetainedSizeInBytes)
            .sum();
    localUserMemoryContext.setBytes(memoryRetainedByRemainingPages + index.getEstimatedSize().toBytes());

    while (!pages.isEmpty()) {
        Page next = pages.remove();
        index.addPage(next);
        // There is no attempt to compact index, since unspilled pages are unlikely to have blocks with retained size > logical size.
        // Accounting is updated per page so the reservation tracks the actual hand-over from the queue to the index.
        memoryRetainedByRemainingPages -= next.getRetainedSizeInBytes();
        localUserMemoryContext.setBytes(memoryRetainedByRemainingPages + index.getEstimatedSize().toBytes());
    }

    LookupSourceSupplier partition = buildLookupSource();
    // Checksum mismatch would indicate the spilled data was corrupted or reloaded incorrectly.
    lookupSourceChecksum.ifPresent(checksum -> checkState(partition.checksum() == checksum, "Unspilled lookupSource checksum does not match original one"));
    localUserMemoryContext.setBytes(partition.get().getInMemorySizeInBytes());

    spilledLookupSourceHandle.setLookupSource(partition);

    state = State.INPUT_UNSPILLED_AND_BUILT;
}
// NOTE(review): this method appears truncated/garbled in this copy of the file — the body is
// missing its opening brace, several closing braces, and the tail ends mid-statement; it also
// references `booleanStatistics`, which is never declared in the visible text (presumably from a
// lost `statistics.getBooleanStatistics().ifPresent(...)` wrapper). Recover the full
// implementation from version control before relying on or editing this code.
private static void validateColumnStatistics(SchemaTableName table, String partition, String column, OptionalLong rowCount, HiveColumnStatistics columnStatistics) columnStatistics.getMaxValueSizeInBytes().ifPresent(maxValueSizeInBytes -> checkStatistics(maxValueSizeInBytes >= 0, table, partition, column, "maxValueSizeInBytes must be greater than or equal to zero: %s", maxValueSizeInBytes)); columnStatistics.getTotalSizeInBytes().ifPresent(totalSizeInBytes -> checkStatistics(totalSizeInBytes >= 0, table, partition, column, "totalSizeInBytes must be greater than or equal to zero: %s", totalSizeInBytes)); columnStatistics.getNullsCount().ifPresent(nullsCount -> { checkStatistics(nullsCount >= 0, table, partition, column, "nullsCount must be greater than or equal to zero: %s", nullsCount); if (rowCount.isPresent()) { columnStatistics.getDistinctValuesCount().ifPresent(distinctValuesCount -> { checkStatistics(distinctValuesCount >= 0, table, partition, column, "distinctValuesCount must be greater than or equal to zero: %s", distinctValuesCount); if (rowCount.isPresent()) { OptionalLong falseCount = booleanStatistics.getFalseCount(); OptionalLong trueCount = booleanStatistics.getTrueCount(); falseCount.ifPresent(count -> checkStatistics(count >= 0, table, partition, column, "falseCount must be greater than or equal to zero: %s", count)); trueCount.ifPresent(count -> checkStatistics(count >= 0, table, partition, column, "trueCount must be greater than or equal to zero: %s", count)); if (rowCount.isPresent() && falseCount.isPresent()) {
/**
 * Converts string column statistics into the metastore Thrift string statistics
 * representation. Length fields default to 0 when absent; the average length is derived
 * from total size, row count and null count.
 */
private static ColumnStatisticsObj createStringStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount)
{
    StringColumnStatsData result = new StringColumnStatsData();
    statistics.getNullsCount().ifPresent(result::setNumNulls);
    // The metastore NDV convention differs from the connector's, hence the conversion helper.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(result::setNumDVs);
    result.setMaxColLen(statistics.getMaxValueSizeInBytes().orElse(0));
    double averageColumnLength = getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0);
    result.setAvgColLen(averageColumnLength);
    return new ColumnStatisticsObj(columnName, columnType.toString(), stringStats(result));
}
/**
 * Converts binary column statistics into the metastore Thrift binary statistics
 * representation. Length fields default to 0 when absent; the average length is derived
 * from total size, row count and null count.
 */
private static ColumnStatisticsObj createBinaryStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount)
{
    BinaryColumnStatsData result = new BinaryColumnStatsData();
    statistics.getNullsCount().ifPresent(result::setNumNulls);
    result.setMaxColLen(statistics.getMaxValueSizeInBytes().orElse(0));
    double averageColumnLength = getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0);
    result.setAvgColLen(averageColumnLength);
    return new ColumnStatisticsObj(columnName, columnType.toString(), binaryStats(result));
}
/**
 * Partitions the incoming writes, builds the reference and data cell maps for each
 * partition, and — if any partition exists — persists both maps at the maximum partition
 * timestamp before updating write metrics. With no qualifying writes, nothing is written.
 */
public void enqueue(List<WriteInfo> allWrites)
{
    Map<PartitionInfo, List<WriteInfo>> partitionedWrites = partitioner.filterAndPartition(allWrites);

    Map<Cell, byte[]> referencesToDedicatedCells = new HashMap<>();
    Map<Cell, byte[]> cellsToWrite = new HashMap<>();
    for (Map.Entry<PartitionInfo, List<WriteInfo>> entry : partitionedWrites.entrySet()) {
        referencesToDedicatedCells.putAll(populateReferences(entry.getKey(), entry.getValue()));
        cellsToWrite.putAll(populateCells(entry.getKey(), entry.getValue()));
    }

    // Empty max() means there were no partitions at all, so there is nothing to persist.
    partitionedWrites.keySet().stream()
            .map(PartitionInfo::timestamp)
            .mapToLong(x -> x)
            .max()
            .ifPresent(maxTimestamp -> {
                write(referencesToDedicatedCells, maxTimestamp);
                write(cellsToWrite, maxTimestamp);
                updateWriteMetrics(partitionedWrites);
            });
}
/**
 * Persists the sweep-priority fields carried by {@code update} for the given table.
 * Only fields that are present are written; absent fields leave existing columns untouched.
 */
@Override
public void update(Transaction tx, TableReference tableRef, UpdateSweepPriority update)
{
    SweepPriorityTable priorityTable = sweepTableFactory.getSweepPriorityTable(tx);
    SweepPriorityRow priorityRow = SweepPriorityRow.of(tableRef.getQualifiedName());
    update.newStaleValuesDeleted().ifPresent(deleted -> priorityTable.putCellsDeleted(priorityRow, deleted));
    update.newCellTsPairsExamined().ifPresent(examined -> priorityTable.putCellsExamined(priorityRow, examined));
    update.newLastSweepTimeMillis().ifPresent(time -> priorityTable.putLastSweepTime(priorityRow, time));
    update.newMinimumSweptTimestamp().ifPresent(ts -> priorityTable.putMinimumSweptTimestamp(priorityRow, ts));
    update.newWriteCount().ifPresent(count -> priorityTable.putWriteCount(priorityRow, count));
}
/**
 * Runs a single page-cache profiling pass and reports a completed profile to the monitor,
 * if one was produced. Profiling is best-effort: any failure is logged at debug level and
 * otherwise ignored, since it only degrades the quality of a future warmup.
 */
private void doProfile()
{
    try
    {
        pageCacheWarmer.profile().ifPresent( monitor::profileCompleted );
    }
    catch ( Exception e )
    {
        log.debug( "Page cache profiling failed, so no new profile of what data is hot or not was produced. " +
                "This may reduce the effectiveness of a future page cache warmup process.", e );
    }
}
/**
 * Builds the metastore Thrift long statistics object for an integer column from the
 * connector-level statistics (min/max/null count/distinct count).
 */
private static ColumnStatisticsObj createLongStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    LongColumnStatsData data = new LongColumnStatsData();
    statistics.getIntegerStatistics().ifPresent(integerStatistics -> {
        integerStatistics.getMax().ifPresent(data::setHighValue);
        integerStatistics.getMin().ifPresent(data::setLowValue);
    });
    statistics.getNullsCount().ifPresent(data::setNumNulls);
    // Convert the connector NDV into the metastore's convention before storing it.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), longStats(data));
}
/**
 * Writes each present field of {@code update} into the sweep-priority table row for
 * {@code tableRef}; absent fields are skipped, leaving existing values unchanged.
 */
@Override
public void update(Transaction tx, TableReference tableRef, UpdateSweepPriority update)
{
    SweepPriorityRow targetRow = SweepPriorityRow.of(tableRef.getQualifiedName());
    SweepPriorityTable targetTable = sweepTableFactory.getSweepPriorityTable(tx);
    update.newStaleValuesDeleted().ifPresent(value -> targetTable.putCellsDeleted(targetRow, value));
    update.newCellTsPairsExamined().ifPresent(value -> targetTable.putCellsExamined(targetRow, value));
    update.newLastSweepTimeMillis().ifPresent(value -> targetTable.putLastSweepTime(targetRow, value));
    update.newMinimumSweptTimestamp().ifPresent(value -> targetTable.putMinimumSweptTimestamp(targetRow, value));
    update.newWriteCount().ifPresent(value -> targetTable.putWriteCount(targetRow, value));
}
/**
 * Starts the component: releases the lifecycle lock using the previously captured stamp
 * (if any), starts the underlying manager, and resets statistics when they are configured.
 * NOTE(review): the stamp is not cleared after unlocking — confirm a repeated start cannot
 * attempt a second unlock with a stale stamp.
 */
@Override
public synchronized void start() {
    this.lifecycleStamp.ifPresent(this.lifecycleLock::unlock);
    this.manager.start();
    if (this.statistics != null) {
        this.statistics.reset();
    }
}
/**
 * Builds the metastore Thrift decimal statistics object for a decimal column,
 * converting min/max values with {@code toMetastoreDecimal}.
 */
private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DecimalColumnStatsData data = new DecimalColumnStatsData();
    statistics.getDecimalStatistics().ifPresent(decimalStatistics -> {
        decimalStatistics.getMax().ifPresent(max -> data.setHighValue(toMetastoreDecimal(max)));
        decimalStatistics.getMin().ifPresent(min -> data.setLowValue(toMetastoreDecimal(min)));
    });
    statistics.getNullsCount().ifPresent(data::setNumNulls);
    // Convert the connector NDV into the metastore's convention before storing it.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(data));
}
/**
 * Standard catch-all method which delegates leader events to their appropriate handlers
 * in the appropriate order, i.e. calls step down logic (if necessary) before leader switch
 * logic.
 *
 * @param outcome The outcome which contains details of the leader event
 */
default void onLeaderEvent( Outcome outcome )
{
    outcome.stepDownTerm().ifPresent( this::onLeaderStepDown );
    onLeaderSwitch( new LeaderInfo( outcome.getLeader(), outcome.getTerm() ) );
}
}
/**
 * Builds the metastore Thrift double statistics object for a floating-point column
 * (min/max/null count/distinct count).
 */
private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DoubleColumnStatsData data = new DoubleColumnStatsData();
    statistics.getDoubleStatistics().ifPresent(doubleStatistics -> {
        doubleStatistics.getMax().ifPresent(data::setHighValue);
        doubleStatistics.getMin().ifPresent(data::setLowValue);
    });
    statistics.getNullsCount().ifPresent(data::setNumNulls);
    // Convert the connector NDV into the metastore's convention before storing it.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), doubleStats(data));
}