/**
 * Converts Thrift {@link ColumnStatisticsObj}s into {@code ColStatistics} for the given table.
 * Objects that cannot be converted (converter returns null) are silently dropped.
 *
 * @param colStats Thrift column statistics; may be null
 * @param tabName  table the statistics belong to
 * @return converted statistics; empty list when input is null
 */
private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName) {
  if (colStats == null) {
    return new ArrayList<ColStatistics>();
  }
  List<ColStatistics> converted = new ArrayList<ColStatistics>(colStats.size());
  for (ColumnStatisticsObj statsObj : colStats) {
    ColStatistics cs = getColStatistics(statsObj, tabName, statsObj.getColName());
    if (cs == null) {
      continue; // converter could not handle this column's stats
    }
    converted.add(cs);
  }
  return converted;
}
/**
 * Finds the statistics object whose column name AND type match the given pair
 * (both compared case-insensitively).
 *
 * @return the matching object, or null when the list is null/empty or no entry matches
 */
static ColumnStatisticsObj getColumnStatisticsObject(String colName, String colType, List<ColumnStatisticsObj> colStats) {
  if (colStats == null || colStats.isEmpty()) {
    return null;
  }
  for (ColumnStatisticsObj candidate : colStats) {
    boolean nameMatches = candidate.getColName().equalsIgnoreCase(colName);
    boolean typeMatches = candidate.getColType().equalsIgnoreCase(colType);
    if (nameMatches && typeMatches) {
      return candidate;
    }
  }
  return null;
}
private static void mergeColumnStats(Map<String, ColumnStatisticsObj> oldStats, ColumnStatistics newStats) { List<ColumnStatisticsObj> newColList = newStats.getStatsObj(); if (newColList != null) { for (ColumnStatisticsObj colStat : newColList) { // This is admittedly a bit simple, StatsObjectConverter seems to allow // old stats attributes to be kept if the new values do not overwrite them. oldStats.put(colStat.getColName().toLowerCase(), colStat); } } }
/**
 * Overwrites entries in {@code oldStats} with the per-column statistics from
 * {@code newStats}, keyed by lower-cased column name. Columns absent from the
 * new statistics keep their previously cached objects; a null stats list is a
 * no-op. Note this replaces each cached object wholesale rather than merging
 * individual attributes.
 */
private static void mergeColumnStats(Map<String, ColumnStatisticsObj> oldStats, ColumnStatistics newStats) { List<ColumnStatisticsObj> newColList = newStats.getStatsObj(); if (newColList != null) { for (ColumnStatisticsObj colStat : newColList) { // This is admittedly a bit simple, StatsObjectConverter seems to allow // old stats attributes to be kept if the new values do not overwrite them. oldStats.put(colStat.getColName().toLowerCase(), colStat); } } }
/**
 * Returns the statistics object matching the requested column name and type,
 * ignoring case for both comparisons, or null when no such entry exists.
 */
private static ColumnStatisticsObj getColumnStatisticsObject(String colName, String colType, List<ColumnStatisticsObj> colStats) {
  if (colStats == null) {
    return null;
  }
  for (ColumnStatisticsObj cso : colStats) {
    if (!cso.getColName().equalsIgnoreCase(colName)) {
      continue;
    }
    if (cso.getColType().equalsIgnoreCase(colType)) {
      return cso;
    }
  }
  return null;
}
/**
 * Converts Thrift {@link ColumnStatisticsObj}s into {@code ColStatistics}
 * instances for the given table, skipping any object the converter cannot
 * handle (null result). Returns an empty list when the input is null.
 * NOTE(review): the {@code processNeededColumns} declaration that follows is
 * truncated at this chunk boundary and is left untouched.
 */
private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName) { if (colStats==null) { return new ArrayList<ColStatistics>(); } List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size()); for (ColumnStatisticsObj statObj : colStats) { ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName()); if (cs != null) { stats.add(cs); } } return stats; } private static List<String> processNeededColumns(List<ColumnInfo> schema,
/**
 * Thrift-style generic field accessor: returns the value of the requested field.
 *
 * @throws IllegalStateException if the field is not one of the known fields
 */
public Object getFieldValue(_Fields field) {
  switch (field) {
  case STATS_DATA:
    return getStatsData();
  case COL_TYPE:
    return getColType();
  case COL_NAME:
    return getColName();
  }
  throw new IllegalStateException();
}
public static void getMergableCols(ColumnStatistics csNew, Map<String, String> parameters) { List<ColumnStatisticsObj> list = new ArrayList<>(); for (int index = 0; index < csNew.getStatsObj().size(); index++) { ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); // canColumnStatsMerge guarantees that it is accurate before we do merge if (StatsSetupConst.canColumnStatsMerge(parameters, statsObjNew.getColName())) { list.add(statsObjNew); } // in all the other cases, we can not merge } csNew.setStatsObj(list); }
/**
 * Rebuilds the table-level column statistics cache from the given list under
 * the table write lock. While copying, it polls {@code isTableColStatsCacheDirty}
 * via compareAndSet(true, false): if a concurrent writer marked the cache dirty,
 * the refresh is abandoned (the old cache map is kept) since our input list is
 * stale. Entries are deep-copied defensively (see TODO in the code).
 * NOTE(review): the dirty flag is checked per-element inside the loop, so an
 * empty input list installs the new (empty) cache without consulting the flag —
 * presumably acceptable; confirm against callers.
 */
public void refreshTableColStats(List<ColumnStatisticsObj> colStatsForTable) { Map<String, ColumnStatisticsObj> newTableColStatsCache = new HashMap<String, ColumnStatisticsObj>(); try { tableLock.writeLock().lock(); for (ColumnStatisticsObj colStatObj : colStatsForTable) { if (isTableColStatsCacheDirty.compareAndSet(true, false)) { LOG.debug("Skipping table col stats cache update for table: " + getTable().getTableName() + "; the table col stats list we have is dirty."); return; } String key = colStatObj.getColName(); // TODO: get rid of deepCopy after making sure callers don't use references newTableColStatsCache.put(key, colStatObj.deepCopy()); } tableColStatsCache = newTableColStatsCache; } finally { tableLock.writeLock().unlock(); } }
/**
 * Determines which of the existing statistics' columns need to be refreshed.
 * A column is selected unless basic stats are valid and up to date overall AND
 * that particular column's stats are marked up to date in the parameters.
 */
private List<String> getExistingStatsToUpdate( ColumnStatistics existingStats, Map<String, String> params, boolean isTxnValid) {
  final boolean basicStatsAccurate = isTxnValid && StatsSetupConst.areBasicStatsUptoDate(params);
  List<String> staleCols = new ArrayList<>();
  for (ColumnStatisticsObj statsObj : existingStats.getStatsObj()) {
    String colName = statsObj.getColName();
    // De Morgan of (!accurate || !colUpToDate): refresh unless both hold.
    boolean upToDate = basicStatsAccurate && StatsSetupConst.areColumnStatsUptoDate(params, colName);
    if (!upToDate) {
      staleCols.add(colName);
    }
  }
  return staleCols;
}
/**
 * After altering a partition, removes metastore column statistics for data
 * columns that the new partition statistics no longer cover. Columns that ARE
 * covered are simply overwritten by storePartitionColumnStatistics during
 * alterPartition, so nothing needs deleting when the missing set is empty.
 * Before deleting, it queries the metastore for which of the missing columns
 * actually have stored statistics, because deleting a non-existent statistic
 * makes the metastore throw NoSuchObjectException.
 */
private void dropExtraColumnStatisticsAfterAlterPartition( String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) { List<String> dataColumns = partitionWithStatistics.getPartition().getColumns().stream() .map(Column::getName) .collect(toImmutableList()); Set<String> columnsWithMissingStatistics = new HashSet<>(dataColumns); columnsWithMissingStatistics.removeAll(partitionWithStatistics.getStatistics().getColumnStatistics().keySet()); // In case new partition had the statistics computed for all the columns, the storePartitionColumnStatistics // call in the alterPartition will just overwrite the old statistics. There is no need to explicitly remove anything. if (columnsWithMissingStatistics.isEmpty()) { return; } // check if statistics for the columnsWithMissingStatistics are actually stored in the metastore // when trying to remove any missing statistics the metastore throws NoSuchObjectException String partitionName = partitionWithStatistics.getPartitionName(); List<ColumnStatisticsObj> statisticsToBeRemoved = getMetastorePartitionColumnStatistics( databaseName, tableName, ImmutableSet.of(partitionName), ImmutableList.copyOf(columnsWithMissingStatistics)) .getOrDefault(partitionName, ImmutableList.of()); for (ColumnStatisticsObj statistics : statisticsToBeRemoved) { deletePartitionColumnStatistics(databaseName, tableName, partitionName, statistics.getColName()); } }
/**
 * Asserts that the fetched statistics objects match the expected columns:
 * same count, and each object compares equal (per Column.compare) to the
 * expected column of the same name at the given partition offset.
 *
 * Fix: matching sizes do not guarantee matching column names, so a lookup
 * miss previously surfaced as a NullPointerException inside the lambda;
 * now it fails with an explicit assertion message naming the column.
 */
private void compareStatsForOneTableOrPartition(List<ColumnStatisticsObj> objs, final int partOffset, final Map<String, Column> colMap) throws TException {
  Assert.assertEquals(objs.size(), colMap.size());
  for (ColumnStatisticsObj cso : objs) {
    Column expected = colMap.get(cso.getColName());
    Assert.assertNotNull("unexpected column in stats: " + cso.getColName(), expected);
    expected.compare(cso, partOffset);
  }
}
private boolean updateTempTableColumnStats(String dbName, String tableName, ColumnStatistics colStats) throws MetaException { SessionState ss = SessionState.get(); if (ss == null) { throw new MetaException("No current SessionState, cannot update temporary table stats for " + StatsUtils.getFullyQualifiedTableName(dbName, tableName)); } Map<String, ColumnStatisticsObj> ssTableColStats = getTempTableColumnStatsForTable(dbName, tableName); if (ssTableColStats == null) { // Add new entry for this table ssTableColStats = new HashMap<String, ColumnStatisticsObj>(); ss.getTempTableColStats().put( StatsUtils.getFullyQualifiedTableName(dbName, tableName), ssTableColStats); } mergeColumnStats(ssTableColStats, colStats); List<String> colNames = new ArrayList<>(); for (ColumnStatisticsObj obj : colStats.getStatsObj()) { colNames.add(obj.getColName()); } org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbName, tableName); StatsSetupConst.setColumnStatsState(table.getParameters(), colNames); return true; }
/**
 * Normalizes an incoming ColumnStatistics request in place: lower-cases the
 * catalog (defaulting it when unset), db, table, partition and column
 * names/types, and stamps lastAnalyzed with the current epoch seconds.
 * NOTE(review): the trailing setStatsDesc/setStatsObj calls re-assign the same
 * object references already held by colStats; presumably kept for the Thrift
 * isSet bookkeeping — confirm before removing (see existing TODO).
 */
private void normalizeColStatsInput(ColumnStatistics colStats) throws MetaException { // TODO: is this really needed? this code is propagated from HIVE-1362 but most of it is useless. ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); statsDesc.setCatName(statsDesc.isSetCatName() ? statsDesc.getCatName().toLowerCase() : getDefaultCatalog(conf)); statsDesc.setDbName(statsDesc.getDbName().toLowerCase()); statsDesc.setTableName(statsDesc.getTableName().toLowerCase()); statsDesc.setPartName(lowerCaseConvertPartName(statsDesc.getPartName())); long time = System.currentTimeMillis() / 1000; statsDesc.setLastAnalyzed(time); for (ColumnStatisticsObj statsObj : colStats.getStatsObj()) { statsObj.setColName(statsObj.getColName().toLowerCase()); statsObj.setColType(statsObj.getColType().toLowerCase()); } colStats.setStatsDesc(statsDesc); colStats.setStatsObj(colStats.getStatsObj()); }
/**
 * Asserts that the statistics object carries this column's expected name and type.
 */
void compareCommon(ColumnStatisticsObj obj) {
  String actualName = obj.getColName();
  Assert.assertEquals(colName, actualName);
  String actualType = obj.getColType();
  Assert.assertEquals(colType, actualType);
}
/**
 * Merges string column statistics from {@code newColStats} into
 * {@code aggregateColStats}: lengths take the max, null counts are summed,
 * and NDV is merged via bitvector estimators when both sides have one,
 * otherwise falls back to the max of the two NDV counts.
 */
@Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  StringColumnStatsDataInspector aggregate = stringInspectorFromStats(aggregateColStats);
  StringColumnStatsDataInspector incoming = stringInspectorFromStats(newColStats);
  // Column lengths: keep the larger observed values (avg is merged conservatively via max).
  aggregate.setMaxColLen(Math.max(aggregate.getMaxColLen(), incoming.getMaxColLen()));
  aggregate.setAvgColLen(Math.max(aggregate.getAvgColLen(), incoming.getAvgColLen()));
  // Null counts are additive.
  aggregate.setNumNulls(aggregate.getNumNulls() + incoming.getNumNulls());
  if (aggregate.getNdvEstimator() == null || incoming.getNdvEstimator() == null) {
    aggregate.setNumDVs(Math.max(aggregate.getNumDVs(), incoming.getNumDVs()));
  } else {
    NumDistinctValueEstimator oldEst = aggregate.getNdvEstimator();
    NumDistinctValueEstimator newEst = incoming.getNdvEstimator();
    long mergedNdv;
    if (oldEst.canMerge(newEst)) {
      oldEst.mergeEstimators(newEst);
      mergedNdv = oldEst.estimateNumDistinctValues();
      aggregate.setNdvEstimator(oldEst);
    } else {
      // Incompatible estimators: fall back to the conservative max.
      mergedNdv = Math.max(aggregate.getNumDVs(), incoming.getNumDVs());
    }
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregate.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + mergedNdv);
    aggregate.setNumDVs(mergedNdv);
  }
}
}
/**
 * Merges long column statistics from {@code newColStats} into
 * {@code aggregateColStats}: value range is widened (min low / max high),
 * null counts are summed, and NDV is merged via bitvector estimators when
 * both sides have one, otherwise the larger NDV count wins.
 */
@Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  LongColumnStatsDataInspector aggregate = longInspectorFromStats(aggregateColStats);
  LongColumnStatsDataInspector incoming = longInspectorFromStats(newColStats);
  // Widen the observed value range.
  aggregate.setLowValue(Math.min(aggregate.getLowValue(), incoming.getLowValue()));
  aggregate.setHighValue(Math.max(aggregate.getHighValue(), incoming.getHighValue()));
  // Null counts are additive.
  aggregate.setNumNulls(aggregate.getNumNulls() + incoming.getNumNulls());
  if (aggregate.getNdvEstimator() == null || incoming.getNdvEstimator() == null) {
    aggregate.setNumDVs(Math.max(aggregate.getNumDVs(), incoming.getNumDVs()));
  } else {
    NumDistinctValueEstimator oldEst = aggregate.getNdvEstimator();
    NumDistinctValueEstimator newEst = incoming.getNdvEstimator();
    long mergedNdv;
    if (oldEst.canMerge(newEst)) {
      oldEst.mergeEstimators(newEst);
      mergedNdv = oldEst.estimateNumDistinctValues();
      aggregate.setNdvEstimator(oldEst);
    } else {
      // Incompatible estimators: fall back to the conservative max.
      mergedNdv = Math.max(aggregate.getNumDVs(), incoming.getNumDVs());
    }
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregate.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + mergedNdv);
    aggregate.setNumDVs(mergedNdv);
  }
}
}
/**
 * Merges double column statistics from {@code newColStats} into
 * {@code aggregateColStats}: value range is widened (min low / max high),
 * null counts are summed, and NDV is merged via bitvector estimators when
 * both sides have one, otherwise the larger NDV count wins.
 */
@Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  DoubleColumnStatsDataInspector aggregate = doubleInspectorFromStats(aggregateColStats);
  DoubleColumnStatsDataInspector incoming = doubleInspectorFromStats(newColStats);
  // Widen the observed value range.
  aggregate.setLowValue(Math.min(aggregate.getLowValue(), incoming.getLowValue()));
  aggregate.setHighValue(Math.max(aggregate.getHighValue(), incoming.getHighValue()));
  // Null counts are additive.
  aggregate.setNumNulls(aggregate.getNumNulls() + incoming.getNumNulls());
  if (aggregate.getNdvEstimator() == null || incoming.getNdvEstimator() == null) {
    aggregate.setNumDVs(Math.max(aggregate.getNumDVs(), incoming.getNumDVs()));
  } else {
    NumDistinctValueEstimator oldEst = aggregate.getNdvEstimator();
    NumDistinctValueEstimator newEst = incoming.getNdvEstimator();
    long mergedNdv;
    if (oldEst.canMerge(newEst)) {
      oldEst.mergeEstimators(newEst);
      mergedNdv = oldEst.estimateNumDistinctValues();
      aggregate.setNdvEstimator(oldEst);
    } else {
      // Incompatible estimators: fall back to the conservative max.
      mergedNdv = Math.max(aggregate.getNumDVs(), incoming.getNumDVs());
    }
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregate.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + mergedNdv);
    aggregate.setNumDVs(mergedNdv);
  }
}
}
/**
 * Merges date column statistics from {@code newColStats} into
 * {@code aggregateColStats}: the date range is widened via the min/max
 * helpers, null counts are summed, and NDV is merged via bitvector
 * estimators when both sides have one, otherwise the larger NDV count wins.
 */
@Override public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  DateColumnStatsDataInspector aggregate = dateInspectorFromStats(aggregateColStats);
  DateColumnStatsDataInspector incoming = dateInspectorFromStats(newColStats);
  // Widen the observed date range using the Date-aware min/max helpers.
  aggregate.setLowValue(min(aggregate.getLowValue(), incoming.getLowValue()));
  aggregate.setHighValue(max(aggregate.getHighValue(), incoming.getHighValue()));
  // Null counts are additive.
  aggregate.setNumNulls(aggregate.getNumNulls() + incoming.getNumNulls());
  if (aggregate.getNdvEstimator() == null || incoming.getNdvEstimator() == null) {
    aggregate.setNumDVs(Math.max(aggregate.getNumDVs(), incoming.getNumDVs()));
  } else {
    NumDistinctValueEstimator oldEst = aggregate.getNdvEstimator();
    NumDistinctValueEstimator newEst = incoming.getNdvEstimator();
    long mergedNdv;
    if (oldEst.canMerge(newEst)) {
      oldEst.mergeEstimators(newEst);
      mergedNdv = oldEst.estimateNumDistinctValues();
      aggregate.setNdvEstimator(oldEst);
    } else {
      // Incompatible estimators: fall back to the conservative max.
      mergedNdv = Math.max(aggregate.getNumDVs(), incoming.getNumDVs());
    }
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
        + aggregate.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + mergedNdv);
    aggregate.setNumDVs(mergedNdv);
  }
}
/**
 * Verifies the aggregated stats: all 10 partitions found, a single bigint
 * column "col1" with the expected high/low/null/NDV values.
 */
@Override public void checkStats(AggrStats aggrStats) throws Exception {
  Assert.assertEquals(10, aggrStats.getPartsFound());
  Assert.assertEquals(1, aggrStats.getColStatsSize());
  ColumnStatisticsObj statsObj = aggrStats.getColStats().get(0);
  Assert.assertEquals("col1", statsObj.getColName());
  Assert.assertEquals("bigint", statsObj.getColType());
  LongColumnStatsData longStats = statsObj.getStatsData().getLongStats();
  Assert.assertEquals(1009, longStats.getHighValue(), 0.01);
  Assert.assertEquals(-1009, longStats.getLowValue(), 0.01);
  Assert.assertEquals(45, longStats.getNumNulls());
  Assert.assertEquals(91, longStats.getNumDVs());
}
};