public static ColumnStatisticsObj readHiveStruct(String columnName, String columnType, StructField structField, Object values) throws HiveException { // Get the field objectInspector, fieldName and the field object. ObjectInspector foi = structField.getFieldObjectInspector(); Object f = values; String fieldName = structField.getFieldName(); ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColName(columnName); statsObj.setColType(columnType); try { unpackStructObject(foi, f, fieldName, statsObj); return statsObj; } catch (Exception e) { throw new HiveException("error calculating stats for column:" + structField.getFieldName(), e); } }
/**
 * Aggregates binary column statistics from several sources into a single
 * {@link ColumnStatisticsObj}.
 *
 * <p>The column name, column type and statistics kind are taken from the first entry. The
 * aggregate starts as a deep copy of the first entry's binary stats; each further entry
 * contributes the larger {@code maxColLen}, the larger {@code avgColLen} (a maximum, not a
 * weighted mean) and the sum of {@code numNulls}.
 *
 * @param colStatsWithSourceInfo per-source statistics to combine; must contain at least one entry
 * @param partNames not read by this implementation
 * @param areAllPartsFound not read by this implementation
 * @return the aggregated statistics object
 * @throws MetaException if {@code colStatsWithSourceInfo} is {@code null} or empty
 */
@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo,
    List<String> partNames, boolean areAllPartsFound) throws MetaException {
  // Without at least one entry there is nothing to derive the result object from; fail with
  // the declared exception instead of a NullPointerException further down.
  if (colStatsWithSourceInfo == null || colStatsWithSourceInfo.isEmpty()) {
    throw new MetaException("Cannot aggregate binary column statistics: no statistics were provided");
  }

  ColumnStatisticsObj statsObj = null;
  BinaryColumnStatsData aggregateData = null;
  for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
    ColumnStatisticsObj cso = csp.getColStatsObj();
    if (statsObj == null) {
      // The first entry decides the name, type and stats kind of the result.
      statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(cso.getColName(),
          cso.getColType(), cso.getStatsData().getSetField());
    }
    BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats();
    if (aggregateData == null) {
      // Copy so that the caller's first entry is never mutated by later merges.
      aggregateData = newData.deepCopy();
    } else {
      aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
      aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
      aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
    }
  }

  ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
  columnStatisticsData.setBinaryStats(aggregateData);
  statsObj.setStatsData(columnStatisticsData);
  return statsObj;
}
}
/**
 * Returns the current value of the given field via its typed getter.
 *
 * @param field which field to read
 * @return the result of the matching getter
 * @throws IllegalStateException if {@code field} matches none of the handled constants
 */
public Object getFieldValue(_Fields field) {
  final Object value;
  switch (field) {
    case COL_NAME:
      value = getColName();
      break;
    case COL_TYPE:
      value = getColType();
      break;
    case STATS_DATA:
      value = getStatsData();
      break;
    default:
      throw new IllegalStateException();
  }
  return value;
}
/**
 * Assigns or clears the given field through its typed setter or unsetter.
 *
 * @param field which field to modify
 * @param value new value, cast to the field's type; {@code null} unsets the field
 */
public void setFieldValue(_Fields field, Object value) {
  switch (field) {
    case COL_NAME:
      if (value != null) {
        setColName((String) value);
      } else {
        unsetColName();
      }
      break;

    case COL_TYPE:
      if (value != null) {
        setColType((String) value);
      } else {
        unsetColType();
      }
      break;

    case STATS_DATA:
      if (value != null) {
        setStatsData((ColumnStatisticsData) value);
      } else {
        unsetStatsData();
      }
      break;
  }
}
/**
 * Finds the first statistics object whose column name and column type both match the given
 * values, ignoring case.
 *
 * @param colName column name to look for
 * @param colType column type to look for
 * @param colStats candidates to search; may be {@code null} or empty
 * @return the first matching entry, or {@code null} when there is none
 */
static ColumnStatisticsObj getColumnStatisticsObject(String colName, String colType,
    List<ColumnStatisticsObj> colStats) {
  if (colStats == null) {
    return null;
  }
  for (ColumnStatisticsObj candidate : colStats) {
    // The type is only inspected once the name has matched.
    if (!candidate.getColName().equalsIgnoreCase(colName)) {
      continue;
    }
    if (candidate.getColType().equalsIgnoreCase(colType)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Creates a {@link ColumnStatisticsObj} of column type {@code "long"} carrying the given
 * long-statistics values.
 *
 * @param colName column name to set on the result
 * @param highVal value passed to {@code setHighValue}
 * @param lowVal value passed to {@code setLowValue}
 * @param numDVs value passed to {@code setNumDVs}
 * @param numNulls value passed to {@code setNumNulls}
 * @return the populated statistics object
 */
private ColumnStatisticsObj getDummyLongColStat(String colName, int highVal, int lowVal,
    int numDVs, int numNulls) {
  // Build from the inside out: long stats -> stats data wrapper -> column stats object.
  LongColumnStatsData longStats = new LongColumnStatsData();
  longStats.setHighValue(highVal);
  longStats.setLowValue(lowVal);
  longStats.setNumDVs(numDVs);
  longStats.setNumNulls(numNulls);

  ColumnStatisticsData statsData = new ColumnStatisticsData();
  statsData.setLongStats(longStats);

  ColumnStatisticsObj colStat = new ColumnStatisticsObj();
  colStat.setColName(colName);
  colStat.setColType("long");
  colStat.setStatsData(statsData);
  return colStat;
}
}
/**
 * Merges stats with low value DECIMAL_3 into stats with low value DECIMAL_5 and expects the
 * merged low value to be DECIMAL_3.
 */
@Test
public void testMergeLowValuesSecondWins() {
  ColumnStatisticsObj existing = new ColumnStatisticsObj();
  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(existing, DECIMAL_5, null);
  createData(incoming, DECIMAL_3, null);

  merger.merge(existing, incoming);

  // The merge result is read back from the first argument.
  Assert.assertEquals(DECIMAL_3, existing.getStatsData().getDecimalStats().getLowValue());
}
// Sets the partition name on statsDesc, then appends two ColumnStatisticsObj entries to
// statsObjs: one for colName[0]/colType[0] and one for colName[1]/colType[1]. Both entries
// reference the same statsData instance.
// The assertions that follow read colStats2, first against colName[0] with double stats and
// then against colName[1] with string stats. The assignments of colStats2 are not visible in
// this excerpt. NOTE(review): presumably it is re-fetched between the two groups; confirm
// against the full test.
statsDesc.setPartName(partName); ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); statsObj.setColName(colName[0]); statsObj.setColType(colType[0]); statsObj.setStatsData(statsData); statsObjs.add(statsObj); statsObj = new ColumnStatisticsObj(); statsObj.setColName(colName[1]); statsObj.setColType(colType[1]); statsObj.setStatsData(statsData); statsObjs.add(statsObj); assertEquals(colStats2.getColName(), colName[0]); assertEquals(colStats2.getStatsData().getDoubleStats().getLowValue(), lowValue, 0.01); assertEquals(colStats2.getStatsData().getDoubleStats().getHighValue(), highValue, 0.01); assertEquals(colStats2.getStatsData().getDoubleStats().getNumNulls(), numNulls); assertEquals(colStats2.getStatsData().getDoubleStats().getNumDVs(), numDVs); assertEquals(colStats2.getColName(), colName[1]); assertEquals(colStats2.getStatsData().getStringStats().getMaxColLen(), maxColLen); assertEquals(colStats2.getStatsData().getStringStats().getAvgColLen(), avgColLen, 0.01); assertEquals(colStats2.getStatsData().getStringStats().getNumNulls(), numNulls); assertEquals(colStats2.getStatsData().getStringStats().getNumDVs(), numDVs);
/**
 * Converts decimal column statistics into a metastore {@link ColumnStatisticsObj}.
 *
 * <p>Low and high values, the null count and the distinct-value count are each copied only
 * when the corresponding optional on {@code statistics} is present.
 *
 * @param columnName name of the column the statistics belong to
 * @param columnType column type; its {@code toString()} becomes the metastore type string
 * @param statistics source statistics
 * @return a statistics object wrapping the populated decimal stats data
 */
private static ColumnStatisticsObj createDecimalStatistics(String columnName,
    HiveType columnType, HiveColumnStatistics statistics) {
  DecimalColumnStatsData decimalData = new DecimalColumnStatsData();

  statistics.getDecimalStatistics().ifPresent(range -> {
    range.getMin().ifPresent(min -> {
      decimalData.setLowValue(toMetastoreDecimal(min));
    });
    range.getMax().ifPresent(max -> {
      decimalData.setHighValue(toMetastoreDecimal(max));
    });
  });

  statistics.getNullsCount().ifPresent(nullsCount -> decimalData.setNumNulls(nullsCount));
  toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
      .ifPresent(distinctCount -> decimalData.setNumDVs(distinctCount));

  String typeName = columnType.toString();
  return new ColumnStatisticsObj(columnName, typeName, decimalStats(decimalData));
}
private void normalizeColStatsInput(ColumnStatistics colStats) throws MetaException { // TODO: is this really needed? this code is propagated from HIVE-1362 but most of it is useless. ColumnStatisticsDesc statsDesc = colStats.getStatsDesc(); statsDesc.setCatName(statsDesc.isSetCatName() ? statsDesc.getCatName().toLowerCase() : getDefaultCatalog(conf)); statsDesc.setDbName(statsDesc.getDbName().toLowerCase()); statsDesc.setTableName(statsDesc.getTableName().toLowerCase()); statsDesc.setPartName(lowerCaseConvertPartName(statsDesc.getPartName())); long time = System.currentTimeMillis() / 1000; statsDesc.setLastAnalyzed(time); for (ColumnStatisticsObj statsObj : colStats.getStatsObj()) { statsObj.setColName(statsObj.getColName().toLowerCase()); statsObj.setColType(statsObj.getColType().toLowerCase()); } colStats.setStatsDesc(statsDesc); colStats.setStatsObj(colStats.getStatsObj()); }
/**
 * Returns the statistics data of the only element in {@code statObj}.
 *
 * @param statObj statistics expected to hold exactly one entry
 * @return the stats data of the single entry, or {@code null} (after logging) when the list
 *     is empty or holds more than one entry
 */
private ColumnStatisticsData validateSingleColStat(List<ColumnStatisticsObj> statObj) {
  if (statObj.isEmpty()) {
    Logger.debug("No stats for some partition and column");
    return null;
  }
  if (statObj.size() > 1) {
    Logger.error("More than one stat for a single column!");
    return null;
  }
  ColumnStatisticsObj onlyStat = statObj.get(0);
  return onlyStat.getStatsData();
}
/**
 * Builds a {@link ColumnStatisticsObj} from a result row, reading 18 consecutive cells
 * starting at index {@code i}: column name, column type, then the 16 values handed to
 * {@code StatObjectConverter.fillColumnStatisticsData}.
 *
 * @param row result row holding the column's aggregated values
 * @param i index of the first cell to read (the column name)
 * @param useDensityFunctionForNDVEstimation forwarded to the converter
 * @param ndvTuner forwarded to the converter
 * @return the statistics object whose data has been filled in by the converter
 * @throws MetaException declared by the signature; propagated from the converter if it raises it
 */
private ColumnStatisticsObj prepareCSObjWithAdjustedNDV(Object[] row, int i,
    boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
  ColumnStatisticsData data = new ColumnStatisticsData();

  // Cells are consumed strictly left to right; the cursor advances once per read.
  int cursor = i;
  String colName = (String) row[cursor++];
  String colType = (String) row[cursor++];
  ColumnStatisticsObj cso = new ColumnStatisticsObj(colName, colType, data);

  Object llow = row[cursor++];
  Object lhigh = row[cursor++];
  Object dlow = row[cursor++];
  Object dhigh = row[cursor++];
  Object declow = row[cursor++];
  Object dechigh = row[cursor++];
  Object nulls = row[cursor++];
  Object dist = row[cursor++];
  Object avglen = row[cursor++];
  Object maxlen = row[cursor++];
  Object trues = row[cursor++];
  Object falses = row[cursor++];
  Object avgLong = row[cursor++];
  Object avgDouble = row[cursor++];
  Object avgDecimal = row[cursor++];
  Object sumDist = row[cursor++];

  StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
      llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen,
      trues, falses, avgLong, avgDouble, avgDecimal, sumDist,
      useDensityFunctionForNDVEstimation, ndvTuner);
  return cso;
}
private static void mergeColumnStats(Map<String, ColumnStatisticsObj> oldStats, ColumnStatistics newStats) { List<ColumnStatisticsObj> newColList = newStats.getStatsObj(); if (newColList != null) { for (ColumnStatisticsObj colStat : newColList) { // This is admittedly a bit simple, StatsObjectConverter seems to allow // old stats attributes to be kept if the new values do not overwrite them. oldStats.put(colStat.getColName().toLowerCase(), colStat); } } }
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); statsData.setLongStats(longStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("double")) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); statsData.setDoubleStats(doubleStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("string")) { StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); statsData.setStringStats(stringStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("boolean")) { BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); statsData.setBooleanStats(booleanStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("decimal")) { DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("date")) { DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); if (statsObj.getStatsData().isSetBooleanStats()) {
String colTypeLowerCase = cso.getColType().toLowerCase(); ColStatistics cs = new ColStatistics(colName, colTypeLowerCase); ColumnStatisticsData csd = cso.getStatsData(); if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)
// Lower-cases the column name and writes it back onto statsObj, then calls startFunction
// with a "write_column_statistics" message naming the db, table and (lower-cased) column.
colName = statsObj.getColName().toLowerCase(); statsObj.setColName(colName); startFunction("write_column_statistics: db=" + dbName + " table=" + tableName + " column=" + colName);
public void refreshTableColStats(List<ColumnStatisticsObj> colStatsForTable) { Map<String, ColumnStatisticsObj> newTableColStatsCache = new HashMap<String, ColumnStatisticsObj>(); try { tableLock.writeLock().lock(); for (ColumnStatisticsObj colStatObj : colStatsForTable) { if (isTableColStatsCacheDirty.compareAndSet(true, false)) { LOG.debug("Skipping table col stats cache update for table: " + getTable().getTableName() + "; the table col stats list we have is dirty."); return; } String key = colStatObj.getColName(); // TODO: get rid of deepCopy after making sure callers don't use references newTableColStatsCache.put(key, colStatObj.deepCopy()); } tableColStatsCache = newTableColStatsCache; } finally { tableLock.writeLock().unlock(); } }
/**
 * Attaches fresh decimal statistics with the given bounds to {@code objNulls}.
 *
 * @param objNulls statistics object that receives the new stats data
 * @param lowValue low value to set; tests pass {@code null} for this
 * @param highValue high value to set; tests pass {@code null} for this
 * @return the decimal stats instance that was attached
 */
private DecimalColumnStatsDataInspector createData(ColumnStatisticsObj objNulls,
    Decimal lowValue, Decimal highValue) {
  DecimalColumnStatsDataInspector decimalData = new DecimalColumnStatsDataInspector();
  ColumnStatisticsData wrapper = new ColumnStatisticsData();

  // Wire the objects together first, then fill in the bounds on the decimal stats instance.
  wrapper.setDecimalStats(decimalData);
  objNulls.setStatsData(wrapper);

  decimalData.setLowValue(lowValue);
  decimalData.setHighValue(highValue);
  return decimalData;
}
}
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) { ColumnStatisticsObj cso = new ColumnStatisticsObj(); ColumnStatisticsData csd = new ColumnStatisticsData(); cso.setColName(colName); cso.setColType(colType); switch (type) { case BOOLEAN_STATS: cso.setStatsData(csd); return cso;
/**
 * Merges stats with high value DECIMAL_3 into stats with high value DECIMAL_5 and expects
 * the merged high value to remain DECIMAL_5.
 */
@Test
public void testMergeHighValuesFirstWins() {
  ColumnStatisticsObj existing = new ColumnStatisticsObj();
  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(existing, null, DECIMAL_5);
  createData(incoming, null, DECIMAL_3);

  merger.merge(existing, incoming);

  // The merge result is read back from the first argument.
  Assert.assertEquals(DECIMAL_5, existing.getStatsData().getDecimalStats().getHighValue());
}