public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) { ColumnStatisticsObj cso = new ColumnStatisticsObj(); ColumnStatisticsData csd = new ColumnStatisticsData(); cso.setColName(colName); cso.setColType(colType); switch (type) { case BOOLEAN_STATS: csd.setBooleanStats(new BooleanColumnStatsData()); break; csd.setLongStats(new LongColumnStatsDataInspector()); break; csd.setDateStats(new DateColumnStatsDataInspector()); break; csd.setDoubleStats(new DoubleColumnStatsDataInspector()); break; csd.setStringStats(new StringColumnStatsDataInspector()); break; csd.setBinaryStats(new BinaryColumnStatsData()); break; csd.setDecimalStats(new DecimalColumnStatsDataInspector()); break;
/**
 * Reads the null count out of {@code statData} for the requested statistic type.
 *
 * <p>Each handled type selects one typed member of the statistics holder:
 * {@code Integer} reads the long stats, {@code Double} the double stats,
 * {@code String} the string stats, {@code Boolean} the boolean stats,
 * {@code Binary} the binary stats and {@code Date} the date stats.
 *
 * @param type the statistic type that decides which member of {@code statData} is read
 * @param statData the column statistics holder to read from
 * @return the null count of the selected member, or {@code null} when {@code type}
 *         is not one of the handled types
 */
private Long getNullcountFor(StatType type, ColumnStatisticsData statData) {
  final Long nullCount;
  switch (type) {
    case Integer:
      nullCount = statData.getLongStats().getNumNulls();
      break;
    case Double:
      nullCount = statData.getDoubleStats().getNumNulls();
      break;
    case String:
      nullCount = statData.getStringStats().getNumNulls();
      break;
    case Boolean:
      nullCount = statData.getBooleanStats().getNumNulls();
      break;
    case Binary:
      nullCount = statData.getBinaryStats().getNumNulls();
      break;
    case Date:
      nullCount = statData.getDateStats().getNumNulls();
      break;
    default:
      // Any other type has no null count to report here.
      nullCount = null;
      break;
  }
  return nullCount;
}
/**
 * Converts the decimal stats of a column statistics object to a
 * DecimalColumnStatsDataInspector.
 *
 * <p>When the decimal stats are already a DecimalColumnStatsDataInspector the same
 * instance is returned (cast only); otherwise a new inspector is constructed from them.
 *
 * @param cso ColumnStatisticsObj whose decimal stats are read
 * @return DecimalColumnStatsDataInspector view or copy of the decimal stats
 */
public static DecimalColumnStatsDataInspector decimalInspectorFromStats(ColumnStatisticsObj cso) {
  if (!(cso.getStatsData().getDecimalStats() instanceof DecimalColumnStatsDataInspector)) {
    // Not an inspector yet: build one from the existing stats.
    return new DecimalColumnStatsDataInspector(cso.getStatsData().getDecimalStats());
  }
  return (DecimalColumnStatsDataInspector) cso.getStatsData().getDecimalStats();
}
}
/**
 * Creates a new {@code ColumnStatisticsData} whose string stats are set to {@code value}.
 *
 * @param value the string column statistics to store
 * @return a freshly created {@code ColumnStatisticsData} holding {@code value}
 */
public static ColumnStatisticsData stringStats(StringColumnStatsData value) {
  ColumnStatisticsData result = new ColumnStatisticsData();
  result.setStringStats(value);
  return result;
}
/**
 * Creates a new {@code ColumnStatisticsData} whose double stats are set to {@code value}.
 *
 * @param value the double column statistics to store
 * @return a freshly created {@code ColumnStatisticsData} holding {@code value}
 */
public static ColumnStatisticsData doubleStats(DoubleColumnStatsData value) {
  ColumnStatisticsData result = new ColumnStatisticsData();
  result.setDoubleStats(value);
  return result;
}
/**
 * Creates a new {@code ColumnStatisticsData} whose long stats are set to {@code value}.
 *
 * @param value the long column statistics to store
 * @return a freshly created {@code ColumnStatisticsData} holding {@code value}
 */
public static ColumnStatisticsData longStats(LongColumnStatsData value) {
  ColumnStatisticsData result = new ColumnStatisticsData();
  result.setLongStats(value);
  return result;
}
if (csd.isSetBinaryStats()) { BinaryColumnStatsData bcsd = csd.getBinaryStats(); ret.addAll(Lists.newArrayList( "", "", "" + bcsd.getNumNulls(), "", "", "", convertToString(bcsd.getBitVectors()))); } else if (csd.isSetStringStats()) { StringColumnStatsData scsd = csd.getStringStats(); ret.addAll(Lists.newArrayList( "", "", "" + scsd.getNumNulls(), "" + scsd.getNumDVs(), "", "", convertToString(scsd.getBitVectors()))); } else if (csd.isSetBooleanStats()) { BooleanColumnStatsData bcsd = csd.getBooleanStats(); ret.addAll(Lists.newArrayList( "", "", "" + bcsd.getNumNulls(), "", "" + bcsd.getNumTrues(), "" + bcsd.getNumFalses(), convertToString(bcsd.getBitVectors()))); } else if (csd.isSetDecimalStats()) { DecimalColumnStatsData dcsd = csd.getDecimalStats(); ret.addAll(Lists.newArrayList( convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", convertToString(dcsd.getBitVectors()))); } else if (csd.isSetDoubleStats()) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); ColumnStatisticsData statsData = new ColumnStatisticsData(); statsData.setLongStats(longStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("double")) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); statsData.setDoubleStats(doubleStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("string")) { StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); statsData.setStringStats(stringStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("boolean")) { BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); statsData.setBooleanStats(booleanStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("binary")) { BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); statsData.setBinaryStats(binaryStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("decimal")) { DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); statsData.setDecimalStats(decimalStats); statsObj.setStatsData(statsData); } else if (s.equalsIgnoreCase("date")) { DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats);
ColumnStatisticsData statsData = new ColumnStatisticsData(); DoubleColumnStatsData numericStats = new DoubleColumnStatsData(); statsData.setDoubleStats(numericStats); statsData.getDoubleStats().setHighValue(highValue); statsData.getDoubleStats().setLowValue(lowValue); statsData.getDoubleStats().setNumDVs(numDVs); statsData.getDoubleStats().setNumNulls(numNulls); statsData = new ColumnStatisticsData(); StringColumnStatsData stringStats = new StringColumnStatsData(); statsData.setStringStats(stringStats); statsData.getStringStats().setAvgColLen(avgColLen); statsData.getStringStats().setMaxColLen(maxColLen); statsData.getStringStats().setNumDVs(numDVs); statsData.getStringStats().setNumNulls(numNulls); assertEquals(colStats2.getStatsData().getDoubleStats().getLowValue(), lowValue, 0.01); assertEquals(colStats2.getStatsData().getDoubleStats().getHighValue(), highValue, 0.01); assertEquals(colStats2.getStatsData().getDoubleStats().getNumNulls(), numNulls); assertEquals(colStats2.getStatsData().getDoubleStats().getNumDVs(), numDVs); assertEquals(colStats.getStatsDesc().getPartName(), partName); assertEquals(colStats2.getColName(), colName[1]); assertEquals(colStats2.getStatsData().getStringStats().getMaxColLen(), maxColLen); assertEquals(colStats2.getStatsData().getStringStats().getAvgColLen(), avgColLen, 0.01); assertEquals(colStats2.getStatsData().getStringStats().getNumNulls(), numNulls); assertEquals(colStats2.getStatsData().getStringStats().getNumDVs(), numDVs);
List<ColumnStatisticsObj> colStatObjs = new ArrayList<>(); ColumnStatisticsData data = new ColumnStatisticsData(); ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data); LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(50); longStats.setNumDVs(30); data.setLongStats(longStats); colStatObjs.add(colStats); aggrPartVals.add("2"); AggrStats aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames); Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40); aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames); Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100); Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 40);
colType = cso.getColType(); statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { LongColumnStatsDataInspector aggregateData = null; columnStatisticsData.setLongStats(aggregateData); } else { ColumnStatisticsObj cso = csp.getColStatsObj(); String partName = csp.getPartName(); LongColumnStatsData newData = cso.getStatsData().getLongStats(); if (useDensityFunctionForNDVEstimation) { densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setLongStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) { ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setLongStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) {
colType = cso.getColType(); statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { StringColumnStatsDataInspector aggregateData = null; columnStatisticsData.setStringStats(aggregateData); } else { ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setStringStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setStringStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); colName, columnStatisticsData.getStringStats().getNumDVs(), partNames.size(), colStatsWithSourceInfo.size()); statsObj.setStatsData(columnStatisticsData);
/**
 * Merges the binary column statistics of several sources into a single
 * {@code ColumnStatisticsObj}.
 *
 * <p>The column name, column type and stats kind of the result are taken from the first
 * entry. The first entry's binary stats are deep-copied and every following entry is
 * folded into that copy: the maximum column length and the average column length each
 * become the larger of the two values, and the null counts are summed.
 *
 * @param colStatsWithSourceInfo per-source column statistics to merge; must not be empty
 * @param partNames not used by this implementation
 * @param areAllPartsFound not used by this implementation
 * @return a statistics object carrying the merged binary stats
 * @throws IllegalArgumentException if {@code colStatsWithSourceInfo} is null or empty
 * @throws MetaException declared by the overridden method; not thrown directly here
 */
@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo,
    List<String> partNames, boolean areAllPartsFound) throws MetaException {
  // Without at least one entry there is nothing to derive the result column from; fail
  // with a clear message instead of a NullPointerException further down.
  if (colStatsWithSourceInfo == null || colStatsWithSourceInfo.isEmpty()) {
    throw new IllegalArgumentException("Statistics list must not be empty when aggregating");
  }

  ColumnStatisticsObj statsObj = null;
  BinaryColumnStatsData aggregateData = null;
  for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
    ColumnStatisticsObj cso = csp.getColStatsObj();
    if (statsObj == null) {
      // The first entry decides name, type and stats kind of the result.
      statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(cso.getColName(),
          cso.getColType(), cso.getStatsData().getSetField());
    }
    BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats();
    if (aggregateData == null) {
      // Copy so that the caller's first stats object is never mutated.
      aggregateData = newData.deepCopy();
    } else {
      aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
      // The merged average length is the larger of the inputs, not a weighted mean.
      aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
      aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
    }
  }

  ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
  columnStatisticsData.setBinaryStats(aggregateData);
  statsObj.setStatsData(columnStatisticsData);
  return statsObj;
}
}
colType = cso.getColType(); statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField()); LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { DoubleColumnStatsDataInspector aggregateData = null; columnStatisticsData.setDoubleStats(aggregateData); } else { ColumnStatisticsObj cso = csp.getColStatsObj(); String partName = csp.getPartName(); DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); if (useDensityFunctionForNDVEstimation) { densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setDoubleStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) { ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setDoubleStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) {
ColumnStatisticsData data1 = new ColumnStatisticsData(); ColumnStatisticsObj col1Stats = new ColumnStatisticsObj(col1.getName(), col1.getType(), data1); LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(col1Nulls); longStats.setNumDVs(col1DV); data1.setLongStats(longStats); colStatObjs.add(col1Stats); ColumnStatisticsData data2 = new ColumnStatisticsData(); ColumnStatisticsObj col2Stats = new ColumnStatisticsObj(col2.getName(), col2.getType(), data2); StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); stringStats.setNumNulls(col2Nulls); stringStats.setNumDVs(col2DV); data2.setStringStats(stringStats); colStatObjs.add(col2Stats); ColumnStatisticsData data3 = new ColumnStatisticsData(); ColumnStatisticsObj col3Stats = new ColumnStatisticsObj(col3.getName(), col3.getType(), data3); BooleanColumnStatsData boolStats = new BooleanColumnStatsData(); boolStats.setNumFalses(col3NumFalses); boolStats.setNumNulls(col3Nulls); data3.setBooleanStats(boolStats); colStatObjs.add(col3Stats);
/**
 * Merges the boolean column statistics of several sources into a single
 * {@code ColumnStatisticsObj}.
 *
 * <p>The column name, column type and stats kind of the result are taken from the first
 * entry. The first entry's boolean stats are deep-copied and every following entry is
 * folded into that copy by summing the true, false and null counts.
 *
 * @param colStatsWithSourceInfo per-source column statistics to merge; must not be empty
 * @param partNames not used by this implementation
 * @param areAllPartsFound not used by this implementation
 * @return a statistics object carrying the merged boolean stats
 * @throws IllegalArgumentException if {@code colStatsWithSourceInfo} is null or empty
 * @throws MetaException declared by the overridden method; not thrown directly here
 */
@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo,
    List<String> partNames, boolean areAllPartsFound) throws MetaException {
  // Without at least one entry there is nothing to derive the result column from; fail
  // with a clear message instead of a NullPointerException further down.
  if (colStatsWithSourceInfo == null || colStatsWithSourceInfo.isEmpty()) {
    throw new IllegalArgumentException("Statistics list must not be empty when aggregating");
  }

  ColumnStatisticsObj statsObj = null;
  BooleanColumnStatsData aggregateData = null;
  for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
    ColumnStatisticsObj cso = csp.getColStatsObj();
    if (statsObj == null) {
      // The first entry decides name, type and stats kind of the result.
      statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(cso.getColName(),
          cso.getColType(), cso.getStatsData().getSetField());
    }
    BooleanColumnStatsData newData = cso.getStatsData().getBooleanStats();
    if (aggregateData == null) {
      // Copy so that the caller's first stats object is never mutated.
      aggregateData = newData.deepCopy();
    } else {
      aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues());
      aggregateData.setNumFalses(aggregateData.getNumFalses() + newData.getNumFalses());
      aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
    }
  }

  ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
  columnStatisticsData.setBooleanStats(aggregateData);
  statsObj.setStatsData(columnStatisticsData);
  return statsObj;
}
/**
 * Creates a new {@code ColumnStatisticsData} whose date stats are set to {@code value}.
 *
 * @param value the date column statistics to store
 * @return a freshly created {@code ColumnStatisticsData} holding {@code value}
 */
public static ColumnStatisticsData dateStats(DateColumnStatsData value) {
  ColumnStatisticsData result = new ColumnStatisticsData();
  result.setDateStats(value);
  return result;
}
Map<String, LongColumnStatsData> extractedAdjustedStatsMap = new HashMap<>(); for (Map.Entry<String, ColumnStatisticsData> entry : adjustedStatsMap.entrySet()) { extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getLongStats()); extrapolateLongData.setNumNulls(numNulls); extrapolateLongData.setNumDVs(ndv); extrapolateData.setLongStats(extrapolateLongData);
/**
 * Converts the long stats of a column statistics object to a
 * LongColumnStatsDataInspector.
 *
 * <p>When the long stats are already a LongColumnStatsDataInspector the same instance
 * is returned (cast only); otherwise a new inspector is constructed from them.
 *
 * @param cso ColumnStatisticsObj whose long stats are read
 * @return LongColumnStatsDataInspector view or copy of the long stats
 */
public static LongColumnStatsDataInspector longInspectorFromStats(ColumnStatisticsObj cso) {
  if (!(cso.getStatsData().getLongStats() instanceof LongColumnStatsDataInspector)) {
    // Not an inspector yet: build one from the existing stats.
    return new LongColumnStatsDataInspector(cso.getStatsData().getLongStats());
  }
  return (LongColumnStatsDataInspector) cso.getStatsData().getLongStats();
}
/**
 * Creates a new {@code ColumnStatisticsData} whose decimal stats are set to {@code value}.
 *
 * @param value the decimal column statistics to store
 * @return a freshly created {@code ColumnStatisticsData} holding {@code value}
 */
public static ColumnStatisticsData decimalStats(DecimalColumnStatsData value) {
  ColumnStatisticsData result = new ColumnStatisticsData();
  result.setDecimalStats(value);
  return result;
}