/**
 * Extracts the statistics payload from a list that is expected to hold exactly one entry.
 *
 * @param statObj the statistics objects found for a single column
 * @return the stats data of the only element, or {@code null} when the list is empty or
 *         holds more than one element (both situations are logged)
 */
private ColumnStatisticsData validateSingleColStat(List<ColumnStatisticsObj> statObj) {
  // More than one entry is reported as an error.
  if (statObj.size() > 1) {
    Logger.error("More than one stat for a single column!");
    return null;
  }

  // An empty list is only reported at debug level.
  if (statObj.isEmpty()) {
    Logger.debug("No stats for some partition and column");
    return null;
  }

  return statObj.get(0).getStatsData();
}
/**
 * Returns the current value of the requested field by delegating to its getter.
 *
 * @param field the field whose value is wanted
 * @return the result of {@code getColName()}, {@code getColType()} or {@code getStatsData()}
 * @throws IllegalStateException if {@code field} matches none of the handled constants
 */
public Object getFieldValue(_Fields field) {
  switch (field) {
    case COL_NAME:
      return getColName();

    case COL_TYPE:
      return getColType();

    case STATS_DATA:
      return getStatsData();

    default:
      // No handled constant matched.
      throw new IllegalStateException();
  }
}
/**
 * Stores one long value, read from {@code o} through {@code oi}, into the boolean statistics
 * of {@code statsObj}. The destination counter is selected by {@code fName}:
 * {@code "counttrues"}, {@code "countfalses"} or {@code "countnulls"}. Any other name leaves
 * {@code statsObj} untouched.
 *
 * @param oi       inspector used to read the value; cast to {@code LongObjectInspector}
 * @param o        the object holding the value
 * @param fName    name of the statistic being unpacked
 * @param statsObj the statistics object whose boolean stats are updated
 */
private static void unpackBooleanStats(ObjectInspector oi, Object o, String fName,
    ColumnStatisticsObj statsObj) {
  // The value is read before the field name is inspected, whatever the name turns out to be.
  long value = ((LongObjectInspector) oi).get(o);

  switch (fName) {
    case "counttrues":
      statsObj.getStatsData().getBooleanStats().setNumTrues(value);
      break;

    case "countfalses":
      statsObj.getStatsData().getBooleanStats().setNumFalses(value);
      break;

    case "countnulls":
      statsObj.getStatsData().getBooleanStats().setNumNulls(value);
      break;

    default:
      // Unrecognised statistic names are ignored.
      break;
  }
}
/**
 * Stores one value, read from {@code o} through {@code oi}, into the binary statistics of
 * {@code statsObj}. The statistic is selected by {@code fName}: {@code "countnulls"} and
 * {@code "maxlength"} are read as longs, {@code "avglength"} is read as a double. Any other
 * name leaves {@code statsObj} untouched and reads nothing.
 *
 * @param oi       inspector used to read the value; cast per statistic to
 *                 {@code LongObjectInspector} or {@code DoubleObjectInspector}
 * @param o        the object holding the value
 * @param fName    name of the statistic being unpacked
 * @param statsObj the statistics object whose binary stats are updated
 */
private static void unpackBinaryStats(ObjectInspector oi, Object o, String fName,
    ColumnStatisticsObj statsObj) {
  switch (fName) {
    case "countnulls": {
      long nullCount = ((LongObjectInspector) oi).get(o);
      statsObj.getStatsData().getBinaryStats().setNumNulls(nullCount);
      break;
    }

    case "avglength": {
      double averageLength = ((DoubleObjectInspector) oi).get(o);
      statsObj.getStatsData().getBinaryStats().setAvgColLen(averageLength);
      break;
    }

    case "maxlength": {
      long maxLength = ((LongObjectInspector) oi).get(o);
      statsObj.getStatsData().getBinaryStats().setMaxColLen(maxLength);
      break;
    }

    default:
      // Unrecognised statistic names are ignored.
      break;
  }
}
/**
 * Folds the binary statistics of {@code newColStats} into {@code aggregateColStats} in place.
 * <ul>
 *   <li>max column length: the larger of the two values</li>
 *   <li>average column length: the larger of the two averages (no weighted mean is computed)</li>
 *   <li>null count: the sum of both counts</li>
 * </ul>
 *
 * @param aggregateColStats the running aggregate; its binary stats are modified
 * @param newColStats       the statistics being merged in; only read
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  BinaryColumnStatsData target = aggregateColStats.getStatsData().getBinaryStats();
  BinaryColumnStatsData source = newColStats.getStatsData().getBinaryStats();

  long largestMaxLen = Math.max(target.getMaxColLen(), source.getMaxColLen());
  target.setMaxColLen(largestMaxLen);

  double largestAvgLen = Math.max(target.getAvgColLen(), source.getAvgColLen());
  target.setAvgColLen(largestAvgLen);

  long totalNulls = target.getNumNulls() + source.getNumNulls();
  target.setNumNulls(totalNulls);
}
} // closes an enclosing scope whose header is outside this excerpt
/**
 * Checks aggregated long statistics against this object's expected values. Runs
 * {@code compareCommon(obj)} first, then asserts the null count, distinct-value count,
 * low value and high value, in that order.
 *
 * @param obj the aggregated statistics to verify; its long stats are inspected
 */
@Override
void compareAggr(ColumnStatisticsObj obj) {
  compareCommon(obj);

  Assert.assertEquals(
      "aggr long num nulls",
      getNumNulls(),
      obj.getStatsData().getLongStats().getNumNulls());

  Assert.assertEquals(
      "aggr long num dvs",
      getNumDvs(),
      obj.getStatsData().getLongStats().getNumDVs());

  Assert.assertEquals(
      "aggr long low val",
      getLowVal(),
      obj.getStatsData().getLongStats().getLowValue());

  Assert.assertEquals(
      "aggr long high val",
      getHighVal(),
      obj.getStatsData().getLongStats().getHighValue());
}
/**
 * Checks aggregated double statistics against this object's expected values. Runs
 * {@code compareCommon(obj)} first, then asserts the null count, distinct-value count,
 * low value and high value, in that order. Low and high values are compared with a
 * tolerance of 0.01.
 *
 * @param obj the aggregated statistics to verify; its double stats are inspected
 */
@Override
void compareAggr(ColumnStatisticsObj obj) {
  compareCommon(obj);

  Assert.assertEquals(
      "aggr double num nulls",
      getNumNulls(),
      obj.getStatsData().getDoubleStats().getNumNulls());

  Assert.assertEquals(
      "aggr double num dvs",
      getNumDvs(),
      obj.getStatsData().getDoubleStats().getNumDVs());

  Assert.assertEquals(
      "aggr double low val",
      getLowVal(),
      obj.getStatsData().getDoubleStats().getLowValue(),
      0.01);

  Assert.assertEquals(
      "aggr double high val",
      getHighVal(),
      obj.getStatsData().getDoubleStats().getHighValue(),
      0.01);
}
/**
 * Checks aggregated binary statistics against this object's expected values. Runs
 * {@code compareCommon(obj)} first, then asserts the max column length, the average
 * column length (tolerance 0.01) and the null count, in that order.
 *
 * @param obj the aggregated statistics to verify; its binary stats are inspected
 */
@Override
void compareAggr(ColumnStatisticsObj obj) {
  compareCommon(obj);

  Assert.assertEquals(
      "aggr binary max length",
      getMaxLen(),
      obj.getStatsData().getBinaryStats().getMaxColLen());

  // The failure message names the average length, which is what this assertion compares
  // (it previously said "min length").
  Assert.assertEquals(
      "aggr binary avg length",
      getAvgLen(),
      obj.getStatsData().getBinaryStats().getAvgColLen(),
      0.01);

  Assert.assertEquals(
      "aggr binary num nulls",
      getNumNulls(),
      obj.getStatsData().getBinaryStats().getNumNulls());
}
} // closes an enclosing scope whose header is outside this excerpt
/**
 * Checks aggregated boolean statistics against this object's expected values. Runs
 * {@code compareCommon(obj)} first, then asserts the true count, false count and null
 * count, in that order.
 *
 * @param obj the aggregated statistics to verify; its boolean stats are inspected
 */
@Override
void compareAggr(ColumnStatisticsObj obj) {
  compareCommon(obj);

  Assert.assertEquals(
      "aggr boolean num trues",
      getNumTrues(),
      obj.getStatsData().getBooleanStats().getNumTrues());

  Assert.assertEquals(
      "aggr boolean num falses",
      getNumFalses(),
      obj.getStatsData().getBooleanStats().getNumFalses());

  Assert.assertEquals(
      "aggr boolean num nulls",
      getNumNulls(),
      obj.getStatsData().getBooleanStats().getNumNulls());
}
/**
 * Asserts that {@code stats} holds exactly one entry and that its long statistics carry
 * the expected low value, high value and distinct-value count (checked in that order).
 *
 * @param dvCount expected number of distinct values
 * @param min     expected low value
 * @param max     expected high value
 * @param stats   the statistics list under test
 */
private void verifyLongStats(int dvCount, int min, int max, List<ColumnStatisticsObj> stats) {
  Assert.assertEquals(1, stats.size());

  LongColumnStatsData longStats = stats.get(0).getStatsData().getLongStats();

  Assert.assertEquals(min, longStats.getLowValue());
  Assert.assertEquals(max, longStats.getHighValue());
  Assert.assertEquals(dvCount, longStats.getNumDVs());
}
/**
 * Merges a statistics object, built via {@code createData(obj, null, null)}, with itself
 * and expects both the low and the high decimal value to be {@code null} afterwards.
 */
@Test
public void testMergeNullMinMaxValues() {
  ColumnStatisticsObj selfMerged = new ColumnStatisticsObj();
  createData(selfMerged, null, null);

  // The same instance is used as both the aggregate and the incoming stats.
  merger.merge(selfMerged, selfMerged);

  Assert.assertNull(selfMerged.getStatsData().getDecimalStats().getLowValue());
  Assert.assertNull(selfMerged.getStatsData().getDecimalStats().getHighValue());
}
/**
 * Merges a statistics object, built via {@code createData(obj, null, null)}, with itself
 * and expects both the low and the high date value to be {@code null} afterwards.
 */
@Test
public void testMergeNullMinMaxValues() {
  ColumnStatisticsObj selfMerged = new ColumnStatisticsObj();
  createData(selfMerged, null, null);

  // The same instance is used as both the aggregate and the incoming stats.
  merger.merge(selfMerged, selfMerged);

  Assert.assertNull(selfMerged.getStatsData().getDateStats().getLowValue());
  Assert.assertNull(selfMerged.getStatsData().getDateStats().getHighValue());
}
/**
 * Merges stats created with {@code (null, DECIMAL_3)} into stats created with
 * {@code (null, null)} and expects the aggregate's high value to be {@code DECIMAL_3}.
 * The two {@code createData} value arguments are presumably (low, high) - confirm against
 * {@code createData}.
 */
@Test
public void testMergeNonNullAndNullHigherValuesOldIsNull() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, null, null);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, null, DECIMAL_3);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_3,
      aggregate.getStatsData().getDecimalStats().getHighValue());
}
/**
 * Merges stats created with {@code (DECIMAL_5, null)} into stats created with
 * {@code (DECIMAL_3, null)} and expects the aggregate's low value to stay {@code DECIMAL_3}.
 * The two {@code createData} value arguments are presumably (low, high) - confirm against
 * {@code createData}.
 */
@Test
public void testMergeLowValuesFirstWins() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, DECIMAL_3, null);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, DECIMAL_5, null);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_3,
      aggregate.getStatsData().getDecimalStats().getLowValue());
}
/**
 * Merges stats created with {@code (null, DECIMAL_5)} into stats created with
 * {@code (null, DECIMAL_3)} and expects the aggregate's high value to become
 * {@code DECIMAL_5}. The two {@code createData} value arguments are presumably
 * (low, high) - confirm against {@code createData}.
 */
@Test
public void testMergeHighValuesSecondWins() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, null, DECIMAL_3);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, null, DECIMAL_5);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_5,
      aggregate.getStatsData().getDecimalStats().getHighValue());
}
/**
 * Merges stats created with {@code (DECIMAL_3, null)} into stats created with
 * {@code (DECIMAL_5, null)} and expects the aggregate's low value to become
 * {@code DECIMAL_3}. The two {@code createData} value arguments are presumably
 * (low, high) - confirm against {@code createData}.
 */
@Test
public void testMergeLowValuesSecondWins() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, DECIMAL_5, null);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, DECIMAL_3, null);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_3,
      aggregate.getStatsData().getDecimalStats().getLowValue());
}
/**
 * Merges stats created with {@code (null, DECIMAL_3)} into stats created with
 * {@code (null, DECIMAL_5)} and expects the aggregate's high value to stay
 * {@code DECIMAL_5}. The two {@code createData} value arguments are presumably
 * (low, high) - confirm against {@code createData}.
 */
@Test
public void testMergeHighValuesFirstWins() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, null, DECIMAL_5);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, null, DECIMAL_3);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_5,
      aggregate.getStatsData().getDecimalStats().getHighValue());
}
/**
 * Merges stats created with {@code (DECIMAL_3, null)} into stats created with
 * {@code (null, null)} and expects the aggregate's low value to be {@code DECIMAL_3}.
 * The two {@code createData} value arguments are presumably (low, high) - confirm against
 * {@code createData}.
 */
@Test
public void testMergeNonNullAndNullLowerValuesOldIsNull() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, null, null);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, DECIMAL_3, null);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_3,
      aggregate.getStatsData().getDecimalStats().getLowValue());
}
/**
 * Merges stats created with {@code (null, null)} into stats created with
 * {@code (DECIMAL_3, null)} and expects the aggregate's low value to stay {@code DECIMAL_3}.
 * The two {@code createData} value arguments are presumably (low, high) - confirm against
 * {@code createData}.
 */
@Test
public void testMergeNonNullAndNullLowerValuesNewIsNull() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, DECIMAL_3, null);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, null, null);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_3,
      aggregate.getStatsData().getDecimalStats().getLowValue());
}
/**
 * Merges stats created with {@code (null, null)} into stats created with
 * {@code (null, DECIMAL_3)} and expects the aggregate's high value to stay
 * {@code DECIMAL_3}. The two {@code createData} value arguments are presumably
 * (low, high) - confirm against {@code createData}.
 */
@Test
public void testMergeNonNullAndNullHigherValuesNewIsNull() {
  ColumnStatisticsObj aggregate = new ColumnStatisticsObj();
  createData(aggregate, null, DECIMAL_3);

  ColumnStatisticsObj incoming = new ColumnStatisticsObj();
  createData(incoming, null, null);

  merger.merge(aggregate, incoming);

  Assert.assertEquals(
      DECIMAL_3,
      aggregate.getStatsData().getDecimalStats().getHighValue());
}