/**
 * Converts a metastore double-column statistics object that has low value, high value,
 * null count and distinct-value count all populated, and checks every statistic exposed
 * by the resulting {@code HiveColumnStatistics}.
 */
@Test
public void testDoubleStatsToColumnStatistics()
{
    // Arrange: a metastore stats object with all four numeric fields set.
    DoubleColumnStatsData metastoreStats = new DoubleColumnStatsData();
    metastoreStats.setLowValue(0);
    metastoreStats.setHighValue(100);
    metastoreStats.setNumNulls(1);
    metastoreStats.setNumDVs(20);
    ColumnStatisticsObj metastoreColumn =
            new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(metastoreStats));

    // Act
    HiveColumnStatistics converted =
            fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.of(1000));

    // Assert: only the double range is populated among the typed statistics.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(
            converted.getDoubleStatistics(),
            Optional.of(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100))));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // Size statistics are expected to be absent for this input.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());

    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    // 20 distinct values go in, 19 are expected out.
    // NOTE(review): presumably the conversion discounts the null entry; confirm in the converter.
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
/**
 * Reads the current value of the requested field by delegating to its getter.
 *
 * @param field the field to read; passing {@code null} fails in the switch with a
 *     {@link NullPointerException}
 * @return whatever the matching getter returns
 * @throws IllegalStateException if {@code field} is not one of the fields handled below
 */
public Object getFieldValue(_Fields field) {
  final Object fieldValue;
  switch (field) {
    case LOW_VALUE:
      fieldValue = getLowValue();
      break;
    case HIGH_VALUE:
      fieldValue = getHighValue();
      break;
    case NUM_NULLS:
      fieldValue = getNumNulls();
      break;
    case NUM_DVS:
      fieldValue = getNumDVs();
      break;
    case BIT_VECTORS:
      fieldValue = getBitVectors();
      break;
    default:
      // A field constant with no getter mapping.
      throw new IllegalStateException();
  }
  return fieldValue;
}
/**
 * Reports whether the given field has been assigned a value.
 *
 * @param field the field to query; must not be {@code null}
 * @return {@code true} if the matching {@code isSetXxx()} check reports the field as set,
 *     {@code false} otherwise
 * @throws IllegalArgumentException if {@code field} is {@code null}
 * @throws IllegalStateException if {@code field} is not one of the fields handled below
 */
public boolean isSet(_Fields field) {
  if (field == null) {
    throw new IllegalArgumentException();
  }

  final boolean assigned;
  switch (field) {
    case LOW_VALUE:
      assigned = isSetLowValue();
      break;
    case HIGH_VALUE:
      assigned = isSetHighValue();
      break;
    case NUM_NULLS:
      assigned = isSetNumNulls();
      break;
    case NUM_DVS:
      assigned = isSetNumDVs();
      break;
    case BIT_VECTORS:
      assigned = isSetBitVectors();
      break;
    default:
      // A field constant with no isSet mapping.
      throw new IllegalStateException();
  }
  return assigned;
}
case LOW_VALUE: if (value == null) { unsetLowValue(); } else { setLowValue((Double)value); unsetHighValue(); } else { setHighValue((Double)value); unsetNumNulls(); } else { setNumNulls((Long)value); unsetNumDVs(); } else { setNumDVs((Long)value); unsetBitVectors(); } else { setBitVectors((ByteBuffer)value);
OptionalDouble min = doubleStatsData.isSetLowValue() ? OptionalDouble.of(doubleStatsData.getLowValue()) : OptionalDouble.empty(); OptionalDouble max = doubleStatsData.isSetHighValue() ? OptionalDouble.of(doubleStatsData.getHighValue()) : OptionalDouble.empty(); OptionalLong nullsCount = doubleStatsData.isSetNumNulls() ? fromMetastoreNullsCount(doubleStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = doubleStatsData.isSetNumDVs() ? OptionalLong.of(doubleStatsData.getNumDVs()) : OptionalLong.empty(); return createDoubleColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount));
DoubleColumnStatsData oldDoubleStatsData = oldStatObj.getStatsData().getDoubleStats(); DoubleColumnStatsData newDoubleStatsData = newStatObj.getStatsData().getDoubleStats(); if (newDoubleStatsData.isSetHighValue()) { oldDoubleStatsData.setHighValue(newDoubleStatsData.getHighValue()); if (newDoubleStatsData.isSetLowValue()) { oldDoubleStatsData.setLowValue(newDoubleStatsData.getLowValue()); if (newDoubleStatsData.isSetNumNulls()) { oldDoubleStatsData.setNumNulls(newDoubleStatsData.getNumNulls()); if (newDoubleStatsData.isSetNumDVs()) { oldDoubleStatsData.setNumDVs(newDoubleStatsData.getNumDVs()); if (newDoubleStatsData.isSetBitVectors()) { oldDoubleStatsData.setBitVectors(newDoubleStatsData.getBitVectors());
DoubleColumnStatsData newData = cso.getStatsData().getDoubleStats(); if (useDensityFunctionForNDVEstimation) { densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setDoubleStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) { densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); aggregateData = newData.deepCopy(); } else { aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); ColumnStatisticsData csd = new ColumnStatisticsData(); csd.setDoubleStats(aggregateData); adjustedStatsMap.put(pseudoPartName.toString(), csd); if (useDensityFunctionForNDVEstimation) { densityAvgSum += (aggregateData.getHighValue() - aggregateData.getLowValue()) / aggregateData.getNumDVs(); colName, columnStatisticsData.getDoubleStats().getNumDVs(), partNames.size(), colStatsWithSourceInfo.size()); statsObj.setStatsData(columnStatisticsData);
statsObj.setStatsData(statsData); } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) { DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); doubleStats.setNumNullsIsSet(false); doubleStats.setNumDVsIsSet(false); doubleStats.setLowValueIsSet(false); doubleStats.setHighValueIsSet(false); Map<String, String> mapProp = work.getMapProp(); for (Entry<String, String> entry : mapProp.entrySet()) { String value = entry.getValue(); if (fName.equals("numNulls")) { doubleStats.setNumNulls(Long.parseLong(value)); } else if (fName.equals("numDVs")) { doubleStats.setNumDVs(Long.parseLong(value)); } else if (fName.equals("lowValue")) { doubleStats.setLowValue(Double.parseDouble(value)); } else if (fName.equals("highValue")) { doubleStats.setHighValue(Double.parseDouble(value)); } else { throw new SemanticException("Unknown stat");
/**
 * Exercises the conversion when the metastore reports exactly one distinct value and ten nulls.
 *
 * <p>Two scenarios differ only in the second argument handed to the conversion
 * (presumably the table row count; confirm against the converter): with 10 the expected
 * distinct-value count is 0, with 11 it is 1. The null count is expected to be 10 in both.
 */
@Test
public void testSingleDistinctValue()
{
    // Scenario 1: second argument equals the null count -> no distinct values expected.
    DoubleColumnStatsData allNullStats = new DoubleColumnStatsData();
    allNullStats.setNumNulls(10);
    allNullStats.setNumDVs(1);
    ColumnStatisticsObj allNullColumn =
            new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(allNullStats));
    HiveColumnStatistics allNullResult =
            fromMetastoreApiColumnStatistics(allNullColumn, OptionalLong.of(10));

    assertEquals(allNullResult.getNullsCount(), OptionalLong.of(10));
    assertEquals(allNullResult.getDistinctValuesCount(), OptionalLong.of(0));

    // Scenario 2: second argument is one more than the null count -> one distinct value expected.
    DoubleColumnStatsData oneValueStats = new DoubleColumnStatsData();
    oneValueStats.setNumNulls(10);
    oneValueStats.setNumDVs(1);
    ColumnStatisticsObj oneValueColumn =
            new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(oneValueStats));
    HiveColumnStatistics oneValueResult =
            fromMetastoreApiColumnStatistics(oneValueColumn, OptionalLong.of(11));

    assertEquals(oneValueResult.getNullsCount(), OptionalLong.of(10));
    assertEquals(oneValueResult.getDistinctValuesCount(), OptionalLong.of(1));
}
/**
 * Builds a column statistics object carrying freshly generated double statistics.
 *
 * <p>The generator helpers are invoked in the order: null count, distinct-value count,
 * low value, high value.
 *
 * @return a {@code ColumnStatisticsObj} for {@code colName}/{@code colType} wrapping the
 *     generated double stats
 */
@Override
ColumnStatisticsObj generate() {
  // Null count and distinct-value count go through the constructor; the range via setters.
  DoubleColumnStatsData generatedStats = new DoubleColumnStatsData(genNumNulls(), genNumDvs());
  generatedStats.setLowValue(genLowVal());
  generatedStats.setHighValue(genHighVal());

  ColumnStatisticsData statsHolder = new ColumnStatisticsData();
  statsHolder.setDoubleStats(generatedStats);

  return new ColumnStatisticsObj(colName, colType, statsHolder);
}
DoubleSubType subType = DoubleSubType.valueOf(name); DoubleColumnStatsData dstats = statData.getDoubleStats(); if (dstats.isSetHighValue()) { oneRow.add(subType.cast(dstats.getHighValue())); } else { oneRow.add(null); if (statData == null) return null; DoubleColumnStatsData dstats = statData.getDoubleStats(); if (!dstats.isSetHighValue()) { continue; double curVal = statData.getDoubleStats().getHighValue(); maxVal = maxVal == null ? curVal : Math.max(maxVal, curVal); DoubleSubType subType = DoubleSubType.valueOf(name); DoubleColumnStatsData dstats = statData.getDoubleStats(); if (dstats.isSetLowValue()) { oneRow.add(subType.cast(dstats.getLowValue())); } else { oneRow.add(null); if (statData == null) return null; DoubleColumnStatsData dstats = statData.getDoubleStats(); if (!dstats.isSetLowValue()) { continue; double curVal = statData.getDoubleStats().getLowValue(); minVal = minVal == null ? curVal : Math.min(minVal, curVal);
return false; boolean this_present_lowValue = true && this.isSetLowValue(); boolean that_present_lowValue = true && that.isSetLowValue(); if (this_present_lowValue || that_present_lowValue) { if (!(this_present_lowValue && that_present_lowValue)) boolean this_present_highValue = true && this.isSetHighValue(); boolean that_present_highValue = true && that.isSetHighValue(); if (this_present_highValue || that_present_highValue) { if (!(this_present_highValue && that_present_highValue)) boolean this_present_bitVectors = true && this.isSetBitVectors(); boolean that_present_bitVectors = true && that.isSetBitVectors(); if (this_present_bitVectors || that_present_bitVectors) { if (!(this_present_bitVectors && that_present_bitVectors))
/**
 * Produces a new {@code DoubleColumnStatsData} from this instance using the copy constructor.
 *
 * @return the newly constructed copy
 */
public DoubleColumnStatsData deepCopy() {
  DoubleColumnStatsData duplicate = new DoubleColumnStatsData(this);
  return duplicate;
}
/**
 * Computes a hash from the presence flags and values of the numeric fields.
 *
 * <p>{@code lowValue} and {@code highValue} contribute their value only when set; their
 * presence flag is always appended. {@code numNulls} and {@code numDVs} are always
 * appended, each preceded by a {@code true} presence flag.
 *
 * @return the hash code produced by the builder
 */
@Override
public int hashCode() {
  HashCodeBuilder hash = new HashCodeBuilder();

  final boolean hasLowValue = isSetLowValue();
  hash.append(hasLowValue);
  if (hasLowValue) {
    hash.append(lowValue);
  }

  final boolean hasHighValue = isSetHighValue();
  hash.append(hasHighValue);
  if (hasHighValue) {
    hash.append(highValue);
  }

  // These two are treated as always present.
  hash.append(true);
  hash.append(numNulls);

  hash.append(true);
  hash.append(numDVs);

  return hash.toHashCode();
}
if (field.type == DOUBLE_STATS_FIELD_DESC.type) { DoubleColumnStatsData doubleStats; doubleStats = new DoubleColumnStatsData(); doubleStats.read(iprot); return doubleStats; } else {
/**
 * Serializes {@code struct} to the given protocol, field by field.
 *
 * <p>{@code lowValue}, {@code highValue} and {@code bitVectors} are emitted only when set;
 * {@code numNulls} and {@code numDVs} are always emitted. {@code struct.validate()} runs
 * before anything is written.
 *
 * @param oprot the protocol to write to
 * @param struct the statistics object to serialize
 * @throws org.apache.thrift.TException propagated from validation or from the protocol
 */
public void write(org.apache.thrift.protocol.TProtocol oprot, DoubleColumnStatsData struct)
    throws org.apache.thrift.TException {
  struct.validate();

  oprot.writeStructBegin(STRUCT_DESC);

  if (struct.isSetLowValue()) {
    oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC);
    oprot.writeDouble(struct.lowValue);
    oprot.writeFieldEnd();
  }

  if (struct.isSetHighValue()) {
    oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC);
    oprot.writeDouble(struct.highValue);
    oprot.writeFieldEnd();
  }

  // Written unconditionally.
  oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC);
  oprot.writeI64(struct.numNulls);
  oprot.writeFieldEnd();

  oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
  oprot.writeI64(struct.numDVs);
  oprot.writeFieldEnd();

  // Null check first, then the isSet check, short-circuiting exactly like the nested form.
  if (struct.bitVectors != null && struct.isSetBitVectors()) {
    oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC);
    oprot.writeBinary(struct.bitVectors);
    oprot.writeFieldEnd();
  }

  oprot.writeFieldStop();
  oprot.writeStructEnd();
}
/**
 * Serializes {@code struct} to the given protocol, field by field.
 *
 * <p>{@code lowValue} and {@code highValue} are emitted only when set; {@code numNulls}
 * and {@code numDVs} are always emitted. {@code struct.validate()} runs before anything
 * is written.
 *
 * @param oprot the protocol to write to
 * @param struct the statistics object to serialize
 * @throws org.apache.thrift.TException propagated from validation or from the protocol
 */
public void write(org.apache.thrift.protocol.TProtocol oprot, DoubleColumnStatsData struct)
    throws org.apache.thrift.TException {
  struct.validate();

  oprot.writeStructBegin(STRUCT_DESC);

  // Range bounds: written only when present.
  if (struct.isSetLowValue()) {
    oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC);
    oprot.writeDouble(struct.lowValue);
    oprot.writeFieldEnd();
  }
  if (struct.isSetHighValue()) {
    oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC);
    oprot.writeDouble(struct.highValue);
    oprot.writeFieldEnd();
  }

  // Counts: written unconditionally.
  oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC);
  oprot.writeI64(struct.numNulls);
  oprot.writeFieldEnd();

  oprot.writeFieldBegin(NUM_DVS_FIELD_DESC);
  oprot.writeI64(struct.numDVs);
  oprot.writeFieldEnd();

  oprot.writeFieldStop();
  oprot.writeStructEnd();
}
/**
 * Extracts the null count from the type-specific statistics inside {@code statData}.
 *
 * @param type selects which typed statistics accessor to read; {@code null} fails in the
 *     switch with a {@link NullPointerException}
 * @param statData the statistics container to read from
 * @return the null count for the Integeral, Double, String, Boolean and Binary types, or
 *     {@code null} for any other type
 */
private Long getNullcountFor(StatType type, ColumnStatisticsData statData) {
  final Long nullCount;
  switch (type) {
    case Integeral:
      nullCount = statData.getLongStats().getNumNulls();
      break;
    case Double:
      nullCount = statData.getDoubleStats().getNumNulls();
      break;
    case String:
      nullCount = statData.getStringStats().getNumNulls();
      break;
    case Boolean:
      nullCount = statData.getBooleanStats().getNumNulls();
      break;
    case Binary:
      nullCount = statData.getBinaryStats().getNumNulls();
      break;
    default:
      // No null count is read for the remaining stat types.
      nullCount = null;
      break;
  }
  return nullCount;
}
/**
 * Stores one statistic value into the double-column stats held by {@code statsObj}.
 *
 * <p>The statistic is chosen by {@code fName}: {@code "countnulls"}, {@code "numdistinctvalues"},
 * {@code "max"}, {@code "min"} or {@code "ndvbitvector"}. Any other name leaves
 * {@code statsObj} untouched.
 *
 * @param oi inspector used to read {@code o}; cast to the inspector type the statistic requires
 * @param o the raw value to unpack
 * @param fName the statistic name
 * @param statsObj the column statistics object whose double stats are updated
 * @throws UnsupportedDoubleException if a {@code "max"} or {@code "min"} value is infinite or NaN
 */
private static void unpackDoubleStats(ObjectInspector oi, Object o, String fName,
    ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
  if (fName.equals("countnulls")) {
    long nullCount = ((LongObjectInspector) oi).get(o);
    statsObj.getStatsData().getDoubleStats().setNumNulls(nullCount);
    return;
  }
  if (fName.equals("numdistinctvalues")) {
    long distinctCount = ((LongObjectInspector) oi).get(o);
    statsObj.getStatsData().getDoubleStats().setNumDVs(distinctCount);
    return;
  }
  if (fName.equals("max")) {
    // The value is validated before the stats object is touched.
    double highest = requireFiniteDoubleStat(((DoubleObjectInspector) oi).get(o));
    statsObj.getStatsData().getDoubleStats().setHighValue(highest);
    return;
  }
  if (fName.equals("min")) {
    double lowest = requireFiniteDoubleStat(((DoubleObjectInspector) oi).get(o));
    statsObj.getStatsData().getDoubleStats().setLowValue(lowest);
    return;
  }
  if (fName.equals("ndvbitvector")) {
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
    byte[] bitVectorBytes =
        ((BinaryObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
    statsObj.getStatsData().getDoubleStats().setBitVectors(bitVectorBytes);
  }
}

/** Returns {@code value} unchanged, rejecting infinities and NaN. */
private static double requireFiniteDoubleStat(double value) throws UnsupportedDoubleException {
  if (Double.isInfinite(value) || Double.isNaN(value)) {
    throw new UnsupportedDoubleException();
  }
  return value;
}
DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats(); mColStats.setDoubleStats( doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null); } else if (statsObj.getStatsData().isSetDecimalStats()) { DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats();