/**
 * Creates the min/max index entry for a single column page using the page statistics.
 *
 * <p>If {@code CarbonUtil.isEncodedWithMeta(encoders)} holds and the page's column type is
 * {@code ColumnType.PLAIN_VALUE}, the min and max are serialized through
 * {@code DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn}; in every other case
 * they are serialized through {@code CarbonUtil.getValueAsBytes}.
 *
 * @param inputPage page whose statistics, data type and column spec are read
 * @param encoders  encodings passed to {@code CarbonUtil.isEncodedWithMeta}
 * @return a new index carrying one max value, one min value and the statistics'
 *         {@code writeMinMax()} flag
 */
private BlockletMinMaxIndex buildMinMaxIndex(ColumnPage inputPage, List<Encoding> encoders) {
  boolean plainValueEncodedWithMeta = CarbonUtil.isEncodedWithMeta(encoders)
      && inputPage.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE;

  ByteBuffer maxBuffer;
  ByteBuffer minBuffer;
  if (plainValueEncodedWithMeta) {
    maxBuffer = ByteBuffer.wrap(
        DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(
            inputPage.getStatistics().getMax(), inputPage.getDataType()));
    minBuffer = ByteBuffer.wrap(
        DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(
            inputPage.getStatistics().getMin(), inputPage.getDataType()));
  } else {
    maxBuffer = ByteBuffer.wrap(
        CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMax()));
    minBuffer = ByteBuffer.wrap(
        CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMin()));
  }

  BlockletMinMaxIndex minMaxIndex = new BlockletMinMaxIndex();
  minMaxIndex.addToMax_values(maxBuffer);
  minMaxIndex.addToMin_values(minBuffer);
  minMaxIndex.addToMin_max_presence(inputPage.getStatistics().writeMinMax());
  return minMaxIndex;
}
switch (columnSpec.getColumnType()) { case COMPLEX_ARRAY: case COMPLEX_STRUCT:
private byte[] getChunkDataInBytes(int rowId) { ColumnType columnType = columnPage.getColumnSpec().getColumnType(); DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); DataType targetDataType = columnPage.getDataType();
return newDoublePage(meta, doubleData); } else if (!isLVEncoded && storeDataType == DataTypes.BYTE_ARRAY && ( columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE || columnSpec.getColumnType() == ColumnType.PLAIN_VALUE)) { byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length); return newComplexLVBytesPage(columnSpec, lvVarBytes, CarbonCommonConstants.SHORT_SIZE_IN_BYTE, meta.getCompressorName()); } else if (isLVEncoded && storeDataType == DataTypes.BYTE_ARRAY && columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length); return newFixedByteArrayPage(columnSpec, lvVarBytes, 3, meta.getCompressorName()); } else if (storeDataType == DataTypes.BYTE_ARRAY && columnSpec.getColumnType() == ColumnType.COMPLEX_STRUCT) { byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length); return newFixedByteArrayPage(columnSpec, lvVarBytes, CarbonCommonConstants.SHORT_SIZE_IN_BYTE, meta.getCompressorName()); } else if (storeDataType == DataTypes.BYTE_ARRAY && columnSpec.getColumnType() == ColumnType.COMPLEX_ARRAY) { byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length); return newFixedByteArrayPage(columnSpec, lvVarBytes, CarbonCommonConstants.LONG_SIZE_IN_BYTE, meta.getCompressorName()); } else if (storeDataType == DataTypes.BYTE_ARRAY && columnSpec.getColumnType() == ColumnType.PLAIN_LONG_VALUE) { byte[] lvVarBytes = compressor.unCompressByte(compressedData, offset, length); return newLVBytesPage(columnSpec, lvVarBytes,
return compressor.compressByte(getDecimalPage()); } else if (dataType == DataTypes.BYTE_ARRAY && columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { return compressor.compressByte(getComplexChildrenLVFlattenedBytePage()); } else if (dataType == DataTypes.BYTE_ARRAY && (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_ARRAY || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_LONG_VALUE || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE)) { return compressor.compressByte(getComplexParentFlattenedBytePage()); } else if (dataType == DataTypes.BYTE_ARRAY) {
return getDecimalPage().length; } else if (dataType == DataTypes.BYTE_ARRAY && columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { return getComplexChildrenLVFlattenedBytePage().length; } else if (dataType == DataTypes.BYTE_ARRAY && (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_ARRAY || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_LONG_VALUE || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE)) { return getComplexParentFlattenedBytePage().length; } else if (dataType == DataTypes.BYTE_ARRAY) {
isInvertedIndex(columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE, columnSpec)); } else { isInvertedIndex(columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE, columnSpec));
private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) { TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec(); if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { if (inputPage.getDataType() == DataTypes.BYTE_ARRAY || inputPage.getDataType() == DataTypes.STRING) { // use legacy encoder return null; } else if ((inputPage.getDataType() == DataTypes.BYTE) || (inputPage.getDataType() == DataTypes.SHORT) || (inputPage.getDataType() == DataTypes.INT) || ( inputPage.getDataType() == DataTypes.LONG)) { return selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true, columnSpec) .createEncoder(null); } else if ((inputPage.getDataType() == DataTypes.FLOAT) || (inputPage.getDataType() == DataTypes.DOUBLE)) { return selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true, columnSpec) .createEncoder(null); } } // use legacy encoder return null; }
@Override public int compareTo(int rowId, byte[] compareValue) { // rowId is the inverted index, but the null bitset is based on actual data int nullBitSetRowId = rowId; if (isExplicitSorted()) { nullBitSetRowId = getInvertedIndex(rowId); } byte[] nullBitSet = getNullBitSet(nullBitSetRowId, columnPage.getColumnSpec().getColumnType()); if (nullBitSet != null && ByteUtil.UnsafeComparer.INSTANCE.compareTo(nullBitSet, compareValue) == 0) { // check if the compare value is a null value // if the compare value is null and the data is also null we can directly return 0 return 0; } else { byte[] chunkData = this.getChunkDataInBytes(rowId); return ByteUtil.UnsafeComparer.INSTANCE.compareTo(chunkData, compareValue); } }
@Override public ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec, ColumnPage inputPage) { // TODO: add log // choose the encoding type for measure type and no dictionary primitive type columns if (columnSpec instanceof TableSpec.MeasureSpec || ( DataTypeUtil.isPrimitiveColumn(columnSpec.getSchemaDataType()) && columnSpec.getColumnType() == ColumnType.PLAIN_VALUE)) { return createEncoderForMeasureOrNoDictionaryPrimitive(inputPage, columnSpec); } else { if (newWay) { return createEncoderForDimension((TableSpec.DimensionSpec) columnSpec, inputPage); } else { assert columnSpec instanceof TableSpec.DimensionSpec; return createEncoderForDimensionLegacy((TableSpec.DimensionSpec) columnSpec); } } }
@Override public byte[] getChunkData(int rowId) { byte[] nullBitSet = getNullBitSet(rowId, columnPage.getColumnSpec().getColumnType()); if (nullBitSet != null) { // if this row is null, return default null represent in byte array return nullBitSet; } else { if (isExplicitSorted()) { rowId = getInvertedReverseIndex(rowId); } return getChunkDataInBytes(rowId); } }