/** Creates a deep copy of this DataChunk2 via the Thrift copy constructor. */
public DataChunk2 deepCopy() {
  DataChunk2 copy = new DataChunk2(this);
  return copy;
}
/**
 * Returns the current value of the requested Thrift field.
 *
 * @param field the field selector
 * @return the field's value (primitives are boxed)
 * @throws IllegalStateException if the field is not a known member of _Fields
 */
public Object getFieldValue(_Fields field) {
  switch (field) {
    case CHUNK_META:
      return getChunk_meta();
    case ROW_MAJOR:
      return isRowMajor();
    case DATA_PAGE_LENGTH:
      return getData_page_length();
    case ROWID_PAGE_LENGTH:
      return getRowid_page_length();
    case RLE_PAGE_LENGTH:
      return getRle_page_length();
    case PRESENCE:
      return getPresence();
    case SORT_STATE:
      return getSort_state();
    case ENCODERS:
      return getEncoders();
    case ENCODER_META:
      return getEncoder_meta();
    case MIN_MAX:
      return getMin_max();
    case NUMBER_OF_ROWS_INPAGE:
      return getNumberOfRowsInpage();
  }
  throw new IllegalStateException();
}
// NOTE(review): this span appears to be a garbled excerpt of the Thrift-generated
// setFieldValue(_Fields, Object) switch — the intervening "break;" and "case ...:
// if (value == null)" lines seem to have been lost, leaving dangling "} else {"
// branches that cannot compile as written. Restore this method from the Thrift
// generator output rather than hand-editing. Original text preserved below.
case CHUNK_META: if (value == null) { unsetChunk_meta(); } else { setChunk_meta((ChunkCompressionMeta)value); unsetRowMajor(); } else { setRowMajor((Boolean)value); unsetData_page_length(); } else { setData_page_length((Integer)value); unsetRowid_page_length(); } else { setRowid_page_length((Integer)value); unsetRle_page_length(); } else { setRle_page_length((Integer)value); unsetPresence(); } else { setPresence((PresenceMeta)value); unsetSort_state(); } else { setSort_state((SortState)value);
/**
 * Fills the common page-level fields of the given page header: the chunk
 * compression metadata (derived from the data page length already set on the
 * chunk), the row count of the page, and the row-major flag (always false).
 */
private void fillBasicFields(ColumnPage inputPage, DataChunk2 dataChunk) throws IOException {
  // getChunkCompressorMeta reads the data_page_length that the caller set earlier.
  dataChunk.setChunk_meta(
      CarbonMetadataUtil.getChunkCompressorMeta(inputPage, dataChunk.getData_page_length()));
  dataChunk.setRowMajor(false);
  dataChunk.setNumberOfRowsInpage(inputPage.getPageSize());
}
/**
 * Decodes a dimension page using the encoder metadata carried in its page header.
 * When vectorInfo is supplied the decoded values are filled directly into the
 * vector and null is returned; otherwise the decoded ColumnPage is returned.
 */
private ColumnPage decodeDimensionByMeta(DataChunk2 pageMetadata, ByteBuffer pageData, int offset,
    boolean isLocalDictEncodedPage, ColumnVectorInfo vectorInfo, BitSet nullBitSet,
    ReusableDataBuffer reusableDataBuffer) throws IOException, MemoryException {
  List<Encoding> encodings = pageMetadata.getEncoders();
  List<ByteBuffer> encoderMetas = pageMetadata.getEncoder_meta();
  String compressorName =
      CarbonMetadataUtil.getCompressorNameFromChunkMeta(pageMetadata.getChunk_meta());
  boolean fillVector = vectorInfo != null;
  ColumnPageDecoder decoder =
      encodingFactory.createDecoder(encodings, encoderMetas, compressorName, fillVector);
  if (!fillVector) {
    return decoder.decode(pageData.array(), offset, pageMetadata.data_page_length,
        isLocalDictEncodedPage);
  }
  decoder.decodeAndFillVector(pageData.array(), offset, pageMetadata.data_page_length, vectorInfo,
      nullBitSet, isLocalDictEncodedPage, pageMetadata.numberOfRowsInpage, reusableDataBuffer);
  return null;
}
/**
 * Builds a DataChunk3 wrapping the given list of per-page DataChunk2 headers.
 * Each page's offset is the running sum of the previous pages' total serialized
 * sizes (data + RLE + rowid/inverted-index sections).
 *
 * @param dataChunksList    per-page metadata to embed, in page order
 * @param encodedDictionary local dictionary chunk for the column (may be null)
 * @return a populated DataChunk3
 */
public static DataChunk3 getDataChunk3(List<DataChunk2> dataChunksList,
    LocalDictionaryChunk encodedDictionary) {
  // Presize to the known page count; keep the running length scoped to the loop.
  List<Integer> pageOffsets = new ArrayList<>(dataChunksList.size());
  List<Integer> pageLengths = new ArrayList<>(dataChunksList.size());
  int offset = 0;
  for (DataChunk2 dataChunk2 : dataChunksList) {
    pageOffsets.add(offset);
    // Total bytes occupied by this page on disk: data + RLE + rowid sections.
    int length = dataChunk2.getData_page_length() + dataChunk2.getRle_page_length()
        + dataChunk2.getRowid_page_length();
    pageLengths.add(length);
    offset += length;
  }
  DataChunk3 dataChunk = new DataChunk3();
  dataChunk.setLocal_dictionary(encodedDictionary);
  dataChunk.setData_chunk_list(dataChunksList);
  dataChunk.setPage_length(pageLengths);
  dataChunk.setPage_offset(pageOffsets);
  return dataChunk;
}
MeasureRawColumnChunk getMeasureRawColumnChunk(FileReader fileReader, int columnIndex, long offset, int dataLength, ByteBuffer buffer, DataChunk3 dataChunk) { // creating a raw chunks instance and filling all the details MeasureRawColumnChunk rawColumnChunk = new MeasureRawColumnChunk(columnIndex, buffer, offset, dataLength, this); int numberOfPages = dataChunk.getPage_length().size(); byte[][] maxValueOfEachPage = new byte[numberOfPages][]; byte[][] minValueOfEachPage = new byte[numberOfPages][]; int[] eachPageLength = new int[numberOfPages]; for (int i = 0; i < minValueOfEachPage.length; i++) { maxValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMax_values().get(0).array(); minValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_values().get(0).array(); eachPageLength[i] = dataChunk.getData_chunk_list().get(i).getNumberOfRowsInpage(); } rawColumnChunk.setDataChunkV3(dataChunk); rawColumnChunk.setFileReader(fileReader); rawColumnChunk.setPagesCount(dataChunk.getPage_length().size()); rawColumnChunk.setMaxValues(maxValueOfEachPage); rawColumnChunk.setMinValues(minValueOfEachPage); rawColumnChunk.setRowCount(eachPageLength); rawColumnChunk.setOffsets(ArrayUtils .toPrimitive(dataChunk.page_offset.toArray(new Integer[dataChunk.page_offset.size()]))); return rawColumnChunk; }
/**
 * Builds the DataChunk2 page header for one encoded column page.
 * NOTE: setData_page_length must run before fillBasicFields, because
 * fillBasicFields reads getData_page_length() when building the chunk
 * compression metadata — do not reorder these calls.
 */
private DataChunk2 buildPageMetadata(ColumnPage inputPage, byte[] encodedBytes) throws IOException { DataChunk2 dataChunk = new DataChunk2(); dataChunk.setData_page_length(encodedBytes.length); fillBasicFields(inputPage, dataChunk); fillNullBitSet(inputPage, dataChunk); fillEncoding(inputPage, dataChunk); fillMinMaxIndex(inputPage, dataChunk); fillLegacyFields(dataChunk); return dataChunk; }
// Deserialize each DataChunk2 element from the protocol and append it to the
// page header list. Braces are required: without them only the construction is
// inside the loop, and read()/add() would execute once — after the loop — on the
// last constructed element only.
for (int _i74 = 0; _i74 < _list72.size; ++_i74) {
  _elem73 = new DataChunk2();
  _elem73.read(iprot);
  struct.data_chunk_list.add(_elem73);
}
private DimensionColumnPage decodeColumnPage( DimensionRawColumnChunk rawColumnPage, int pageNumber, ColumnVectorInfo vectorInfo, ReusableDataBuffer reusableDataBuffer) throws IOException, MemoryException { // data chunk of blocklet column DataChunk3 dataChunk3 = rawColumnPage.getDataChunkV3(); // get the data buffer ByteBuffer rawData = rawColumnPage.getRawData(); DataChunk2 pageMetadata = dataChunk3.getData_chunk_list().get(pageNumber); String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta( pageMetadata.getChunk_meta()); this.compressor = CompressorFactory.getInstance().getCompressor(compressorName); // calculating the start point of data // as buffer can contain multiple column data, start point will be datachunkoffset + // data chunk length + page offset int offset = (int) rawColumnPage.getOffSet() + dimensionChunksLength .get(rawColumnPage.getColumnIndex()) + dataChunk3.getPage_offset().get(pageNumber); // first read the data and uncompressed it return decodeDimension(rawColumnPage, rawData, pageMetadata, offset, vectorInfo, reusableDataBuffer); }
/**
 * Returns true when the page's primary (first) encoding is one of the adaptive
 * encodings: integral or floating, plain or delta.
 */
public boolean isEncodedWithAdaptiveMeta(DataChunk2 pageMetadata) {
  List<Encoding> encodings = pageMetadata.getEncoders();
  if (encodings == null || encodings.isEmpty()) {
    return false;
  }
  // Only the first (outermost) encoding determines adaptiveness.
  Encoding encoding = encodings.get(0);
  return encoding == Encoding.ADAPTIVE_INTEGRAL
      || encoding == Encoding.ADAPTIVE_DELTA_INTEGRAL
      || encoding == Encoding.ADAPTIVE_FLOATING
      || encoding == Encoding.ADAPTIVE_DELTA_FLOATING;
}
/**
 * Decodes a legacy (V2) measure page. The ValueEncoderMeta is deserialized from
 * the first encoder-meta buffer; the legacy decoder is created with the snappy
 * compressor name as in the original code path.
 */
protected ColumnPage decodeMeasure(MeasureRawColumnChunk measureRawColumnChunk,
    DataChunk2 measureColumnChunk, int copyPoint, ReusableDataBuffer reusableDataBuffer)
    throws MemoryException, IOException {
  List<ByteBuffer> encoder_meta = measureColumnChunk.getEncoder_meta();
  assert (encoder_meta.size() > 0);
  byte[] encodedMeta = encoder_meta.get(0).array();
  ValueEncoderMeta meta = CarbonUtil.deserializeEncoderMetaV2(encodedMeta);
  ColumnPageDecoder codec = encodingFactory.createDecoderLegacy(meta,
      CompressorFactory.NativeSupportedCompressor.SNAPPY.getName());
  byte[] rawData = measureRawColumnChunk.getRawData().array();
  return codec.decode(rawData, copyPoint, measureColumnChunk.data_page_length);
}
}
// NOTE(review): this span looks like a garbled excerpt of a dimension page
// construction branch — the else-branch's constructor call is missing (only its
// trailing argument list survives after "} else {"). This cannot compile as
// written; restore the method from version control rather than hand-editing.
// Original text preserved below.
columnDataChunk = new VariableLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse, pageMetadata.getNumberOfRowsInpage(), dimStoreType, rawColumnPage.getLocalDictionary(), vectorInfo, uncompressedSize); } else { pageMetadata.getNumberOfRowsInpage(), eachColumnValueSize[rawColumnPage.getColumnIndex()], vectorInfo, uncompressedSize);
/**
 * Decodes a measure column page from its page header and raw data, starting at
 * the given offset. When vectorInfo is supplied the decoded values are written
 * straight into the vector (null bits treated as non-dictionary, i.e. false)
 * and null is returned; otherwise the decoded ColumnPage is returned.
 */
protected ColumnPage decodeMeasure(DataChunk2 pageMetadata, ByteBuffer pageData, int offset,
    ColumnVectorInfo vectorInfo, BitSet nullBitSet, ReusableDataBuffer reusableDataBuffer)
    throws MemoryException, IOException {
  List<Encoding> encodings = pageMetadata.getEncoders();
  org.apache.carbondata.core.metadata.encoder.Encoding.validateEncodingTypes(encodings);
  List<ByteBuffer> encoderMetas = pageMetadata.getEncoder_meta();
  String compressorName =
      CarbonMetadataUtil.getCompressorNameFromChunkMeta(pageMetadata.getChunk_meta());
  boolean fillVector = vectorInfo != null;
  ColumnPageDecoder codec =
      encodingFactory.createDecoder(encodings, encoderMetas, compressorName, fillVector);
  if (!fillVector) {
    return codec.decode(pageData.array(), offset, pageMetadata.data_page_length);
  }
  codec.decodeAndFillVector(pageData.array(), offset, pageMetadata.data_page_length, vectorInfo,
      nullBitSet, false, pageMetadata.numberOfRowsInpage, reusableDataBuffer);
  return null;
}
}
// NOTE(review): fragment — the closing braces of this loop and of the inner if
// are outside this span. Visible behavior: for each page, copy the first
// max/min byte buffers and the row count out of the DataChunk3 page headers,
// and when the min-max presence flag is set, record its first entry.
for (int i = 0; i < minValueOfEachPage.length; i++) { maxValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMax_values().get(0).array(); minValueOfEachPage[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_values().get(0).array(); eachPageLength[i] = dataChunk.getData_chunk_list().get(i).getNumberOfRowsInpage(); boolean isMinMaxFlagSet = dataChunk.getData_chunk_list().get(i).getMin_max().isSetMin_max_presence(); if (isMinMaxFlagSet) { minMaxFlag[i] = dataChunk.getData_chunk_list().get(i).getMin_max().getMin_max_presence().get(0);
// Deserialize each DataChunk2 element from the protocol and append it to the
// page header list. Braces are required: without them only the construction is
// inside the loop, and read()/add() would execute once — after the loop — on the
// last constructed element only.
for (int _i89 = 0; _i89 < _list87.size; ++_i89) {
  _elem88 = new DataChunk2();
  _elem88.read(iprot);
  struct.data_chunk_list.add(_elem88);
}
private ColumnPage decodeColumnPage(MeasureRawColumnChunk rawColumnChunk, int pageNumber, ColumnVectorInfo vectorInfo, ReusableDataBuffer reusableDataBuffer) throws IOException, MemoryException { // data chunk of blocklet column DataChunk3 dataChunk3 = rawColumnChunk.getDataChunkV3(); // data chunk of page DataChunk2 pageMetadata = dataChunk3.getData_chunk_list().get(pageNumber); String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta( pageMetadata.getChunk_meta()); this.compressor = CompressorFactory.getInstance().getCompressor(compressorName); // calculating the start point of data // as buffer can contain multiple column data, start point will be datachunkoffset + // data chunk length + page offset int offset = (int) rawColumnChunk.getOffSet() + measureColumnChunkLength.get(rawColumnChunk.getColumnIndex()) + dataChunk3.getPage_offset().get(pageNumber); BitSet nullBitSet = QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor); ColumnPage decodedPage = decodeMeasure(pageMetadata, rawColumnChunk.getRawData(), offset, vectorInfo, nullBitSet, reusableDataBuffer); if (decodedPage == null) { return null; } decodedPage.setNullBits(nullBitSet); return decodedPage; }
/**
 * Records per-dimension min/max bytes from each encoded page's statistics.
 * Pages encoded with metadata use the datatype-aware no-dictionary conversion;
 * all others use the plain value-to-bytes conversion. Also records whether
 * min/max should be written for each dimension.
 */
private void updateDimensionMinMax(EncodedColumnPage[] dimensions) {
  for (int dim = 0; dim < dimensions.length; dim++) {
    SimpleStatsResult stats = dimensions[dim].getStats();
    Object minValue = stats.getMin();
    Object maxValue = stats.getMax();
    boolean encodedWithMeta =
        CarbonUtil.isEncodedWithMeta(dimensions[dim].getPageMetadata().getEncoders());
    if (encodedWithMeta) {
      dimensionMaxValue[dim] = DataTypeUtil
          .getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(maxValue, stats.getDataType());
      dimensionMinValue[dim] = DataTypeUtil
          .getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(minValue, stats.getDataType());
    } else {
      dimensionMaxValue[dim] = CarbonUtil.getValueAsBytes(stats.getDataType(), maxValue);
      dimensionMinValue[dim] = CarbonUtil.getValueAsBytes(stats.getDataType(), minValue);
    }
    writeMinMaxForDimensions[dim] = stats.writeMinMax();
  }
}
/**
 * Below method will be used to convert the compressed measure chunk raw data to actual data.
 * Reads this page's bytes from the file on demand, derives the null bitset from the
 * page header's presence metadata, decodes, and attaches the null bits to the result.
 *
 * @param rawColumnPage measure raw chunk
 * @param pageNumber page number to decode
 * @return decoded ColumnPage (original javadoc said DimensionColumnDataChunk, which
 *         did not match the declared return type)
 */
@Override public ColumnPage decodeColumnPage( MeasureRawColumnChunk rawColumnPage, int pageNumber, ReusableDataBuffer reusableDataBuffer) throws IOException, MemoryException { // data chunk of blocklet column DataChunk3 dataChunk3 = rawColumnPage.getDataChunkV3(); // data chunk of page DataChunk2 pageMetadata = dataChunk3.getData_chunk_list().get(pageNumber); String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta( pageMetadata.getChunk_meta()); this.compressor = CompressorFactory.getInstance().getCompressor(compressorName); // calculating the start point of data // as buffer can contain multiple column data, start point will be datachunkoffset + // data chunk length + page offset long offset = rawColumnPage.getOffSet() + measureColumnChunkLength .get(rawColumnPage.getColumnIndex()) + dataChunk3.getPage_offset().get(pageNumber); ByteBuffer buffer = rawColumnPage.getFileReader() .readByteBuffer(filePath, offset, pageMetadata.data_page_length); BitSet nullBitSet = QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor); ColumnPage decodedPage = decodeMeasure(pageMetadata, buffer, 0, null, nullBitSet, reusableDataBuffer); decodedPage.setNullBits(nullBitSet); return decodedPage; }
// NOTE(review): fragment — the start of this method's signature and the body after
// the if-condition fall outside this span. Visible behavior: fetch the page's
// encoding list, validate the encoding types, then branch on whether the page
// was encoded with metadata.
ReusableDataBuffer reusableDataBuffer) throws IOException, MemoryException { List<Encoding> encodings = pageMetadata.getEncoders(); org.apache.carbondata.core.metadata.encoder.Encoding.validateEncodingTypes(encodings); if (CarbonUtil.isEncodedWithMeta(encodings)) {