/** * Below method will be used to get the block index info thrift object for * each block present in the segment * * @param blockIndexInfoList block index info list * @return list of block index */ public static List<BlockIndex> getBlockIndexInfo(List<BlockIndexInfo> blockIndexInfoList) { List<BlockIndex> thriftBlockIndexList = new ArrayList<BlockIndex>(); BlockIndex blockIndex = null; // below code to create block index info object for each block for (BlockIndexInfo blockIndexInfo : blockIndexInfoList) { blockIndex = new BlockIndex(); blockIndex.setNum_rows(blockIndexInfo.getNumberOfRows()); blockIndex.setOffset(blockIndexInfo.getOffset()); blockIndex.setFile_name(blockIndexInfo.getFileName()); blockIndex.setBlock_index(getBlockletIndex(blockIndexInfo.getBlockletIndex())); if (blockIndexInfo.getBlockletInfo() != null) { blockIndex.setBlocklet_info(getBlocletInfo3(blockIndexInfo.getBlockletInfo())); } thriftBlockIndexList.add(blockIndex); } return thriftBlockIndexList; }
/**
 * Serializes the thrift file header and writes it through the file channel,
 * advancing the tracked write offset by the number of bytes written.
 *
 * @throws IOException if writing to the underlying channel fails
 */
private void writeHeaderToFile() throws IOException {
  byte[] headerBytes = CarbonUtil.getByteArray(CarbonMetadataUtil
      .getFileHeader(true, thriftColumnSchemaList, model.getSchemaUpdatedTimeStamp()));
  // keep the running offset in sync with what actually reached the channel
  currentOffsetInFile += fileChannel.write(ByteBuffer.wrap(headerBytes));
}
mergeWriteMinMaxFlagForAllPages(blockletMinMaxIndex, encodedBlocklet); new TablePageStatistics(getEncodedColumnPages(encodedBlocklet, true, 0), getEncodedColumnPages(encodedBlocklet, false, 0)); byte[][] minCol = stats.getDimensionMinValue().clone(); byte[][] maxCol = stats.getDimensionMaxValue().clone(); for (int pageIndex = 0; pageIndex < encodedBlocklet.getNumberOfPages(); pageIndex++) { stats = new TablePageStatistics(getEncodedColumnPages(encodedBlocklet, true, pageIndex), getEncodedColumnPages(encodedBlocklet, false, pageIndex)); byte[][] columnMaxData = stats.getDimensionMaxValue(); byte[][] columnMinData = stats.getDimensionMinValue(); stats = new TablePageStatistics(getEncodedColumnPages(encodedBlocklet, true, 0), getEncodedColumnPages(encodedBlocklet, false, 0)); byte[][] measureMaxValue = stats.getMeasureMaxValue().clone(); byte[][] measureMinValue = stats.getMeasureMinValue().clone(); for (int i = 1; i < encodedBlocklet.getNumberOfPages(); i++) { for (int j = 0; j < measureMinValue.length; j++) { stats = new TablePageStatistics(getEncodedColumnPages(encodedBlocklet, true, i), getEncodedColumnPages(encodedBlocklet, false, i)); minVal = stats.getMeasureMinValue()[j]; maxVal = stats.getMeasureMaxValue()[j]; if (compareMeasureData(measureMaxValue[j], maxVal, carbonMeasureList.get(j).getDataType()) < 0) { measureMaxValue[j] = maxVal.clone(); if (compareMeasureData(measureMinValue[j], minVal, carbonMeasureList.get(j).getDataType()) > 0) {
/**
 * Decides whether a blocklet must be scanned by evaluating the pushed-down
 * filter against the blocklet's min/max index, when both are available.
 *
 * @param header blocklet header carrying the (optional) min/max index
 * @return false only when the filter provably matches no rows in the blocklet;
 *         true otherwise (including when no filter or no index is present)
 */
private boolean isScanRequired(BlockletHeader header) {
  if (filter != null && header.getBlocklet_index() != null) {
    BlockletMinMaxIndex minMaxIndex = CarbonMetadataUtil.convertExternalMinMaxIndex(
        header.getBlocklet_index().getMin_max_index());
    if (minMaxIndex != null) {
      // an empty bit set means no page can satisfy the filter, so skip the scan
      BitSet bitSet = filter.isScanRequired(minMaxIndex.getMaxValues(),
          minMaxIndex.getMinValues(), minMaxIndex.getIsMinMaxSet());
      return !bitSet.isEmpty();
    }
  }
  // without filter or index information we must scan conservatively
  return true;
}
void apppendBlocklet(DataOutputStream outputStream) throws IOException { outputStream.write(CarbonStreamOutputFormat.CARBON_SYNC_MARKER); BlockletInfo blockletInfo = new BlockletInfo(); blockletInfo.setNum_rows(getRowIndex() + 1); BlockletHeader blockletHeader = new BlockletHeader(); blockletHeader.setBlocklet_length(getCount()); blockletHeader.setMutation(MutationType.INSERT); blockletHeader.setBlocklet_info(blockletInfo); // add blocklet level min/max blockletMinMaxIndex = generateBlockletMinMax(); if (blockletInfo.getNum_rows() > 1) { BlockletIndex blockletIndex = new BlockletIndex(); blockletIndex.setMin_max_index(CarbonMetadataUtil.convertMinMaxIndex(blockletMinMaxIndex)); blockletHeader.setBlocklet_index(blockletIndex); } byte[] headerBytes = CarbonUtil.getByteArray(blockletHeader); outputStream.writeInt(headerBytes.length); outputStream.write(headerBytes); byte[] compressed = compressor.compressByte(getBytes(), getCount()); outputStream.writeInt(compressed.length); outputStream.write(compressed); }
CarbonMetadataUtil.getBlockletIndex( encodedBlocklet, model.getSegmentProperties().getMeasures())); BlockletInfo3 blockletInfo3 =
.getIndexHeader(localCardinality, thriftColumnSchemaList, model.getBucketId(), model.getSchemaUpdatedTimeStamp()); List<BlockIndex> blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList); String indexFileName; if (enableDirectlyWriteDataToStorePath) {
/**
 * Populates the version-independent fields of a {@code DataChunk2} from the
 * given column page: compressor metadata, page row count and storage layout.
 *
 * @param inputPage column page whose metadata is copied into the chunk
 * @param dataChunk thrift chunk being filled
 * @throws IOException if building the compressor metadata fails
 */
private void fillBasicFields(ColumnPage inputPage, DataChunk2 dataChunk) throws IOException {
  dataChunk.setRowMajor(false);
  dataChunk.setNumberOfRowsInpage(inputPage.getPageSize());
  // compressor meta uses the data page length already recorded on the chunk
  dataChunk.setChunk_meta(
      CarbonMetadataUtil.getChunkCompressorMeta(inputPage, dataChunk.getData_page_length()));
}
.convertFileFooterVersion3(blockletMetadata, blockletIndex, localCardinality, thriftColumnSchemaList.size()); convertFileMeta.setIs_sort(isSorted);
/** * merge new blocklet index and old file index to create new file index */ private static void updateStreamFileIndex(Map<String, StreamFileIndex> indexMap, String indexPath, FileFactory.FileType fileType, DataType[] msrDataTypes ) throws IOException { List<BlockIndex> blockIndexList = readIndexFile(indexPath, fileType); for (BlockIndex blockIndex : blockIndexList) { BlockletMinMaxIndex fileIndex = CarbonMetadataUtil .convertExternalMinMaxIndex(blockIndex.getBlock_index().getMin_max_index()); StreamFileIndex blockletIndex = indexMap.get(blockIndex.getFile_name()); if (blockletIndex == null) { // should index all stream file indexMap.put(blockIndex.getFile_name(), new StreamFileIndex(blockIndex.getFile_name(), fileIndex, blockIndex.getNum_rows())); } else { // merge minMaxIndex into StreamBlockIndex blockletIndex.setRowCount(blockletIndex.getRowCount() + blockIndex.getNum_rows()); mergeBatchMinMax(blockletIndex, fileIndex, msrDataTypes); } } }
/**
 * Converts the wrapper blocklet index into its thrift representation, carrying
 * over both the min/max index and the b-tree start/end keys.
 *
 * @param info wrapper blocklet index to convert
 * @return equivalent thrift blocklet index
 */
public static BlockletIndex getBlockletIndex(
    org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex info) {
  BlockletBTreeIndex btreeIndex = new BlockletBTreeIndex();
  btreeIndex.setStart_key(info.getBtreeIndex().getStartKey());
  btreeIndex.setEnd_key(info.getBtreeIndex().getEndKey());
  BlockletIndex thriftIndex = new BlockletIndex();
  thriftIndex.setMin_max_index(convertMinMaxIndex(info.getMinMaxIndex()));
  thriftIndex.setB_tree_index(btreeIndex);
  return thriftIndex;
}
&& blockIndex.getBlock_index().getMin_max_index() != null) { streamFile.setMinMaxIndex(CarbonMetadataUtil .convertExternalMinMaxIndex(blockIndex.getBlock_index().getMin_max_index()));
BlockletInfo blockletInfo = blockletInfoList.get(i); blockIndexReplica .setBlock_index(CarbonMetadataUtil.getBlockletIndex(blockletInfo.getBlockletIndex())); blockIndexReplica .setBlocklet_info(CarbonMetadataUtil.getBlocletInfo3(blockletInfo)); blockIndexThrift.add(blockIndexReplica);
if (streamFileIndex != null) { blockletIndex.setMin_max_index( CarbonMetadataUtil.convertMinMaxIndex(streamFileIndex.getMinMaxIndex())); blockIndex.setNum_rows(streamFileIndex.getRowCount()); } else {
/**
 * Builds the carbon stream file header from the table schema and writes it to
 * the output stream. Stream files carry no footer, are splittable at the sync
 * marker, and record the compressor used for blocklet data.
 *
 * @throws IOException if writing the serialized header fails
 */
private void writeFileHeader() throws IOException {
  List<ColumnSchema> wrapperColumnSchemaList = CarbonUtil.getColumnSchemaList(
      carbonTable.getDimensionByTableName(carbonTable.getTableName()),
      carbonTable.getMeasureByTableName(carbonTable.getTableName()));
  // stream files have no dictionary, so report maximum cardinality for every column
  int[] dimLensWithComplex = new int[wrapperColumnSchemaList.size()];
  for (int i = 0; i < dimLensWithComplex.length; i++) {
    dimLensWithComplex[i] = Integer.MAX_VALUE;
  }
  int[] dictionaryColumnCardinality =
      CarbonUtil.getFormattedCardinality(dimLensWithComplex, wrapperColumnSchemaList);
  List<Integer> cardinality = new ArrayList<>();
  List<org.apache.carbondata.format.ColumnSchema> columnSchemaList =
      AbstractFactDataWriter.getColumnSchemaListAndCardinality(
          cardinality, dictionaryColumnCardinality, wrapperColumnSchemaList);
  FileHeader fileHeader =
      CarbonMetadataUtil.getFileHeader(true, columnSchemaList, System.currentTimeMillis());
  // stream-format specifics: no footer, splittable via sync marker, named compressor
  fileHeader.setIs_footer_present(false);
  fileHeader.setIs_splitable(true);
  fileHeader.setSync_marker(CarbonStreamOutputFormat.CARBON_SYNC_MARKER);
  fileHeader.setCompressor_name(compressorName);
  outputStream.write(CarbonUtil.getByteArray(fileHeader));
}