/**
 * Builds the detail info of one blocklet from its data map row and, as a side effect,
 * sets the location array on the given blocklet.
 *
 * @param row        data map row the blocklet attributes are read from
 * @param blockletId id stored on the returned detail info
 * @param blocklet   blocklet whose location is populated from the row's LOCATIONS entry
 * @return a newly created, populated {@link BlockletDetailInfo}
 * @throws RuntimeException wrapping any IOException raised while decoding the locations
 */
protected BlockletDetailInfo getBlockletDetailInfo(DataMapRow row, short blockletId,
    ExtendedBlocklet blocklet) {
  BlockletDetailInfo info = new BlockletDetailInfo();
  info.setRowCount(row.getInt(ROW_COUNT_INDEX));
  info.setVersionNumber(row.getShort(VERSION_INDEX));
  info.setBlockletId(blockletId);
  info.setDimLens(getColumnCardinality());
  info.setSchemaUpdatedTimeStamp(row.getLong(SCHEMA_UPADATED_TIME_INDEX));

  // The locations are stored in the row as one comma separated, encoded string.
  try {
    byte[] encodedLocations = row.getByteArray(LOCATIONS);
    String joinedLocations =
        new String(encodedLocations, CarbonCommonConstants.DEFAULT_CHARSET);
    blocklet.setLocation(joinedLocations.split(","));
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }

  info.setBlockFooterOffset(row.getLong(BLOCK_FOOTER_OFFSET));
  info.setBlockSize(row.getLong(BLOCK_LENGTH));
  info.setLegacyStore(isLegacyStore);
  return info;
}
detailInfo.getBlockletInfo().setNumberOfRows(detailInfo.getRowCount()); detailInfo.getBlockletInfo().setNumberOfPages(detailInfo.getPagesCount()); detailInfo.setBlockletId(blockInfo.getDetailInfo().getBlockletId()); int[] pageRowCount = new int[detailInfo.getPagesCount()]; int numberOfPagesCompletelyFilled = detailInfo.getRowCount(); numberOfPagesCompletelyFilled /= CarbonVersionConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT_V2; lastPageRowCount = detailInfo.getRowCount() % CarbonVersionConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT_V2; fullyFilledRowsCount = numberOfPagesCompletelyFilled /= CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; lastPageRowCount = detailInfo.getRowCount() % CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; fullyFilledRowsCount = pageRowCount[pageRowCount.length - 1] = lastPageRowCount; detailInfo.getBlockletInfo().setNumberOfRowsPerPage(pageRowCount);
/**
 * Obtains the measure column chunk reader for the current block (the entry of
 * {@code blockInfos} at {@code index}) from {@link CarbonDataReaderFactory}.
 *
 * @param fileReader reader whose page-by-page flag is forwarded to the factory
 * @return the measure chunk reader matching the block's columnar format version
 */
private MeasureColumnChunkReader getMeasureColumnChunkReader(FileReader fileReader) {
  ColumnarFormatVersion formatVersion = ColumnarFormatVersion.valueOf(
      blockInfos.get(index).getDetailInfo().getVersionNumber());
  // Both modes request the same reader; only the page-by-page flag differs.
  boolean readPageByPage = fileReader.isReadPageByPage();
  return CarbonDataReaderFactory.getInstance().getMeasureColumnChunkReader(
      formatVersion,
      blockInfos.get(index).getDetailInfo().getBlockletInfo(),
      blockInfos.get(index).getFilePath(),
      readPageByPage);
}
/**
 * Restores this object's fields from the given input. After the superclass fields, the
 * values are read in this order: segment, version, bucket id, blocklet id, invalid
 * segments (count followed by entries), delete delta files (count followed by entries),
 * an optional detail info and an optional data map write path. Each optional part is
 * preceded by a boolean presence flag.
 *
 * @param in input to read from
 * @throws IOException if reading from the input fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);
  this.segment = Segment.toSegment(in.readUTF());
  this.version = ColumnarFormatVersion.valueOf(in.readShort());
  this.bucketId = in.readUTF();
  this.blockletId = in.readUTF();

  // Invalid segments: a count followed by that many UTF strings.
  int invalidSegmentCount = in.readInt();
  invalidSegments = new ArrayList<>(invalidSegmentCount);
  int segmentsRead = 0;
  while (segmentsRead < invalidSegmentCount) {
    invalidSegments.add(in.readUTF());
    segmentsRead++;
  }

  // Delete delta files: a count followed by that many UTF strings.
  int deleteDeltaFileCount = in.readInt();
  deleteDeltaFiles = new String[deleteDeltaFileCount];
  int filesRead = 0;
  while (filesRead < deleteDeltaFileCount) {
    deleteDeltaFiles[filesRead] = in.readUTF();
    filesRead++;
  }

  // Optional detail info, present only when the preceding flag is true.
  if (in.readBoolean()) {
    detailInfo = new BlockletDetailInfo();
    detailInfo.readFields(in);
  }

  // Optional data map write path, present only when the preceding flag is true.
  if (in.readBoolean()) {
    dataMapWritePath = in.readUTF();
  }
}
if (blockletDetailInfo.getBlockletInfo() == null || blockletDetailInfo .isUseMinMaxForPruning()) { blockInfo.setBlockOffset(blockletDetailInfo.getBlockFooterOffset()); DataFileFooter fileFooter = filePathToFileFooterMapping.get(blockInfo.getFilePath()); if (null == fileFooter) { filePathToSegmentPropertiesMap.put(blockInfo.getFilePath(), segmentProperties); if (blockletDetailInfo.isLegacyStore()) { LOGGER.warn("Skipping Direct Vector Filling as it is not Supported " + "for Legacy store prior to V3 store"); } else { if (null == segmentProperties) { segmentProperties = new SegmentProperties(blockInfo.getDetailInfo().getColumnSchemas(), blockInfo.getDetailInfo().getDimLens()); createFilterExpression(queryModel, segmentProperties); updateColumns(queryModel, blockInfo.getDetailInfo().getColumnSchemas(), blockInfo.getFilePath()); filePathToSegmentPropertiesMap.put(blockInfo.getFilePath(), segmentProperties);
carbonFile.getLength(), carbonFile.getLocations(), FileFormat.COLUMNAR_V3); split.setVersion(ColumnarFormatVersion.V3); BlockletDetailInfo info = new BlockletDetailInfo(); split.setDetailInfo(info); info.setBlockSize(carbonFile.getLength()); info.setVersionNumber(split.getVersion().number()); info.setUseMinMaxForPruning(false); splits.add(split);
/**
 * Creates the extended blocklet for one data map row. When {@code isLegacyStore} is set,
 * creation is delegated to the superclass; otherwise the blocklet is built here together
 * with its detail info.
 *
 * @param row                 data map row describing the blocklet
 * @param fileName            file name passed to the blocklet constructor
 * @param blockletId          id of the blocklet
 * @param useMinMaxForPruning flag stored on the detail info
 * @return the created blocklet
 */
@Override
protected ExtendedBlocklet createBlocklet(DataMapRow row, String fileName, short blockletId,
    boolean useMinMaxForPruning) {
  if (!isLegacyStore) {
    ExtendedBlocklet extendedBlocklet =
        new ExtendedBlocklet(fileName, String.valueOf(blockletId));
    BlockletDetailInfo info = getBlockletDetailInfo(row, blockletId, extendedBlocklet);
    info.setColumnSchemas(getColumnSchema());
    info.setBlockletInfoBinary(row.getByteArray(BLOCKLET_INFO_INDEX));
    info.setPagesCount(row.getShort(BLOCKLET_PAGE_COUNT_INDEX));
    info.setUseMinMaxForPruning(useMinMaxForPruning);
    extendedBlocklet.setDetailInfo(info);
    return extendedBlocklet;
  }
  return super.createBlocklet(row, fileName, blockletId, useMinMaxForPruning);
}
if (((CarbonInputSplit) inputSplit).getDetailInfo().getBlockFooterOffset() == 0L) { FileReader reader = FileFactory.getFileHolder(FileFactory.getFileType(splitPath), taskAttemptContext.getConfiguration()); ByteBuffer buffer = reader .readByteBuffer(FileFactory.getUpdatedFilePath(splitPath), ((CarbonInputSplit) inputSplit).getDetailInfo().getBlockSize() - 8, 8); ((CarbonInputSplit) inputSplit).getDetailInfo().setBlockFooterOffset(buffer.getLong());
/**
 * Returns the data file footer for the given block. The footer is read through the
 * version specific converter when the block has no detail info or when a read is forced;
 * otherwise a footer is assembled from the detail info plus the schema obtained from the
 * converter.
 *
 * @param tableBlockInfo          block whose footer is required
 * @param forceReadDataFileFooter when true the footer is always read via the converter
 * @return the footer read from the converter, or one assembled from the detail info
 * @throws IOException if the converter fails to read the footer or the schema
 */
private static DataFileFooter getDataFileFooter(TableBlockInfo tableBlockInfo,
    boolean forceReadDataFileFooter) throws IOException {
  BlockletDetailInfo detailInfo = tableBlockInfo.getDetailInfo();
  if (detailInfo == null || forceReadDataFileFooter) {
    return DataFileFooterConverterFactory.getInstance()
        .getDataFileFooterConverter(tableBlockInfo.getVersion())
        .readDataFileFooter(tableBlockInfo);
  }

  // Detail info is available: only the schema is fetched through the converter.
  ColumnarFormatVersion formatVersion =
      ColumnarFormatVersion.valueOf(detailInfo.getVersionNumber());
  List<ColumnSchema> tableColumns = DataFileFooterConverterFactory.getInstance()
      .getDataFileFooterConverter(formatVersion)
      .getSchema(tableBlockInfo);

  SegmentInfo cardinalityInfo = new SegmentInfo();
  cardinalityInfo.setColumnCardinality(detailInfo.getDimLens());

  DataFileFooter assembledFooter = new DataFileFooter();
  assembledFooter.setSchemaUpdatedTimeStamp(detailInfo.getSchemaUpdatedTimeStamp());
  assembledFooter.setColumnInTable(tableColumns);
  assembledFooter.setSegmentInfo(cardinalityInfo);
  return assembledFooter;
}
/**
 * Checks whether at least one split in the list carries a detail info that has no
 * blocklet info.
 *
 * @param splitList splits to inspect
 * @return true as soon as a split without blocklet info is found, false if every split
 *         has one (including when the list is empty)
 */
public static boolean isBlockWithoutBlockletInfoExists(List<CarbonInputSplit> splitList) {
  for (CarbonInputSplit candidate : splitList) {
    if (candidate.getDetailInfo().getBlockletInfo() != null) {
      continue;
    }
    return true;
  }
  return false;
}
if (((CarbonInputSplit) inputSplit).getDetailInfo().getBlockFooterOffset() == 0L) { FileReader reader = FileFactory.getFileHolder(FileFactory.getFileType(splitPath), context.getConfiguration()); .readByteBuffer(FileFactory.getUpdatedFilePath(splitPath), inputSplit.getLength() - 8, 8); ((CarbonInputSplit) inputSplit).getDetailInfo().setBlockFooterOffset(buffer.getLong());
/**
 * Returns the row count stored in the detail info of the current block, i.e. the entry
 * of {@code blockInfos} at {@code index}.
 */
@Override
public int numRows() {
  int rowCount = blockInfos.get(index).getDetailInfo().getRowCount();
  return rowCount;
}
/**
 * Converts every input split into a {@link TableBlockInfo}, carrying over the split's
 * detail info, data map write path and block footer offset.
 *
 * @param splitList splits to convert
 * @return one block info per split, in the iteration order of {@code splitList}
 * @throws RuntimeException if building the block info for a split raises an IOException
 */
public static List<TableBlockInfo> createBlocks(List<CarbonInputSplit> splitList) {
  List<TableBlockInfo> blocks = new ArrayList<>();
  for (CarbonInputSplit split : splitList) {
    BlockletInfos allBlocklets = new BlockletInfos(
        split.getNumberOfBlocklets(), 0, split.getNumberOfBlocklets());
    TableBlockInfo block;
    try {
      block = new TableBlockInfo(
          split.getPath().toString(),
          split.blockletId,
          split.getStart(),
          split.getSegment().toString(),
          split.getLocations(),
          split.getLength(),
          allBlocklets,
          split.getVersion(),
          split.getDeleteDeltaFiles());
      block.setDetailInfo(split.getDetailInfo());
      block.setDataMapWriterPath(split.dataMapWritePath);
      block.setBlockOffset(split.getDetailInfo().getBlockFooterOffset());
      blocks.add(block);
    } catch (IOException ioe) {
      throw new RuntimeException("fail to get location of split: " + split, ioe);
    }
  }
  return blocks;
}
/**
 * Creates a new BlockletDetailInfo whose fields are assigned from this instance.
 * Object-valued fields are assigned by reference, not duplicated.
 *
 * @return the newly created copy
 */
public BlockletDetailInfo copy() {
  BlockletDetailInfo duplicate = new BlockletDetailInfo();

  // identity and size related fields
  duplicate.blockletId = this.blockletId;
  duplicate.versionNumber = this.versionNumber;
  duplicate.rowCount = this.rowCount;
  duplicate.pagesCount = this.pagesCount;
  duplicate.blockSize = this.blockSize;
  duplicate.blockFooterOffset = this.blockFooterOffset;

  // schema related fields
  duplicate.dimLens = this.dimLens;
  duplicate.schemaUpdatedTimeStamp = this.schemaUpdatedTimeStamp;
  duplicate.columnSchemas = this.columnSchemas;
  duplicate.columnSchemaBinary = this.columnSchemaBinary;

  // blocklet info, in object and binary form
  duplicate.blockletInfo = this.blockletInfo;
  duplicate.blockletInfoBinary = this.blockletInfoBinary;

  // flags
  duplicate.isLegacyStore = this.isLegacyStore;
  duplicate.useMinMaxForPruning = this.useMinMaxForPruning;
  return duplicate;
}
&& blockInfo.getDetailInfo().getSchemaUpdatedTimeStamp() == 0L) { dataFileMatadata = CarbonUtil.readMetadataFile(blockInfo, true); } else {
/**
 * Builds a {@link CarbonInputSplit} for the given blocklet. The split starts at offset 0
 * of the blocklet's path, uses the blocklet's length and locations, and receives the
 * blocklet's detail info.
 *
 * @param blocklet blocklet to convert
 * @return the split created for the blocklet
 * @throws IOException propagated from the calls made while building the split
 */
private CarbonInputSplit convertToCarbonInputSplit(ExtendedBlocklet blocklet)
    throws IOException {
  FileSplit wholeBlockletSplit = new FileSplit(
      new Path(blocklet.getPath()), 0, blocklet.getLength(), blocklet.getLocations());
  ColumnarFormatVersion formatVersion =
      ColumnarFormatVersion.valueOf((short) blocklet.getDetailInfo().getVersionNumber());
  CarbonInputSplit carbonSplit = CarbonInputSplit.from(
      blocklet.getSegmentId(),
      blocklet.getBlockletId(),
      wholeBlockletSplit,
      formatVersion,
      blocklet.getDataMapWriterPath());
  carbonSplit.setDetailInfo(blocklet.getDetailInfo());
  return carbonSplit;
}
/**
 * Returns the number of rows of one page, looked up in the rows-per-page array of the
 * current block's blocklet info.
 *
 * @param pageNumber index into the rows-per-page array
 * @return the row count recorded for that page
 */
@Override
public int getPageRowCount(int pageNumber) {
  int[] rowsPerPage =
      blockInfos.get(index).getDetailInfo().getBlockletInfo().getNumberOfRowsPerPage();
  return rowsPerPage[pageNumber];
}
// Append the carbondata file extension to the block name.
blockName = blockName + CarbonTablePath.getCarbonDataExtension();
// Row count taken from the blocklet's detail info.
long rowCount = blocklet.getDetailInfo().getRowCount();
/**
 * Converts a single input split into a {@link TableBlockInfo}, carrying over the split's
 * detail info and block footer offset.
 *
 * @param inputSplit split to convert
 * @return the block info created for the split
 * @throws RuntimeException if building the block info raises an IOException
 */
public static TableBlockInfo getTableBlockInfo(CarbonInputSplit inputSplit) {
  BlockletInfos allBlocklets = new BlockletInfos(
      inputSplit.getNumberOfBlocklets(), 0, inputSplit.getNumberOfBlocklets());
  TableBlockInfo block;
  try {
    block = new TableBlockInfo(
        inputSplit.getPath().toString(),
        inputSplit.blockletId,
        inputSplit.getStart(),
        inputSplit.getSegment().toString(),
        inputSplit.getLocations(),
        inputSplit.getLength(),
        allBlocklets,
        inputSplit.getVersion(),
        inputSplit.getDeleteDeltaFiles());
    block.setDetailInfo(inputSplit.getDetailInfo());
    block.setBlockOffset(inputSplit.getDetailInfo().getBlockFooterOffset());
  } catch (IOException ioe) {
    throw new RuntimeException("fail to get location of split: " + inputSplit, ioe);
  }
  return block;
}
/**
 * Obtains the dimension column chunk reader for the current block (the entry of
 * {@code blockInfos} at {@code index}) from {@link CarbonDataReaderFactory}.
 *
 * @param fileReader reader whose page-by-page flag is forwarded to the factory
 * @return the dimension chunk reader matching the block's columnar format version
 */
private DimensionColumnChunkReader getDimensionColumnChunkReader(FileReader fileReader) {
  ColumnarFormatVersion formatVersion = ColumnarFormatVersion.valueOf(
      blockInfos.get(index).getDetailInfo().getVersionNumber());
  // Both modes request the same reader; only the page-by-page flag differs.
  boolean readPageByPage = fileReader.isReadPageByPage();
  return CarbonDataReaderFactory.getInstance().getDimensionColumnChunkReader(
      formatVersion,
      blockInfos.get(index).getDetailInfo().getBlockletInfo(),
      dimensionLens,
      blockInfos.get(index).getFilePath(),
      readPageByPage);
}