this.columnName = encodedColumnPage.getActualPage().getColumnSpec().getFieldName(); return;
.getDataType().getName() + " Destination data type: " + targetDataType .getName() + " for the column: " + noDictDimensionPages[noDictIndex] .getColumnSpec().getFieldName() + " having encoding type: " + columnPageEncoder.getEncodingType());
if (!stats.writeMinMax()) { mergedWriteMinMaxFlag[i] = stats.writeMinMax(); String columnName = encodedColumnPage.getActualPage().getColumnSpec().getFieldName(); LOGGER.info("Min Max writing of blocklet ignored for column with name " + columnName); break;
String compressorName = meta.getCompressorName(); TableSpec.ColumnSpec spec = TableSpec.ColumnSpec .newInstance(columnSpec.getFieldName(), DataTypes.INT, ColumnType.MEASURE); ColumnPage rowOffset = ColumnPage.newPage( new ColumnPageEncoderMeta(spec, DataTypes.INT, compressorName),
/**
 * Writes the bytes of one row into this page. While the page-level dictionary
 * is still active the value is additionally dictionary-encoded into the
 * encoded column page; once the dictionary threshold is hit the dictionary and
 * the encoded page are discarded and only raw data is stored from then on.
 *
 * @param rowId row number
 * @param bytes actual data
 */
@Override public void putBytes(int rowId, byte[] bytes) {
  if (null == pageLevelDictionary) {
    // dictionary fallback already happened (or was never created): raw data only
    actualDataColumnPage.putBytes(rowId, bytes);
    return;
  }
  try {
    // raw value is stored first so nothing is lost if encoding fails below
    actualDataColumnPage.putBytes(rowId, bytes);
    dummyKey[0] = pageLevelDictionary.getDictionaryValue(bytes);
    encodedDataColumnPage.putBytes(rowId, keyGenerator.generateKey(dummyKey));
  } catch (DictionaryThresholdReachedException e) {
    LOGGER.warn("Local Dictionary threshold reached for the column: " + actualDataColumnPage
        .getColumnSpec().getFieldName() + ", " + e.getMessage());
    // drop the dictionary and release the now-useless encoded page
    pageLevelDictionary = null;
    encodedDataColumnPage.freeMemory();
    encodedDataColumnPage = null;
  } catch (KeyGenException e) {
    LOGGER.error("Unable to generate key for: " + actualDataColumnPage
        .getColumnSpec().getFieldName(), e);
    throw new RuntimeException(e);
  }
}
private static ColumnPage getLVBytesColumnPage(TableSpec.ColumnSpec columnSpec, byte[] lvEncodedBytes, DataType dataType, int lvLength, String compressorName) throws MemoryException { // extract length and data, set them to rowOffset and unsafe memory correspondingly int rowId = 0; TableSpec.ColumnSpec spec = TableSpec.ColumnSpec .newInstance(columnSpec.getFieldName(), DataTypes.INT, ColumnType.MEASURE); ColumnPage rowOffset = ColumnPage.newPage( new ColumnPageEncoderMeta(spec, DataTypes.INT, compressorName), CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT); int length; int offset; int lvEncodedOffset = 0; int counter = 0; // extract Length field in input and calculate total length for (offset = 0; lvEncodedOffset < lvEncodedBytes.length; offset += length) { length = ByteUtil.toInt(lvEncodedBytes, lvEncodedOffset); rowOffset.putInt(counter, offset); lvEncodedOffset += lvLength + length; rowId++; counter++; } rowOffset.putInt(counter, offset); return getVarLengthColumnPage(columnSpec, lvEncodedBytes, dataType, lvLength, rowId, rowOffset, offset, compressorName); }
private static ColumnPage getComplexLVBytesColumnPage(TableSpec.ColumnSpec columnSpec, byte[] lvEncodedBytes, DataType dataType, int lvLength, String compressorName) throws MemoryException { // extract length and data, set them to rowOffset and unsafe memory correspondingly int rowId = 0; TableSpec.ColumnSpec spec = TableSpec.ColumnSpec .newInstance(columnSpec.getFieldName(), DataTypes.INT, ColumnType.MEASURE); ColumnPage rowOffset = ColumnPage.newPage( new ColumnPageEncoderMeta(spec, DataTypes.INT, compressorName), CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT); int length; int offset; int lvEncodedOffset = 0; int counter = 0; // extract Length field in input and calculate total length for (offset = 0; lvEncodedOffset < lvEncodedBytes.length; offset += length) { length = ByteUtil.toShort(lvEncodedBytes, lvEncodedOffset); rowOffset.putInt(counter, offset); lvEncodedOffset += lvLength + length; rowId++; counter++; } rowOffset.putInt(counter, offset); return getVarLengthColumnPage(columnSpec, lvEncodedBytes, dataType, lvLength, rowId, rowOffset, offset, compressorName); }
/** * Below method will be used to add dimension column pages * * @param encodedTablePage * encoded table page */ private void addEncodedDimensionPage(EncodedTablePage encodedTablePage) { // for first page create new list if (null == encodedDimensionColumnPages) { encodedDimensionColumnPages = new ArrayList<>(); // adding measure pages for (int i = 0; i < encodedTablePage.getNumDimensions(); i++) { BlockletEncodedColumnPage blockletEncodedColumnPage = new BlockletEncodedColumnPage(executorService, isDecoderBasedFallBackEnabled, localDictionaryGeneratorMap.get( encodedTablePage.getDimension(i).getActualPage().getColumnSpec() .getFieldName())); blockletEncodedColumnPage.addEncodedColumnPage(encodedTablePage.getDimension(i)); encodedDimensionColumnPages.add(blockletEncodedColumnPage); } } else { for (int i = 0; i < encodedTablePage.getNumDimensions(); i++) { encodedDimensionColumnPages.get(i).addEncodedColumnPage(encodedTablePage.getDimension(i)); } } }
/**
 * Creates a local-dictionary backed column page wrapping the actual data page.
 * A page-level dictionary and key generator are set up only while the column's
 * dictionary threshold has not yet been reached; otherwise the encoded page is
 * released immediately, since it will never be written to.
 */
protected LocalDictColumnPage(ColumnPage actualDataColumnPage, ColumnPage encodedColumnpage,
    LocalDictionaryGenerator localDictionaryGenerator, boolean isComplexTypePrimitive,
    boolean isDecoderBasedFallBackEnabled) {
  super(actualDataColumnPage.getColumnPageEncoderMeta(), actualDataColumnPage.getPageSize());
  if (localDictionaryGenerator.isThresholdReached()) {
    // dictionary already overflowed for this column: the encoded page is of no use
    encodedColumnpage.freeMemory();
  } else {
    pageLevelDictionary = new PageLevelDictionary(localDictionaryGenerator,
        actualDataColumnPage.getColumnSpec().getFieldName(), actualDataColumnPage.getDataType(),
        isComplexTypePrimitive, actualDataColumnPage.getColumnCompressorName());
    this.encodedDataColumnPage = encodedColumnpage;
    // surrogate keys are packed for at most LOCAL_DICTIONARY_MAX + 1 distinct values
    this.keyGenerator = KeyGeneratorFactory
        .getKeyGenerator(new int[] { CarbonCommonConstants.LOCAL_DICTIONARY_MAX + 1 });
    this.dummyKey = new int[1];
  }
  this.isDecoderBasedFallBackEnabled = isDecoderBasedFallBackEnabled;
  this.actualDataColumnPage = actualDataColumnPage;
}
/**
 * Encodes the given dimension column page. When no dedicated codec applies,
 * falls back to direct compression of the raw byte arrays.
 *
 * Fix: {@code pageEncoder.getEncoderMeta(page)} was recomputed three times
 * just to build the debug message; it is now computed once.
 *
 * @param page dimension column page to encode
 * @return the encoded column page
 * @throws IOException     on encoding failure
 * @throws MemoryException on memory allocation failure
 */
public static EncodedColumnPage encodedColumn(ColumnPage page)
    throws IOException, MemoryException {
  ColumnPageEncoder pageEncoder = createCodecForDimension(page);
  if (pageEncoder == null) {
    // no specialized codec for this page: store as direct-compressed byte arrays
    ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
    return encoder.encode(page);
  }
  ColumnPageEncoderMeta encoderMeta = pageEncoder.getEncoderMeta(page);
  LOGGER.debug("Encoder result ---> Source data type: " + encoderMeta.getColumnSpec()
      .getSchemaDataType() + " Destination data type: " + encoderMeta.getStoreDataType()
      + " for the column: " + encoderMeta.getColumnSpec().getFieldName());
  return pageEncoder.encode(page);
}
/**
 * Base constructor for variable-length pages: allocates a companion INT page
 * that will hold the byte offset of each row's data within this page.
 * A MemoryException during that allocation is rethrown unchecked with its
 * cause preserved.
 */
VarLengthColumnPageBase(ColumnPageEncoderMeta columnPageEncoderMeta, int pageSize) {
  super(columnPageEncoderMeta, pageSize);
  String fieldName = columnPageEncoderMeta.getColumnSpec().getFieldName();
  TableSpec.ColumnSpec offsetSpec =
      TableSpec.ColumnSpec.newInstance(fieldName, DataTypes.INT, ColumnType.MEASURE);
  ColumnPageEncoderMeta offsetMeta = new ColumnPageEncoderMeta(offsetSpec, DataTypes.INT,
      columnPageEncoderMeta.getCompressorName());
  try {
    rowOffset = ColumnPage.newPage(offsetMeta, pageSize);
  } catch (MemoryException e) {
    throw new RuntimeException(e);
  }
  totalLength = 0;
}