/**
 * Builds the encoder metadata describing how the given page is encoded.
 * <p>
 * The column spec and compressor name are read from the page, while the data type and the
 * statistics come from the enclosing scope ({@code targetDataType} and {@code stats}).
 *
 * @param inputPage page for which the metadata is created
 * @return newly created encoder metadata
 */
@Override protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta encoderMeta = new ColumnPageEncoderMeta(
      inputPage.getColumnSpec(),
      targetDataType,
      stats,
      inputPage.getColumnCompressorName());
  return encoderMeta;
}
/**
 * Creates the encoder metadata for the given page.
 * <p>
 * Uses the page's column spec and compressor name combined with {@code targetDataType} and
 * {@code stats} taken from the enclosing scope.
 *
 * @param inputPage page whose column spec and compressor name are recorded
 * @return newly created encoder metadata
 */
@Override protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta pageMeta = new ColumnPageEncoderMeta(
      inputPage.getColumnSpec(),
      targetDataType,
      stats,
      inputPage.getColumnCompressorName());
  return pageMeta;
}
/**
 * Returns the encoder metadata for the given page.
 * <p>
 * The metadata is assembled from the page's column spec, the enclosing scope's
 * {@code targetDataType} and {@code stats}, and the page's compressor name.
 *
 * @param inputPage page being encoded
 * @return newly created encoder metadata
 */
@Override protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta result = new ColumnPageEncoderMeta(
      inputPage.getColumnSpec(),
      targetDataType,
      stats,
      inputPage.getColumnCompressorName());
  return result;
}
/**
 * Builds the encoder metadata for the given page.
 * <p>
 * Column spec, statistics and compressor name are all read from the page; only the data type
 * ({@code targetDataType}) comes from the enclosing scope.
 *
 * @param inputPage page for which the metadata is created
 * @return newly created encoder metadata
 */
@Override protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta encoderMeta = new ColumnPageEncoderMeta(
      inputPage.getColumnSpec(),
      targetDataType,
      inputPage.getStatistics(),
      inputPage.getColumnCompressorName());
  return encoderMeta;
}
/**
 * Builds the encoder metadata for the given page using only values read from the page itself:
 * its column spec, data type, statistics and compressor name.
 *
 * @param inputPage page for which the metadata is created
 * @return newly created encoder metadata
 */
@Override protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta encoderMeta = new ColumnPageEncoderMeta(
      inputPage.getColumnSpec(),
      inputPage.getDataType(),
      inputPage.getStatistics(),
      inputPage.getColumnCompressorName());
  return encoderMeta;
}
};
/**
 * Adds one row of column data to the page.
 * <p>
 * The bytes are always written to the actual data page. While a page level dictionary is
 * present, the dictionary value of the bytes is additionally converted into a key and written
 * to the encoded page. Once the dictionary threshold is reached, the page level dictionary and
 * the encoded page are released, so later rows only go to the actual data page.
 *
 * @param rowId row number
 * @param bytes actual data
 */
@Override public void putBytes(int rowId, byte[] bytes) {
  if (null == pageLevelDictionary) {
    // no page level dictionary: only the actual data is stored
    actualDataColumnPage.putBytes(rowId, bytes);
    return;
  }
  try {
    actualDataColumnPage.putBytes(rowId, bytes);
    dummyKey[0] = pageLevelDictionary.getDictionaryValue(bytes);
    encodedDataColumnPage.putBytes(rowId, keyGenerator.generateKey(dummyKey));
  } catch (DictionaryThresholdReachedException e) {
    LOGGER.warn("Local Dictionary threshold reached for the column: "
        + actualDataColumnPage.getColumnSpec().getFieldName() + ", " + e.getMessage());
    // drop the dictionary and release the encoded page; the actual data page was already
    // written for this row before the exception was raised
    pageLevelDictionary = null;
    encodedDataColumnPage.freeMemory();
    encodedDataColumnPage = null;
  } catch (KeyGenException e) {
    LOGGER.error(
        "Unable to generate key for: " + actualDataColumnPage.getColumnSpec().getFieldName(), e);
    throw new RuntimeException(e);
  }
}
@Override public byte[] getChunkData(int rowId) { byte[] nullBitSet = getNullBitSet(rowId, columnPage.getColumnSpec().getColumnType()); if (nullBitSet != null) { // if this row is null, return default null represent in byte array return nullBitSet; } else { if (isExplicitSorted()) { rowId = getInvertedReverseIndex(rowId); } return getChunkDataInBytes(rowId); } }
/**
 * Builds the RLE encoder metadata for the given page from its column spec, data type, page
 * size, statistics and compressor name.
 *
 * @param inputPage page for which the metadata is created
 * @return newly created {@code RLEEncoderMeta}
 */
@Override protected ColumnPageEncoderMeta getEncoderMeta(ColumnPage inputPage) {
  ColumnPageEncoderMeta rleMeta = new RLEEncoderMeta(
      inputPage.getColumnSpec(),
      inputPage.getDataType(),
      inputPage.getPageSize(),
      inputPage.getStatistics(),
      inputPage.getColumnCompressorName());
  return rleMeta;
}
private Object getValue(ColumnPage page, int rowId) { DataType type = page.getColumnSpec().getSchemaDataType(); Object value = null; if (type == DataTypes.BYTE) {
/** * Below method will be used to add dimension column pages * * @param encodedTablePage * encoded table page */ private void addEncodedDimensionPage(EncodedTablePage encodedTablePage) { // for first page create new list if (null == encodedDimensionColumnPages) { encodedDimensionColumnPages = new ArrayList<>(); // adding measure pages for (int i = 0; i < encodedTablePage.getNumDimensions(); i++) { BlockletEncodedColumnPage blockletEncodedColumnPage = new BlockletEncodedColumnPage(executorService, isDecoderBasedFallBackEnabled, localDictionaryGeneratorMap.get( encodedTablePage.getDimension(i).getActualPage().getColumnSpec() .getFieldName())); blockletEncodedColumnPage.addEncodedColumnPage(encodedTablePage.getDimension(i)); encodedDimensionColumnPages.add(blockletEncodedColumnPage); } } else { for (int i = 0; i < encodedTablePage.getNumDimensions(); i++) { encodedDimensionColumnPages.get(i).addEncodedColumnPage(encodedTablePage.getDimension(i)); } } }
@Override public FallbackEncodedColumnPage call() throws Exception { // disable encoding using local dictionary encodedColumnPage.getActualPage().disableLocalDictEncoding(); // get column spec for existing column page TableSpec.ColumnSpec columnSpec = encodedColumnPage.getActualPage().getColumnSpec(); FallbackEncodedColumnPage fallbackEncodedColumnPage = CarbonUtil .getFallBackEncodedColumnPage(encodedColumnPage.getActualPage(), pageIndex, columnSpec); // here freeing the memory of raw column page as fallback is done and column page will not // be used. // This is required to free the memory once it is of no use encodedColumnPage.freeMemory(); return fallbackEncodedColumnPage; } }
if (!stats.writeMinMax()) { mergedWriteMinMaxFlag[i] = stats.writeMinMax(); String columnName = encodedColumnPage.getActualPage().getColumnSpec().getFieldName(); LOGGER.info("Min Max writing of blocklet ignored for column with name " + columnName); break;
/**
 * Adds complex column data to the column page at the given depth.
 * <p>
 * Values are written starting at the row position stored in {@code currentRowIdList[depth]},
 * which is advanced by the number of values added. For a {@code BYTE_ARRAY} page the bytes are
 * stored as they are. For any other page type a null or empty value is recorded as null (page,
 * stats collector and null bit set are all updated), and any other value is first converted
 * according to the column's schema data type.
 *
 * @param depth
 *          index of the column page, asserted to be at most {@code complexColumnIndex}
 * @param dataList
 *          values to add, one per row
 */
public void putComplexData(int depth, List<byte[]> dataList) {
  assert (depth <= this.complexColumnIndex);
  int rowPosition = currentRowIdList[depth];
  for (byte[] value : dataList) {
    if (columnPages[depth].getDataType() == DataTypes.BYTE_ARRAY) {
      columnPages[depth].putData(rowPosition, value);
    } else if (value == null || value.length == 0) {
      columnPages[depth].putNull(rowPosition);
      columnPages[depth].statsCollector.updateNull(rowPosition);
      columnPages[depth].nullBitSet.set(rowPosition);
    } else {
      columnPages[depth].putData(rowPosition,
          DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value,
              columnPages[depth].getColumnSpec().getSchemaDataType(), false));
    }
    rowPosition++;
  }
  currentRowIdList[depth] = rowPosition;
}
@Override public int compareTo(int rowId, byte[] compareValue) { // rowId is the inverted index, but the null bitset is based on actual data int nullBitSetRowId = rowId; if (isExplicitSorted()) { nullBitSetRowId = getInvertedIndex(rowId); } byte[] nullBitSet = getNullBitSet(nullBitSetRowId, columnPage.getColumnSpec().getColumnType()); if (nullBitSet != null && ByteUtil.UnsafeComparer.INSTANCE.compareTo(nullBitSet, compareValue) == 0) { // check if the compare value is a null value // if the compare value is null and the data is also null we can directly return 0 return 0; } else { byte[] chunkData = this.getChunkDataInBytes(rowId); return ByteUtil.UnsafeComparer.INSTANCE.compareTo(chunkData, compareValue); } }
/**
 * Get the new column page based on the sorted data.
 * <p>
 * When no index storage is present the input page is returned unchanged. Otherwise a new page
 * with the input's column spec, data type, compressor name and page size is created and filled
 * with the data page of the index storage.
 *
 * @param input page to rebuild from the index storage data
 * @return the newly created page, or {@code input} itself when there is no index storage
 * @throws MemoryException declared by the signature; presumably raised while creating or
 *                         filling the new page
 */
public ColumnPage getSortedColumnPageIfRequired(ColumnPage input) throws MemoryException {
  if (null == indexStorage) {
    return input;
  }
  Object[] storedData = indexStorage.getDataPage();
  ColumnPageEncoderMeta newPageMeta = new ColumnPageEncoderMeta(
      input.getColumnSpec(),
      input.getDataType(),
      input.getColumnPageEncoderMeta().getCompressorName());
  ColumnPage newPage = ColumnPage.newPage(newPageMeta, input.getPageSize());
  putDataToPage(newPage, storedData);
  return newPage;
}
/**
 * Builds the min/max index of the given page from the page statistics.
 * <p>
 * For a {@code PLAIN_VALUE} column that is encoded with meta, min and max are serialized with
 * {@code DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn}; for every other
 * column {@code CarbonUtil.getValueAsBytes} is used. The index also records whether min/max
 * should be written, as reported by the statistics.
 *
 * @param inputPage page whose statistics are used
 * @param encoders  encodings applied to the page
 * @return index holding one max value, one min value and the min/max presence flag
 */
private BlockletMinMaxIndex buildMinMaxIndex(ColumnPage inputPage, List<Encoding> encoders) {
  BlockletMinMaxIndex index = new BlockletMinMaxIndex();
  boolean isPlainValueWithMeta = CarbonUtil.isEncodedWithMeta(encoders)
      && inputPage.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE;
  byte[] maxBytes;
  byte[] minBytes;
  if (isPlainValueWithMeta) {
    maxBytes = DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(
        inputPage.getStatistics().getMax(), inputPage.getDataType());
    minBytes = DataTypeUtil.getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(
        inputPage.getStatistics().getMin(), inputPage.getDataType());
  } else {
    maxBytes =
        CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMax());
    minBytes =
        CarbonUtil.getValueAsBytes(inputPage.getDataType(), inputPage.getStatistics().getMin());
  }
  index.addToMax_values(ByteBuffer.wrap(maxBytes));
  index.addToMin_values(ByteBuffer.wrap(minBytes));
  index.addToMin_max_presence(inputPage.getStatistics().writeMinMax());
  return index;
}
/**
 * Creates a local dictionary column page on top of the given actual data page.
 * <p>
 * If the generator's threshold is already reached, the encoded page is of no use and its
 * memory is freed right away. Otherwise a page level dictionary, the encoded page, the key
 * generator and the one-element key buffer are set up for encoding with the local dictionary.
 *
 * @param actualDataColumnPage          page holding the actual data; also supplies the encoder
 *                                      meta and page size passed to the super constructor
 * @param encodedColumnpage             page for the dictionary encoded data
 * @param localDictionaryGenerator      generator backing the page level dictionary
 * @param isComplexTypePrimitive        passed through to the page level dictionary
 * @param isDecoderBasedFallBackEnabled stored in the field of the same name
 */
protected LocalDictColumnPage(ColumnPage actualDataColumnPage, ColumnPage encodedColumnpage,
    LocalDictionaryGenerator localDictionaryGenerator, boolean isComplexTypePrimitive,
    boolean isDecoderBasedFallBackEnabled) {
  super(actualDataColumnPage.getColumnPageEncoderMeta(), actualDataColumnPage.getPageSize());
  this.actualDataColumnPage = actualDataColumnPage;
  this.isDecoderBasedFallBackEnabled = isDecoderBasedFallBackEnabled;
  if (localDictionaryGenerator.isThresholdReached()) {
    // threshold already reached: free the encoded column page memory as it is of no use
    encodedColumnpage.freeMemory();
  } else {
    // threshold not reached: create page level dictionary for encoding with local dictionary
    pageLevelDictionary = new PageLevelDictionary(localDictionaryGenerator,
        actualDataColumnPage.getColumnSpec().getFieldName(),
        actualDataColumnPage.getDataType(), isComplexTypePrimitive,
        actualDataColumnPage.getColumnCompressorName());
    this.encodedDataColumnPage = encodedColumnpage;
    this.keyGenerator = KeyGeneratorFactory
        .getKeyGenerator(new int[] { CarbonCommonConstants.LOCAL_DICTIONARY_MAX + 1 });
    this.dummyKey = new int[1];
  }
}
private byte[] getChunkDataInBytes(int rowId) { ColumnType columnType = columnPage.getColumnSpec().getColumnType(); DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); DataType targetDataType = columnPage.getDataType(); if (null != localDictionary) {
private static ColumnPageEncoder createCodecForDimension(ColumnPage inputPage) { TableSpec.ColumnSpec columnSpec = inputPage.getColumnSpec(); if (columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { if (inputPage.getDataType() == DataTypes.BYTE_ARRAY || inputPage.getDataType() == DataTypes.STRING) { // use legacy encoder return null; } else if ((inputPage.getDataType() == DataTypes.BYTE) || (inputPage.getDataType() == DataTypes.SHORT) || (inputPage.getDataType() == DataTypes.INT) || ( inputPage.getDataType() == DataTypes.LONG)) { return selectCodecByAlgorithmForIntegral(inputPage.getStatistics(), true, columnSpec) .createEncoder(null); } else if ((inputPage.getDataType() == DataTypes.FLOAT) || (inputPage.getDataType() == DataTypes.DOUBLE)) { return selectCodecByAlgorithmForFloating(inputPage.getStatistics(), true, columnSpec) .createEncoder(null); } } // use legacy encoder return null; }
} else if (i < noDictionaryCount) { if (DataTypeUtil .isPrimitiveColumn(noDictDimensionPages[i].getColumnSpec().getSchemaDataType())) {