/**
 * Closes the data input stream held by this instance.
 * The actual closing is delegated to {@code CarbonUtil.closeStreams}.
 */
public void close() {
  CarbonUtil.closeStreams(this.dataInputStream);
}
/**
 * Clears the dictionary cache entry for the dictionary held by this instance.
 * The work is delegated to {@code CarbonUtil.clearDictionaryCache}.
 */
@Override
public void clear() {
  CarbonUtil.clearDictionaryCache(this.dictionary);
}
/**
 * Cleans up the local folders and files created during the compaction process.
 *
 * <p>The cleanup is best effort: a failure on one location is logged and the remaining
 * locations are still attempted. If any deletion is interrupted, the interrupt status of the
 * current thread is restored once all locations have been tried, so that callers can still
 * observe the interruption.
 */
private void deleteTempStoreLocation() {
  if (null == tempStoreLocation) {
    return;
  }
  // remember an interruption instead of re-asserting it immediately, so that the
  // remaining locations are still cleaned up
  boolean interrupted = false;
  for (String tempLoc : tempStoreLocation) {
    try {
      CarbonUtil.deleteFoldersAndFiles(new File(tempLoc));
    } catch (IOException | InterruptedException e) {
      if (e instanceof InterruptedException) {
        interrupted = true;
      }
      LOGGER.error("Problem deleting local folders during compaction: " + e.getMessage());
    }
  }
  if (interrupted) {
    // do not swallow the interruption: hand it back to the owning thread
    Thread.currentThread().interrupt();
  }
}
@Override protected byte[] convertDictionaryValue(int indexColIdx, Object value) { // input value from onPageAdded in load process is byte[] // for dict columns including dictionary and date columns decode value to get the surrogate key int thisKeyIdx = indexCol2MdkIdx.get(indexColumns.get(indexColIdx).getColName()); int surrogateKey = CarbonUtil.getSurrogateInternal((byte[]) value, 0, columnarSplitter.getBlockKeySize()[thisKeyIdx]); // store the dictionary key in bloom return CarbonUtil.getValueAsBytes(DataTypes.INT, surrogateKey); } }
/**
 * Selects the dimensions whose encoder list contains {@code Encoding.DICTIONARY} or
 * {@code Encoding.DIRECT_DICTIONARY}.
 *
 * @param carbonDimensions dimensions to inspect
 * @return a new list with the matching dimensions, in their original order
 */
private static List<CarbonDimension> getCarbonDimsMappedToKeyGenerator(
    List<CarbonDimension> carbonDimensions) {
  List<CarbonDimension> keyGenDims = new ArrayList<CarbonDimension>(carbonDimensions.size());
  for (CarbonDimension dimension : carbonDimensions) {
    if (!CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DICTIONARY)
        && !CarbonUtil.hasEncoding(dimension.getEncoder(), Encoding.DIRECT_DICTIONARY)) {
      // neither dictionary nor direct dictionary encoded: not part of the result
      continue;
    }
    keyGenDims.add(dimension);
  }
  return keyGenDims;
}
/**
 * Builds the file header for the table and writes it to {@code outputStream}.
 *
 * <p>The header is created without a footer, marked as splittable, and carries the stream
 * sync marker and the configured compressor name.
 *
 * @throws IOException if writing the serialized header to the output stream fails
 */
private void writeFileHeader() throws IOException {
  String tableName = carbonTable.getTableName();
  List<ColumnSchema> wrapperSchemas = CarbonUtil.getColumnSchemaList(
      carbonTable.getDimensionByTableName(tableName),
      carbonTable.getMeasureByTableName(tableName));

  // one entry per column, each set to Integer.MAX_VALUE
  int[] maxDimLens = new int[wrapperSchemas.size()];
  java.util.Arrays.fill(maxDimLens, Integer.MAX_VALUE);
  int[] formattedCardinality = CarbonUtil.getFormattedCardinality(maxDimLens, wrapperSchemas);

  List<Integer> cardinality = new ArrayList<>();
  List<org.apache.carbondata.format.ColumnSchema> thriftSchemas =
      AbstractFactDataWriter.getColumnSchemaListAndCardinality(
          cardinality, formattedCardinality, wrapperSchemas);

  FileHeader header =
      CarbonMetadataUtil.getFileHeader(true, thriftSchemas, System.currentTimeMillis());
  header.setIs_footer_present(false);
  header.setIs_splitable(true);
  header.setSync_marker(CarbonStreamOutputFormat.CARBON_SYNC_MARKER);
  header.setCompressor_name(compressorName);

  byte[] headerBytes = CarbonUtil.getByteArray(header);
  outputStream.write(headerBytes);
}
/**
 * Finishes writing the current file: notifies the datamap block end, closes the output
 * stream and channel and, unless data is written directly to the store path, copies the
 * temp file to the carbon data directory.
 *
 * @param copyInCurrentThread {@code true} to copy and delete the temp file in the calling
 *                            thread, {@code false} to submit the copy to the executor service
 */
protected void commitCurrentFile(boolean copyInCurrentThread) {
  notifyDataMapBlockEnd();
  CarbonUtil.closeStreams(this.fileOutputStream, this.fileChannel);
  if (enableDirectlyWriteDataToStorePath) {
    // the copy step is skipped when enableDirectlyWriteDataToStorePath is set
    return;
  }
  try {
    if (!copyInCurrentThread) {
      // hand the copy over to a background task and keep its future for later
      executorServiceSubmitList
          .add(executorService.submit(new CompleteHdfsBackendThread(carbonDataFileTempPath)));
      return;
    }
    CarbonUtil.copyCarbonDataFileToCarbonStorePath(
        carbonDataFileTempPath, model.getCarbonDataDirectoryPath(), fileSizeInBytes);
    FileFactory.deleteFile(
        carbonDataFileTempPath, FileFactory.getFileType(carbonDataFileTempPath));
  } catch (IOException e) {
    LOGGER.error(e);
  }
}
private void initializeFilter() { List<ColumnSchema> wrapperColumnSchemaList = CarbonUtil .getColumnSchemaList(carbonTable.getDimensionByTableName(carbonTable.getTableName()), carbonTable.getMeasureByTableName(carbonTable.getTableName())); int[] dimLensWithComplex = new int[wrapperColumnSchemaList.size()]; for (int i = 0; i < dimLensWithComplex.length; i++) { dimLensWithComplex[i] = Integer.MAX_VALUE; } int[] dictionaryColumnCardinality = CarbonUtil.getFormattedCardinality(dimLensWithComplex, wrapperColumnSchemaList); SegmentProperties segmentProperties = new SegmentProperties(wrapperColumnSchemaList, dictionaryColumnCardinality); Map<Integer, GenericQueryType> complexDimensionInfoMap = new HashMap<>(); FilterResolverIntf resolverIntf = model.getFilterExpressionResolverTree(); filter = FilterUtil.getFilterExecuterTree(resolverIntf, segmentProperties, complexDimensionInfoMap); // for row filter, we need update column index FilterUtil.updateIndexOfColumnExpression(resolverIntf.getFilterExpression(), carbonTable.getDimensionOrdinalMax()); }
if (CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX)) { invertedIndexes = CarbonUtil .getUnCompressColumnIndex(pageMetadata.rowid_page_length, pageData, offset); offset += pageMetadata.rowid_page_length; if (vectorInfo == null) { invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); if (CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.RLE)) { rlePage = CarbonUtil.getIntArray(pageData, offset, pageMetadata.rle_page_length); if (!CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.DICTIONARY)) { DimensionChunkStoreFactory.DimensionStoreType dimStoreType = null != rawColumnPage.getLocalDictionary() ? DimensionChunkStoreFactory.DimensionStoreType.LOCAL_DICT : (CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.DIRECT_COMPRESS_VARCHAR) ? DimensionChunkStoreFactory.DimensionStoreType.VARIABLE_INT_LENGTH : DimensionChunkStoreFactory.DimensionStoreType.VARIABLE_SHORT_LENGTH);
.getNoDictionaryCount()); List<ColumnSchema> wrapperColumnSchema = CarbonUtil .getColumnSchemaList(carbonTable.getDimensionByTableName(tableName), carbonTable.getMeasureByTableName(tableName)); carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema); .getFormattedCardinality(segmentProperties.getDimColumnsCardinality(), wrapperColumnSchema); carbonFactDataHandlerModel.setColCardinality(formattedCardinality); CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath); carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath); carbonFactDataHandlerModel.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality()); carbonFactDataHandlerModel.initNumberOfCores(); carbonFactDataHandlerModel .setColumnLocalDictGenMap(CarbonUtil.getLocalDictionaryModel(carbonTable)); carbonFactDataHandlerModel.setVarcharDimIdxInNoDict(varcharDimIdxInNoDict); return carbonFactDataHandlerModel;
@Override protected byte[] convertDictionaryValue(int indexColIdx, Object value) { // input value from IndexDataMapRebuildRDD is already decoded as surrogate key return CarbonUtil.getValueAsBytes(DataTypes.INT, value); }
/**
 * Returns the total serialized size of this page.
 *
 * @return length of the serialized page metadata plus length of the encoded data, in bytes
 */
public int getTotalSerializedSize() {
  return CarbonUtil.getByteArray(pageMetadata).length + encodedData.length;
}
if (dimensionChunksOffset.size() - 1 == blockIndex) { dimensionColumnChunk = CarbonUtil.readDataChunk(rawData, copySourcePoint, dimensionRawColumnChunk.getLength()); int totalDimensionDataLength = dimensionColumnChunk.data_page_length + dimensionColumnChunk.rle_page_length CarbonUtil.readDataChunk(rawData, copySourcePoint, dimensionChunksLength.get(blockIndex)); copySourcePoint += dimensionChunksLength.get(blockIndex); if (CarbonUtil.hasEncoding(dimensionColumnChunk.encoders, Encoding.INVERTED_INDEX)) { byte[] dataInv = new byte[dimensionColumnChunk.rowid_page_length]; rawData.position(copySourcePoint); rawData.get(dataInv); invertedIndexes = CarbonUtil .getUnCompressColumnIndex(dimensionColumnChunk.rowid_page_length, dataInv, numberCompressor, 0); copySourcePoint += dimensionColumnChunk.rowid_page_length; invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); if (CarbonUtil.hasEncoding(dimensionColumnChunk.encoders, Encoding.RLE)) { byte[] dataRle = new byte[dimensionColumnChunk.rle_page_length]; rawData.position(copySourcePoint); if (!CarbonUtil.hasEncoding(dimensionColumnChunk.encoders, Encoding.DICTIONARY)) { columnDataChunk = new VariableLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse,
List<Encoding> encodings = pageMetadata.getEncoders(); org.apache.carbondata.core.metadata.encoder.Encoding.validateEncodingTypes(encodings); if (CarbonUtil.isEncodedWithMeta(encodings)) { int[] invertedIndexes = new int[0]; int[] invertedIndexesReverse = new int[0]; CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX); int dataOffset = offset; if (isExplicitSorted) { offset += pageMetadata.data_page_length; invertedIndexes = CarbonUtil .getUnCompressColumnIndex(pageMetadata.rowid_page_length, pageData, offset); if (vectorInfo == null) { invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); } else { vectorInfo.invertedIndex = invertedIndexes;
new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType()); Boolean isDictExists = CarbonUtil.isFileExistsForGivenColumn(identifier); Dictionary dictionary = null; long t1 = System.currentTimeMillis(); CarbonUtil.clearDictionaryCache(dictionary);
/**
 * Fills {@code dimensionMaxValue}, {@code dimensionMinValue} and
 * {@code writeMinMaxForDimensions} from the statistics of each encoded dimension page.
 *
 * @param dimensions encoded dimension pages; entry {@code i} updates slot {@code i}
 */
private void updateDimensionMinMax(EncodedColumnPage[] dimensions) {
  for (int col = 0; col < dimensions.length; col++) {
    EncodedColumnPage page = dimensions[col];
    SimpleStatsResult pageStats = page.getStats();
    Object minValue = pageStats.getMin();
    Object maxValue = pageStats.getMax();
    boolean encodedWithMeta =
        CarbonUtil.isEncodedWithMeta(page.getPageMetadata().getEncoders());
    if (!encodedWithMeta) {
      dimensionMaxValue[col] = CarbonUtil.getValueAsBytes(pageStats.getDataType(), maxValue);
      dimensionMinValue[col] = CarbonUtil.getValueAsBytes(pageStats.getDataType(), minValue);
    } else {
      // pages encoded with meta use the data-type based no-dictionary conversion
      dimensionMaxValue[col] = DataTypeUtil
          .getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(maxValue, pageStats.getDataType());
      dimensionMinValue[col] = DataTypeUtil
          .getMinMaxBytesBasedOnDataTypeForNoDictionaryColumn(minValue, pageStats.getDataType());
    }
    writeMinMaxForDimensions[col] = pageStats.writeMinMax();
  }
}
if (CarbonUtil.hasEncoding(dataChunk.getEncodingList(), Encoding.INVERTED_INDEX)) { byte[] columnIndexData; .getUnCompressColumnIndex(dataChunk.getRowIdPageLength(), columnIndexData, numberCompressor, 0); invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); .hasEncoding(dataChunk.getEncodingList(), Encoding.RLE)) { .hasEncoding(dataChunk.getEncodingList(), Encoding.DICTIONARY)) { columnDataChunk = new VariableLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse,
/**
 * Deletes the datamap store directory of the given segment, if it exists.
 *
 * @param segment segment whose datamap data should be removed
 * @throws IOException if the file system access fails, or if the deletion is interrupted;
 *                     in the latter case the interrupt status of the current thread is
 *                     restored and the {@link InterruptedException} is attached as the cause
 */
@Override
public void deleteDatamapData(Segment segment) throws IOException {
  try {
    String segmentId = segment.getSegmentNo();
    String datamapPath = CarbonTablePath
        .getDataMapStorePath(tableIdentifier.getTablePath(), segmentId, dataMapName);
    if (FileFactory.isFileExist(datamapPath)) {
      CarbonFile file =
          FileFactory.getCarbonFile(datamapPath, FileFactory.getFileType(datamapPath));
      CarbonUtil.deleteFoldersAndFilesSilent(file);
    }
  } catch (InterruptedException ex) {
    // converting to IOException must not hide the interruption from the owning thread
    Thread.currentThread().interrupt();
    // keep the original exception as the cause so the stack trace is not lost
    throw new IOException("drop datamap failed, failed to delete datamap directory", ex);
  }
}
CarbonTable carbonTable) { List<ColumnSchema> wrapperColumnSchema = CarbonUtil .getColumnSchemaList(carbonTable.getDimensionByTableName(carbonTable.getTableName()), carbonTable.getMeasureByTableName(carbonTable.getTableName())); boolean islocalDictEnabled = carbonTable.isLocalDictionaryEnabled();
/**
 * Builds the formatted cardinality array for the dimension columns of a schema list.
 *
 * <p>Columns encoded with {@code Encoding.DICTIONARY} take the next unused value of
 * {@code dictionaryColumnCardinality}, other dimension columns get {@code -1}, and columns
 * that are not dimension columns are left out of the result.
 *
 * @param dictionaryColumnCardinality cardinalities consumed in order, one per dictionary
 *                                    encoded column
 * @param wrapperColumnSchemaList column schemas to walk through
 * @return It returns formatted cardinality by adding -1 value for NoDictionary columns
 */
public static int[] getFormattedCardinality(int[] dictionaryColumnCardinality,
    List<ColumnSchema> wrapperColumnSchemaList) {
  List<Integer> formatted = new ArrayList<>();
  int nextDictionaryIdx = 0;
  for (ColumnSchema columnSchema : wrapperColumnSchemaList) {
    if (CarbonUtil.hasEncoding(columnSchema.getEncodingList(), Encoding.DICTIONARY)) {
      formatted.add(dictionaryColumnCardinality[nextDictionaryIdx]);
      nextDictionaryIdx++;
    } else if (columnSchema.isDimensionColumn()) {
      // dimension column without dictionary encoding
      formatted.add(-1);
    }
  }
  Integer[] boxed = formatted.toArray(new Integer[formatted.size()]);
  return ArrayUtils.toPrimitive(boxed);
}