/**
 * Builds a Field out of the metadata carried by a ColumnSchema.
 *
 * The column name, data type, schema ordinal, precision and scale are copied from the schema;
 * the list of children starts out empty.
 *
 * @param columnSchema ColumnSchema, stores the information about the column meta data
 */
public Field(ColumnSchema columnSchema) {
  this.children = new LinkedList<>();
  this.name = columnSchema.getColumnName();
  this.type = columnSchema.getDataType();
  this.schemaOrdinal = columnSchema.getSchemaOrdinal();
  this.precision = columnSchema.getPrecision();
  this.scale = columnSchema.getScale();
}
/** * Convert schema to binary */ public static byte[] convertSchemaToBinary(List<ColumnSchema> columnSchemas) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); DataOutput dataOutput = new DataOutputStream(stream); dataOutput.writeShort(columnSchemas.size()); for (ColumnSchema columnSchema : columnSchemas) { if (columnSchema.getColumnReferenceId() == null) { columnSchema.setColumnReferenceId(columnSchema.getColumnUniqueId()); } columnSchema.write(dataOutput); } byte[] byteArray = stream.toByteArray(); // Compress to reduce the size of schema return CompressorFactory.NativeSupportedCompressor.SNAPPY.getCompressor().compressByte( byteArray); }
/**
 * Traverses the child columns of a complex column specified in local dictionary include,
 * and sets local dictionary for all the string and varchar leaf columns that are dimensions
 * and do not already carry the DICTIONARY encoding.
 *
 * The columns are expected in flattened order: a column with children is immediately
 * followed by its whole subtree. For a nested complex child the traversal recurses and
 * resumes at the ordinal returned by the recursion, so the complete subtree is skipped
 * regardless of how many descendants it has.
 *
 * @param allColumns flattened list of all column schemas
 * @param dimensionOrdinal index in allColumns of the first child to visit
 * @param childColumnCount number of direct children to visit at this level
 * @return the index just past the last column visited (including nested descendants)
 */
private static int setLocalDictForComplexColumns(List<ColumnSchema> allColumns,
    int dimensionOrdinal, int childColumnCount) {
  for (int i = 0; i < childColumnCount; i++) {
    ColumnSchema column = allColumns.get(dimensionOrdinal);
    // step past the current column itself
    dimensionOrdinal++;
    if (column.getNumberOfChild() > 0) {
      // continue after the nested subtree; the recursion reports where it ended
      dimensionOrdinal = setLocalDictForComplexColumns(allColumns, dimensionOrdinal,
          column.getNumberOfChild());
    } else if (column.isDimensionColumn()
        && (column.getDataType().equals(DataTypes.STRING)
            || column.getDataType().equals(DataTypes.VARCHAR))
        && !column.hasEncoding(Encoding.DICTIONARY)) {
      column.setLocalDictColumn(true);
    }
  }
  return dimensionOrdinal;
}
/**
 * Adds or removes the INVERTED_INDEX encoding so that the encoding list matches the flag.
 * Nothing is changed when the list already reflects the requested state.
 *
 * @param useInvertedIndex the useInvertedIndex to set
 */
public void setUseInvertedIndex(boolean useInvertedIndex) {
  boolean alreadyEncoded = hasEncoding(Encoding.INVERTED_INDEX);
  if (useInvertedIndex == alreadyEncoded) {
    // encoding list already matches the requested state
    return;
  }
  if (useInvertedIndex) {
    this.getEncodingList().add(Encoding.INVERTED_INDEX);
  } else {
    this.getEncodingList().remove(Encoding.INVERTED_INDEX);
  }
}
/** * In case of non transactional table just set columnuniqueid as columnName to support * backward compatabiity. non transactional tables column uniqueid is always equal to * columnname */ public static void updateColumnUniqueIdForNonTransactionTable(List<ColumnSchema> columnSchemas) { for (ColumnSchema columnSchema : columnSchemas) { // In case of complex types only add the name after removing parent names. int index = columnSchema.getColumnName().lastIndexOf("."); if (index >= 0) { columnSchema.setColumnUniqueId(columnSchema.getColumnName() .substring(index + 1, columnSchema.getColumnName().length())); } else { columnSchema.setColumnUniqueId(columnSchema.getColumnName()); } } }
List<org.apache.carbondata.format.Encoding> encoders = new ArrayList<org.apache.carbondata.format.Encoding>(); for (Encoding encoder : wrapperColumnSchema.getEncodingList()) { encoders.add(fromWrapperToExternalEncoding(encoder)); new org.apache.carbondata.format.ColumnSchema( fromWrapperToExternalDataType( wrapperColumnSchema.getDataType()), wrapperColumnSchema.getColumnName(), wrapperColumnSchema.getColumnUniqueId(), true, encoders, wrapperColumnSchema.isDimensionColumn()); thriftColumnSchema.setColumn_group_id(-1); if (DataTypes.isDecimal(wrapperColumnSchema.getDataType())) { thriftColumnSchema.setScale(wrapperColumnSchema.getScale()); thriftColumnSchema.setPrecision(wrapperColumnSchema.getPrecision()); } else { thriftColumnSchema.setScale(-1); thriftColumnSchema.setPrecision(-1); thriftColumnSchema.setNum_child(wrapperColumnSchema.getNumberOfChild()); thriftColumnSchema.setDefault_value(wrapperColumnSchema.getDefaultValue()); thriftColumnSchema.setColumnProperties(wrapperColumnSchema.getColumnProperties()); thriftColumnSchema.setInvisible(wrapperColumnSchema.isInvisible()); thriftColumnSchema.setColumnReferenceId(wrapperColumnSchema.getColumnReferenceId()); thriftColumnSchema.setSchemaOrdinal(wrapperColumnSchema.getSchemaOrdinal()); if (wrapperColumnSchema.isSortColumn()) { Map<String, String> properties = wrapperColumnSchema.getColumnProperties();
@Override public ColumnSchema fromExternalToWrapperColumnSchema( org.apache.carbondata.format.ColumnSchema externalColumnSchema) { ColumnSchema wrapperColumnSchema = new ColumnSchema(); wrapperColumnSchema.setColumnUniqueId(externalColumnSchema.getColumn_id()); wrapperColumnSchema.setColumnName(externalColumnSchema.getColumn_name()); wrapperColumnSchema.setDataType( fromExternalToWrapperDataType( externalColumnSchema.data_type, externalColumnSchema.precision, externalColumnSchema.scale)); wrapperColumnSchema.setDimensionColumn(externalColumnSchema.isDimension()); List<Encoding> encoders = new ArrayList<Encoding>(); for (org.apache.carbondata.format.Encoding encoder : externalColumnSchema.getEncoders()) { encoders.add(fromExternalToWrapperEncoding(encoder)); wrapperColumnSchema.setEncodingList(encoders); wrapperColumnSchema.setNumberOfChild(externalColumnSchema.getNum_child()); wrapperColumnSchema.setPrecision(externalColumnSchema.getPrecision()); wrapperColumnSchema.setScale(externalColumnSchema.getScale()); wrapperColumnSchema.setDefaultValue(externalColumnSchema.getDefault_value()); wrapperColumnSchema.setInvisible(externalColumnSchema.isInvisible()); wrapperColumnSchema.setColumnReferenceId(externalColumnSchema.getColumnReferenceId()); wrapperColumnSchema.setSchemaOrdinal(externalColumnSchema.getSchemaOrdinal()); wrapperColumnSchema.setSortColumn(false); Map<String, String> properties = externalColumnSchema.getColumnProperties(); if (properties != null) { String sortColumns = properties.get(CarbonCommonConstants.SORT_COLUMNS); if (sortColumns != null) { wrapperColumnSchema.setSortColumn(true);
ColumnSchema newColumn = new ColumnSchema(); if (parentName != null) { newColumn.setColumnName(parentName + "." + field.getFieldName()); } else { newColumn.setColumnName(field.getFieldName()); newColumn.setDataType(field.getDataType()); if (isSortColumn || field.getDataType() == DataTypes.STRING || field.getDataType().isComplexType() || (isComplexChild)) { newColumn.setDimensionColumn(true); } else { newColumn.setDimensionColumn(false); newColumn.setSchemaOrdinal(ordinal++); } else { newColumn.setSchemaOrdinal(-1); newColumn.setColumnUniqueId(field.getFieldName()); newColumn.setColumnReferenceId(newColumn.getColumnUniqueId()); newColumn .setEncodingList(createEncoding(field.getDataType(), isInvertedIdxColumn, isComplexChild)); if (field.getDataType().isComplexType()) { if (DataTypes.isArrayType(field.getDataType()) || DataTypes.isMapType(field.getDataType())) { newColumn.setNumberOfChild(1); } else {
ColumnSchema id = new ColumnSchema(); id.setColumnName("id"); id.setDataType(DataTypes.INT); id.setEncodingList(encodings); id.setColumnUniqueId(UUID.randomUUID().toString()); id.setColumnReferenceId(id.getColumnUniqueId()); id.setDimensionColumn(true); id.setSchemaOrdinal(schemaOrdinal++); if (sortColumns.contains(id.getColumnName())) { id.setSortColumn(true); ColumnSchema date = new ColumnSchema(); date.setColumnName("date"); date.setDataType(DataTypes.STRING); date.setEncodingList(encodings); date.setColumnUniqueId(UUID.randomUUID().toString()); date.setDimensionColumn(true); date.setColumnReferenceId(date.getColumnUniqueId()); date.setSchemaOrdinal(schemaOrdinal++); if (sortColumns.contains(date.getColumnName())) { date.setSortColumn(true); ColumnSchema country = new ColumnSchema(); country.setColumnName("country"); country.setDataType(DataTypes.STRING); country.setEncodingList(encodings); country.setColumnUniqueId(UUID.randomUUID().toString()); country.setDimensionColumn(true); country.setSortColumn(true);
/**
 * Re-creates the decimal data type of every decimal column of the fact table, and of the
 * partition columns when partition info is present, from the precision and scale stored
 * on the column itself.
 *
 * @param tableInfo table whose column schemas are updated in place
 */
private static void updateDecimalType(TableInfo tableInfo) {
  refreshDecimalDataTypes(tableInfo.getFactTable().getListOfColumns());
  if (null == tableInfo.getFactTable().getPartitionInfo()) {
    return;
  }
  refreshDecimalDataTypes(tableInfo.getFactTable().getPartitionInfo().getColumnSchemaList());
}

/**
 * Replaces the data type of each decimal column with a decimal type built from that
 * column's own precision and scale; non-decimal columns are left untouched.
 */
private static void refreshDecimalDataTypes(List<ColumnSchema> columns) {
  for (ColumnSchema schema : columns) {
    DataType current = schema.getDataType();
    if (!DataTypes.isDecimal(current)) {
      continue;
    }
    schema.setDataType(DataTypes.createDecimalType(schema.getPrecision(), schema.getScale()));
  }
}
/**
 * Parses the value as a BigDecimal, rounds it HALF_UP to the scale of the column schema,
 * passes it through normalizeDecimalValue with the schema's precision and returns the
 * string form of the result.
 *
 * @param value textual decimal value
 * @param columnSchema schema supplying the scale and precision
 * @return string form of the normalized value, or null when normalization yields null
 */
private static String parseStringToBigDecimal(String value, ColumnSchema columnSchema) {
  BigDecimal scaled =
      new BigDecimal(value).setScale(columnSchema.getScale(), RoundingMode.HALF_UP);
  BigDecimal normalized = normalizeDecimalValue(scaled, columnSchema.getPrecision());
  return normalized == null ? null : normalized.toString();
}
while (counter < columnsInTable.size()) { columnSchema = columnsInTable.get(counter); if (columnSchema.isDimensionColumn()) { tableOrdinal++; if (CarbonUtil.hasEncoding(columnSchema.getEncodingList(), Encoding.DICTIONARY) && !isComplexDimensionStarted && columnSchema.getNumberOfChild() == 0) { cardinalityIndexForNormalDimensionColumn.add(tableOrdinal); if (columnSchema.isSortColumn()) { this.numberOfSortColumns++; else if (isComplexDimensionStarted || columnSchema.getDataType().isComplexType()) { cardinalityIndexForComplexDimensionColumn.add(tableOrdinal); carbonDimension = new CarbonDimension(columnSchema, dimensonOrdinal++, -1, ++complexTypeOrdinal); carbonDimension.initializeChildDimensionsList(columnSchema.getNumberOfChild()); complexDimensions.add(carbonDimension); isComplexDimensionStarted = true; int previouseOrdinal = dimensonOrdinal; dimensonOrdinal = readAllComplexTypeChildren(dimensonOrdinal, columnSchema.getNumberOfChild(), columnsInTable, carbonDimension, complexTypeOrdinal); int numberOfChildrenDimensionAdded = dimensonOrdinal - previouseOrdinal; if (columnSchema.isSortColumn()) { this.numberOfSortColumns++; this.numberOfNoDictSortColumns++;
/**
 * Returns the data type held by the underlying column schema.
 *
 * @return the dataType
 */
public DataType getDataType() {
  return this.columnSchema.getDataType();
}
/**
 * Returns the column name held by the underlying column schema.
 *
 * @return the colName
 */
public String getColName() {
  return this.columnSchema.getColumnName();
}
for (int i = 0; i < listOfColumns.size(); i++) { ColumnSchema columnSchema = listOfColumns.get(i); if (columnSchema.isDimensionColumn()) { if (columnSchema.getNumberOfChild() > 0) { CarbonDimension complexDimension = new CarbonDimension(columnSchema, dimensionOrdinal++, columnSchema.getSchemaOrdinal(), -1, ++complexTypeOrdinal); complexDimension.initializeChildDimensionsList(columnSchema.getNumberOfChild()); allDimensions.add(complexDimension); dimensionOrdinal = readAllComplexTypeChildrens(dimensionOrdinal, columnSchema.getNumberOfChild(), listOfColumns, complexDimension, primitiveDimensions); i = dimensionOrdinal - 1; complexTypeOrdinal = assignComplexOrdinal(complexDimension, complexTypeOrdinal); } else { if (!columnSchema.isInvisible() && columnSchema.isSortColumn()) { this.numberOfSortColumns++; if (!columnSchema.getEncodingList().contains(Encoding.DICTIONARY)) { CarbonDimension dimension = new CarbonDimension(columnSchema, dimensionOrdinal++, columnSchema.getSchemaOrdinal(), -1, -1); if (!columnSchema.isInvisible() && columnSchema.isSortColumn()) { this.numberOfNoDictSortColumns++; } else if (columnSchema.getEncodingList().contains(Encoding.DICTIONARY)) { CarbonDimension dimension = new CarbonDimension(columnSchema, dimensionOrdinal++, columnSchema.getSchemaOrdinal(), keyOrdinal++, -1);
if (!isDefaultValueNull(defaultValue)) { String value; DataType dataType = columnSchema.getDataType(); if (dataType == DataTypes.SHORT) { value = new String(defaultValue, Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)); } else if (DataTypes.isDecimal(dataType)) { BigDecimal decimal = DataTypeUtil.byteToBigDecimal(defaultValue); if (columnSchema.getScale() > decimal.scale()) { decimal = decimal.setScale(columnSchema.getScale(), RoundingMode.HALF_UP);
ProjectionDimension currentBlockDimension = new ProjectionDimension(tableDimension); tableDimension.getColumnSchema() .setPrecision(queryDimension.getDimension().getColumnSchema().getPrecision()); tableDimension.getColumnSchema() .setScale(queryDimension.getDimension().getColumnSchema().getScale()); tableDimension.getColumnSchema() .setDefaultValue(queryDimension.getDimension().getDefaultValue()); currentBlockDimension.setOrdinal(queryDimension.getOrdinal()); presentDimension.add(currentBlockDimension);
for (ColumnSchema columnSchema : wrapperColumnSchema) { if (columnSchema.isLocalDictColumn()) { columnLocalDictGenMap.put(columnSchema.getColumnName(), new ColumnLocalDictionaryGenerator(localDictionaryThreshold, columnSchema.getDataType() == DataTypes.VARCHAR ? CarbonCommonConstants.INT_SIZE_IN_BYTE : CarbonCommonConstants.SHORT_SIZE_IN_BYTE));
/**
 * Returns the scale held by the underlying column schema.
 *
 * @return the scale
 */
public int getScale() {
  return this.columnSchema.getScale();
}
} catch (IOException e) { LOGGER.error("Failed to delete dictionary or sortIndex file for column " + columnSchema.getColumnName() + "with column ID " + columnSchema.getColumnUniqueId()); removeDictionaryColumnFromCache(identifier, columnSchema.getColumnUniqueId());