@Override protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) { // no dictionary measure columns will be of original data, so convert it to bytes if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) { return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } return (byte[]) value; }
protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) { if (DataTypes.VARCHAR == indexColumns.get(indexColIdx).getDataType()) { return DataConvertUtil.getRawBytesForVarchar((byte[]) value); } else if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) { // get bytes for the original value of the no dictionary column return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } else { return DataConvertUtil.getRawBytes((byte[]) value); } }
/**
 * Creates a converter for one non-dictionary field.
 *
 * @param dataField        field to convert; its column and the column's data type are cached
 * @param nullformat       stored as the converter's null format
 * @param index            stored as the converter's index
 * @param isEmptyBadRecord stored as the converter's empty-bad-record flag
 */
public NonDictionaryFieldConverterImpl(DataField dataField, String nullformat, int index,
    boolean isEmptyBadRecord) {
  // plain settings first
  this.index = index;
  this.nullformat = nullformat;
  this.isEmptyBadRecord = isEmptyBadRecord;
  // then everything derived from the field itself
  this.dataField = dataField;
  this.column = dataField.getColumn();
  this.dataType = dataField.getColumn().getDataType();
}
/**
 * Builds one string describing every complex-type column among the given fields.
 *
 * <p>For each complex column, {@code addAllComplexTypeChildren} appends its description and
 * the entry is then terminated with {@code CarbonCommonConstants.SEMICOLON_SPC_CHARACTER}.
 *
 * @param dataFields fields to inspect
 * @return the concatenated description; empty when no field is of complex type
 */
private static String getComplexTypeString(DataField[] dataFields) {
  StringBuilder complexTypes = new StringBuilder();
  for (DataField field : dataFields) {
    if (!field.getColumn().getDataType().isComplexType()) {
      continue;
    }
    addAllComplexTypeChildren((CarbonDimension) field.getColumn(), complexTypes, "");
    complexTypes.append(CarbonCommonConstants.SEMICOLON_SPC_CHARACTER);
  }
  return complexTypes.toString();
}
/**
 * Returns the data types of all measure fields, i.e. the fields whose column is not a
 * dimension, in the order they appear in {@code dataFields}.
 *
 * @return one data type per measure field; an empty array when there are no measures
 */
public DataType[] getMeasureDataType() {
  List<DataType> measureTypes = new ArrayList<>(dataFields.length);
  for (int i = 0; i < dataFields.length; i++) {
    if (dataFields[i].getColumn().isDimension()) {
      continue;
    }
    measureTypes.add(dataFields[i].getColumn().getDataType());
  }
  return measureTypes.toArray(new DataType[0]);
}
/**
 * Creates a column from its schema and ordinals.
 *
 * @param columnSchema  schema backing this column
 * @param ordinal       stored as the column ordinal
 * @param schemaOrdinal stored as the schema ordinal
 */
public CarbonColumn(ColumnSchema columnSchema, int ordinal, int schemaOrdinal) {
  this.ordinal = ordinal;
  this.schemaOrdinal = schemaOrdinal;
  this.columnSchema = columnSchema;
  // Built last: the getters below are invoked on this instance, so every field above must
  // already be assigned (presumably they read columnSchema - confirm against the getters).
  this.columnIdentifier =
      new ColumnIdentifier(getColumnId(), getColumnProperties(), getDataType());
}
if (DataType.TIMESTAMP == carbonColumns[i].getDataType()) { data[i] = new Timestamp((long) data[i] / 1000L); } else if (DataType.DATE == carbonColumns[i].getDataType()) { data[i] = new Date((long) data[i]);
if (DataTypeUtil.isPrimitiveColumn(noDicAndComplexColumns[i].getDataType())) { noDictKeys[i] = DataTypeUtil .getDataBasedOnDataTypeForNoDictionaryColumn(noDictionaryKeys[i], noDicAndComplexColumns[i].getDataType()); && noDicAndComplexColumns[i].getDataType() == DataTypes.TIMESTAMP) { noDictKeys[i] = (long) noDictKeys[i] / 1000L;
public DirectDictionaryFieldConverterImpl(DataField dataField, String nullFormat, int index, boolean isEmptyBadRecord) { this.nullFormat = nullFormat; this.column = dataField.getColumn(); if (dataField.getColumn().getDataType() == DataTypes.DATE && dataField.getDateFormat() != null && !dataField.getDateFormat().isEmpty()) { this.directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory .getDirectDictionaryGenerator(dataField.getColumn().getDataType(), dataField.getDateFormat()); } else if (dataField.getColumn().getDataType() == DataTypes.TIMESTAMP && dataField.getTimestampFormat() != null && !dataField.getTimestampFormat().isEmpty()) { this.directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory .getDirectDictionaryGenerator(dataField.getColumn().getDataType(), dataField.getTimestampFormat()); } else { this.directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory .getDirectDictionaryGenerator(dataField.getColumn().getDataType()); } this.index = index; this.isEmptyBadRecord = isEmptyBadRecord; }
protected void addValue2BloomIndex(int indexColIdx, Object value) { byte[] indexValue; // convert measure to bytes // convert non-dict dimensions to simple bytes without length // convert internal-dict dimensions to simple bytes without any encode if (indexColumns.get(indexColIdx).isMeasure()) { // NULL value of all measures are already processed in `ColumnPage.getData` // or `RawBytesReadSupport.readRow` with actual data type // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes` if (indexColumns.get(indexColIdx).getDataType().equals(DataTypes.BOOLEAN)) { value = BooleanConvert.boolean2Byte((Boolean)value); } indexValue = CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } else { if (indexColumns.get(indexColIdx).hasEncoding(Encoding.DICTIONARY) || indexColumns.get(indexColIdx).hasEncoding(Encoding.DIRECT_DICTIONARY)) { indexValue = convertDictionaryValue(indexColIdx, value); } else { indexValue = convertNonDictionaryValue(indexColIdx, value); } } if (indexValue.length == 0) { indexValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; } indexBloomFilters.get(indexColIdx).add(new Key(indexValue)); }
/**
 * Runs the parent validation, then checks every indexed column: it must be of STRING
 * data type and must not be dictionary encoded.
 *
 * @throws MalformedDataMapCommandException if an indexed column is not a STRING column
 *         or carries the DICTIONARY encoding
 */
@Override
public void validate() throws MalformedDataMapCommandException {
  super.validate();
  for (CarbonColumn indexedColumn : getCarbonTable().getIndexedColumns(getDataMapSchema())) {
    if (indexedColumn.getDataType() != DataTypes.STRING) {
      throw new MalformedDataMapCommandException(String.format(
          "Only String column is supported, column '%s' is %s type. ",
          indexedColumn.getColName(), indexedColumn.getDataType()));
    }
    if (indexedColumn.getEncoder().contains(Encoding.DICTIONARY)) {
      throw new MalformedDataMapCommandException(String.format(
          "Dictionary column is not supported, column '%s' is dictionary column",
          indexedColumn.getColName()));
    }
  }
}
} // closes the enclosing class, whose header lies outside this view
/** * Match the columns for transactional and non transactional tables * @param isTransactionalTable * @param queryColumn * @param tableColumn * @return */ private static boolean isColumnMatches(boolean isTransactionalTable, CarbonColumn queryColumn, CarbonColumn tableColumn) { // If it is non transactional table just check the column names, no need to validate // column id as multiple sdk's output placed in a single folder doesn't have same // column ID but can have same column name if (tableColumn.getDataType().isComplexType() && !(tableColumn.getDataType().getId() == DataTypes.ARRAY_TYPE_ID)) { if (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId())) { return true; } else { return isColumnMatchesStruct(tableColumn, queryColumn); } } else { return (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId()) || ( !isTransactionalTable && tableColumn.getColName() .equalsIgnoreCase(queryColumn.getColName()))); } }
/**
 * Converts a literal value to its direct-dictionary surrogate key.
 *
 * <p>A null value records a failure reason and yields
 * {@code CarbonCommonConstants.DIRECT_DICT_VALUE_NULL}; a value equal to the null format
 * yields the same constant without a reason. When key generation itself returns that
 * constant, a failure reason is recorded if the literal is non-empty, or if it is empty
 * and {@code isEmptyBadRecord} is set.
 *
 * @param value     literal value, cast to {@code String}
 * @param logHolder receives the failure reason and caches the per-column message
 * @return the generated surrogate key, or {@code DIRECT_DICT_VALUE_NULL}
 */
@Override
public Object convert(Object value, BadRecordLogHolder logHolder) throws RuntimeException {
  String literalValue = (String) value;
  if (literalValue == null) {
    logHolder.setReason(
        CarbonDataProcessorUtil.prepareFailureReason(column.getColName(), column.getDataType()));
    return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL;
  }
  if (literalValue.equals(nullFormat)) {
    return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL;
  }

  int key = directDictionaryGenerator.generateDirectSurrogateKey(literalValue);
  boolean countsAsBadRecord = !literalValue.isEmpty() || isEmptyBadRecord;
  if (key == CarbonCommonConstants.DIRECT_DICT_VALUE_NULL && countsAsBadRecord) {
    // reuse the message already prepared for this column, creating it on first use
    String message = logHolder.getColumnMessageMap().get(column.getColName());
    if (null == message) {
      message = CarbonDataProcessorUtil.prepareFailureReason(
          column.getColName(), column.getDataType());
      logHolder.getColumnMessageMap().put(column.getColName(), message);
    }
    logHolder.setReason(message);
  }
  return key;
}
/**
 * Creates a query type for every complex column in {@code carbonColumns}.
 *
 * <p>The result has one slot per input column; slots of non-complex columns stay
 * {@code null}. Array columns get an {@code ArrayQueryType}, struct columns a
 * {@code StructQueryType}, and each created type is then populated by
 * {@code fillChildren}.
 *
 * @param carbontable   table the columns belong to
 * @param carbonColumns columns to inspect
 * @param cache         dictionary cache handed on to {@code fillChildren}
 * @return array parallel to {@code carbonColumns}
 * @throws IOException declared for the {@code fillChildren} call
 * @throws UnsupportedOperationException for a complex column that is neither array nor struct
 */
public static GenericQueryType[] getComplexDimensions(CarbonTable carbontable,
    CarbonColumn[] carbonColumns, Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache)
    throws IOException {
  GenericQueryType[] queryTypes = new GenericQueryType[carbonColumns.length];
  for (int i = 0; i < carbonColumns.length; i++) {
    CarbonColumn column = carbonColumns[i];
    if (!column.isComplex()) {
      continue;
    }
    if (DataTypes.isArrayType(column.getDataType())) {
      queryTypes[i] = new ArrayQueryType(column.getColName(), column.getColName(), i);
    } else if (DataTypes.isStructType(column.getDataType())) {
      queryTypes[i] = new StructQueryType(column.getColName(), column.getColName(), i);
    } else {
      throw new UnsupportedOperationException(
          column.getDataType().getName() + " is not supported");
    }
    fillChildren(carbontable, queryTypes[i], (CarbonDimension) column, i, cache);
  }
  return queryTypes;
}
Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET)); row.setValues(new Object[] { DataTypeUtil.getDataBasedOnDataType(stringValue, columnExpression.getCarbonColumn().getDataType()) }); Boolean rslt = expression.evaluate(row).getBoolean(); if (null != rslt) {
/**
 * This initialization is done inside executor task
 * for column dictionary involved in decoding.
 *
 * <p>The data type of every column is recorded. A forward dictionary is additionally
 * fetched from the cache for each column that has DICTIONARY encoding, has no
 * DIRECT_DICTIONARY encoding and is not complex.
 *
 * @param carbonColumns column list
 * @param carbonTable   table that supplies the table identifier and the dictionary path
 * @throws IOException declared for the dictionary cache lookup
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, CarbonTable carbonTable)
    throws IOException {
  this.carbonColumns = carbonColumns;
  dictionaries = new Dictionary[carbonColumns.length];
  dataTypes = new DataType[carbonColumns.length];
  for (int i = 0; i < carbonColumns.length; i++) {
    CarbonColumn column = carbonColumns[i];
    dataTypes[i] = column.getDataType();

    boolean needsForwardDictionary = column.hasEncoding(Encoding.DICTIONARY)
        && !column.hasEncoding(Encoding.DIRECT_DICTIONARY)
        && !column.isComplex();
    if (!needsForwardDictionary) {
      continue;
    }

    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardDictionaryCache =
        CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY);
    String dictionaryPath = carbonTable.getTableInfo().getFactTable().getTableProperties()
        .get(CarbonCommonConstants.DICTIONARY_PATH);
    dictionaries[i] = forwardDictionaryCache.get(new DictionaryColumnUniqueIdentifier(
        carbonTable.getAbsoluteTableIdentifier(), column.getColumnIdentifier(),
        dataTypes[i], dictionaryPath));
  }
}
for (int i = 0; i < measureCount; i++) { measureDataTypes[i] = dataFields[dimensionWithComplexCount + i].getColumn().getDataType();
@Override public void initialize() throws IOException { super.initialize(); // if logger is enabled then raw data will be required. RowConverterImpl rowConverter = new RowConverterImpl(configuration.getDataFields(), configuration, null); rowConverter.initialize(); configuration.setCardinalityFinder(rowConverter); noDictionaryMapping = CarbonDataProcessorUtil.getNoDictionaryMapping(configuration.getDataFields()); dataFieldsWithComplexDataType = new HashMap<>(); convertComplexDataType(dataFieldsWithComplexDataType); dataTypes = new DataType[configuration.getDataFields().length]; for (int i = 0; i < dataTypes.length; i++) { if (configuration.getDataFields()[i].getColumn().hasEncoding(Encoding.DICTIONARY)) { dataTypes[i] = DataTypes.INT; } else { dataTypes[i] = configuration.getDataFields()[i].getColumn().getDataType(); } } orderOfData = arrangeData(configuration.getDataFields(), configuration.getHeader()); }
.setColumnIndex(columnExpression.getCarbonColumn().getOrdinal()); msrColumnEvalutorInfo.setMeasure(columnExpression.getMeasure()); msrColumnEvalutorInfo.setType(columnExpression.getCarbonColumn().getDataType()); msrColEvalutorInfoList.add(msrColumnEvalutorInfo);
DataType dataType = carbonColumn.getDataType(); if (DataTypes.isArrayType(dataType) || DataTypes.isMapType(dataType)) { List<CarbonDimension> listOfChildDimensions =