// Collection transformer: maps each element (a CarbonColumn) to its column name.
// NOTE(review): fragment of an anonymous class whose opening `new ...() {` is outside
// this view; the trailing `});` closes that enclosing call.
@Override public Object transform(Object input) { return ((CarbonColumn) input).getColName(); } });
private static String extractChildColumnName(CarbonColumn column) { String columnName = column.getColName(); if (columnName.contains(".")) { // complex type child column names can be like following // a) struct type --> parent.child // b) array type --> parent.val.val...child [If create table flow] // c) array type --> parent.val0.val1...child [If SDK flow] // But json data's key is only child column name. So, extracting below String[] splits = columnName.split("\\."); columnName = splits[splits.length - 1]; } return columnName; } }
/**
 * Finds a column of the given table by name, ignoring case.
 *
 * @param tableName  table whose create-order column list is consulted
 * @param columnName column name to match (case-insensitive)
 * @return the matching column, or null when the table is unknown or no column matches
 */
public CarbonColumn getColumnByName(String tableName, String columnName) {
  List<CarbonColumn> columns = createOrderColumn.get(tableName);
  if (columns == null) {
    // Unknown table: report "not found" instead of throwing NullPointerException.
    return null;
  }
  for (CarbonColumn column : columns) {
    if (column.getColName().equalsIgnoreCase(columnName)) {
      return column;
    }
  }
  return null;
}
/**
 * Maps each data field to the position of its column in the file header.
 * Matching is case-insensitive; a field whose column does not appear in the
 * header keeps the default index 0.
 *
 * @param dataFields fields in schema order
 * @param header     column names as they appear in the input file
 * @return array where element i is the header index of dataFields[i]'s column
 */
private int[] arrangeData(DataField[] dataFields, String[] header) {
  int[] fieldToHeaderIndex = new int[dataFields.length];
  for (int fieldIdx = 0; fieldIdx < dataFields.length; fieldIdx++) {
    // Hoist the name lookup out of the inner scan.
    String colName = dataFields[fieldIdx].getColumn().getColName();
    for (int headerIdx = 0; headerIdx < header.length; headerIdx++) {
      if (colName.equalsIgnoreCase(header[headerIdx])) {
        fieldToHeaderIndex[fieldIdx] = headerIdx;
        break;
      }
    }
  }
  return fieldToHeaderIndex;
}
// Predicate: true when the candidate column's name equals, ignoring case, the name
// of the dimension being searched for.
// NOTE(review): fragment of an anonymous class whose opening `new ...() {` is outside
// this view; the trailing `});` closes that enclosing call.
@Override public boolean evaluate(Object object) { return ((CarbonColumn) object).getColName().equalsIgnoreCase(dimension.getColName()); } });
/**
 * Builds the storage-order column list for the stream table: simple dimensions
 * first, then complex dimensions, then measures (skipping the internal dummy
 * measure that is never stored).
 *
 * @param tableName table whose dimensions and measures are looked up
 * @return columns in storage order
 */
public List<CarbonColumn> getStreamStorageOrderColumn(String tableName) {
  List<CarbonDimension> dimensions = tableDimensionsMap.get(tableName);
  List<CarbonMeasure> measures = tableMeasuresMap.get(tableName);
  List<CarbonColumn> ordered = new ArrayList<>(dimensions.size() + measures.size());
  List<CarbonColumn> complexDims = new ArrayList<>(dimensions.size());
  // Split dimensions: simple ones go straight into the result, complex ones
  // are buffered so they land after every simple dimension.
  for (CarbonColumn dim : dimensions) {
    if (dim.isComplex()) {
      complexDims.add(dim);
    } else {
      ordered.add(dim);
    }
  }
  ordered.addAll(complexDims);
  for (CarbonColumn measure : measures) {
    // "default_dummy_measure" is an internal placeholder and is excluded.
    if (!"default_dummy_measure".equals(measure.getColName())) {
      ordered.add(measure);
    }
  }
  return ordered;
}
/**
 * Validate INDEX_COLUMNS property and the index columns it names.
 * Following will be validated:
 * 1. require INDEX_COLUMNS property
 * 2. INDEX_COLUMNS can't contain illegal argument (empty, blank)
 * 3. INDEX_COLUMNS can't contain duplicate columns
 * 4. INDEX_COLUMNS should exist in table columns
 * (1, 2 and 4 are enforced by getIndexedColumns; this method adds the
 * duplicate check.)
 *
 * @throws MalformedDataMapCommandException if any column name is repeated
 */
public void validate() throws MalformedDataMapCommandException {
  List<CarbonColumn> indexColumns = carbonTable.getIndexedColumns(dataMapSchema);
  Set<String> seen = new HashSet<>();
  for (CarbonColumn indexColumn : indexColumns) {
    // Set.add returns false for a repeated name, so we can fail fast.
    if (!seen.add(indexColumn.getColName())) {
      throw new MalformedDataMapCommandException(INDEX_COLUMNS + " has duplicate column");
    }
  }
}
/**
 * Pick corresponding column pages and add them to all registered datamap writers.
 *
 * @param blockletId id of the blocklet the page belongs to
 * @param pageId     sequence number of page, start from 0
 * @param tablePage  page data
 * @throws IOException propagated from the underlying writers
 */
public void onPageAdded(int blockletId, int pageId, TablePage tablePage) throws IOException {
  for (Map.Entry<List<CarbonColumn>, List<DataMapWriter>> entry : registry.entrySet()) {
    List<CarbonColumn> indexedColumns = entry.getKey();
    // Gather the page of every indexed column, in index-column order.
    ColumnPage[] pages = new ColumnPage[indexedColumns.size()];
    int slot = 0;
    for (CarbonColumn indexedColumn : indexedColumns) {
      pages[slot++] = tablePage.getColumnPage(indexedColumn.getColName());
    }
    // All pages of one TablePage share the same row count; read it from the first.
    int pageSize = pages[0].getPageSize();
    for (DataMapWriter writer : entry.getValue()) {
      writer.onPageAdded(blockletId, pageId, pageSize, pages);
    }
  }
}
@Override protected byte[] convertDictionaryValue(int indexColIdx, Object value) { // input value from onPageAdded in load process is byte[] // for dict columns including dictionary and date columns decode value to get the surrogate key int thisKeyIdx = indexCol2MdkIdx.get(indexColumns.get(indexColIdx).getColName()); int surrogateKey = CarbonUtil.getSurrogateInternal((byte[]) value, 0, columnarSplitter.getBlockKeySize()[thisKeyIdx]); // store the dictionary key in bloom return CarbonUtil.getValueAsBytes(DataTypes.INT, surrogateKey); } }
public static void addData(LuceneColumnKeys key, int rowId, int pageId, int blockletId, ByteBuffer intBuffer, IndexWriter indexWriter, List<CarbonColumn> indexCols, boolean storeBlockletWise) throws IOException { Document document = new Document(); for (int i = 0; i < key.getColValues().length; i++) { addField(document, key.getColValues()[i], indexCols.get(i).getColName(), Field.Store.NO); } intBuffer.clear(); if (storeBlockletWise) { // No need to store blocklet id to it. intBuffer.putShort((short) pageId); intBuffer.putShort((short) rowId); intBuffer.rewind(); document.add(new StoredField(ROWID_NAME, intBuffer.getInt())); } else { intBuffer.putShort((short) blockletId); intBuffer.putShort((short) pageId); intBuffer.rewind(); document.add(new StoredField(PAGEID_NAME, intBuffer.getInt())); document.add(new StoredField(ROWID_NAME, (short) rowId)); } indexWriter.addDocument(document); }
/**
 * Get all index columns specified by dataMapSchema.
 *
 * @param dataMapSchema schema carrying the INDEX_COLUMNS property
 * @return the resolved table columns, in the order they were specified
 * @throws MalformedDataMapCommandException if an entry is empty/blank or does not
 *         name an existing table column
 */
public List<CarbonColumn> getIndexedColumns(DataMapSchema dataMapSchema)
    throws MalformedDataMapCommandException {
  String[] columns = dataMapSchema.getIndexColumns();
  List<CarbonColumn> indexColumn = new ArrayList<>(columns.length);
  for (String column : columns) {
    String trimmed = column.trim();
    // Reject empty/blank entries before the lookup: an empty name would make
    // getColumnByName return null and misreport the problem as a missing column.
    if (trimmed.isEmpty()) {
      throw new MalformedDataMapCommandException(
          CarbonCommonConstants.INDEX_COLUMNS + " contains invalid column name");
    }
    CarbonColumn carbonColumn = getColumnByName(getTableName(), trimmed.toLowerCase());
    if (carbonColumn == null) {
      throw new MalformedDataMapCommandException(String.format(
          "column '%s' does not exist in table. Please check create DataMap statement.",
          column));
    }
    indexColumn.add(carbonColumn);
  }
  return indexColumn;
}
/**
 * Converts the literal string value of a direct-dictionary column into its
 * surrogate key, recording bad-record reasons on failure.
 *
 * @param value     the raw value (expected to be a String or null)
 * @param logHolder collector for bad-record reasons, with a per-column message cache
 * @return the surrogate key, or DIRECT_DICT_VALUE_NULL for null / null-format /
 *         unparsable values
 */
@Override public Object convert(Object value, BadRecordLogHolder logHolder) throws RuntimeException {
  String literalValue = (String) value;
  if (literalValue == null) {
    // A genuinely null input is a bad record: log the reason, emit null surrogate.
    logHolder.setReason(
        CarbonDataProcessorUtil.prepareFailureReason(column.getColName(), column.getDataType()));
    return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL;
  } else if (literalValue.equals(nullFormat)) {
    // Configured null representation: treated as null WITHOUT a bad-record entry.
    return CarbonCommonConstants.DIRECT_DICT_VALUE_NULL;
  } else {
    int key = directDictionaryGenerator.generateDirectSurrogateKey(literalValue);
    if (key == CarbonCommonConstants.DIRECT_DICT_VALUE_NULL) {
      // Generation failed. Non-empty values always count as bad records; empty
      // strings only when isEmptyBadRecord is enabled.
      if ((literalValue.length() > 0) || (literalValue.length() == 0 && isEmptyBadRecord)) {
        // Failure messages are cached per column so they are built once, not per row.
        String message = logHolder.getColumnMessageMap().get(column.getColName());
        if (null == message) {
          message = CarbonDataProcessorUtil.prepareFailureReason(
              column.getColName(), column.getDataType());
          logHolder.getColumnMessageMap().put(column.getColName(), message);
        }
        logHolder.setReason(message);
      }
    }
    return key;
  }
}
/**
 * Selects the data fields whose columns appear in the configured header and
 * records, per selected field, the header position it maps to (inputMapping).
 * Fields missing from the header leave trailing null slots in the result.
 *
 * @param configuration load configuration providing fields and header
 * @return fields present in the header, in field order
 */
public DataField[] getInput(CarbonDataLoadConfiguration configuration) {
  DataField[] fields = configuration.getDataFields();
  String[] header = configuration.getHeader();
  numberOfColumns = header.length;
  DataField[] input = new DataField[fields.length];
  inputMapping = new int[input.length];
  int matched = 0;
  for (DataField field : fields) {
    String colName = field.getColumn().getColName();
    for (int headerIdx = 0; headerIdx < numberOfColumns; headerIdx++) {
      if (header[headerIdx].equalsIgnoreCase(colName)) {
        input[matched] = field;
        inputMapping[matched] = headerIdx;
        matched++;
        break;
      }
    }
  }
  return input;
}
/**
 * Create parser for the carbon column: delegates to createComplexType using the
 * field's column and its name as the starting point of the complex hierarchy.
 */
public static GenericDataType createComplexDataType(DataField dataField,
    AbsoluteTableIdentifier absoluteTableIdentifier, DictionaryClient client, Boolean useOnePass,
    Map<Object, Integer> localCache, int index, String nullFormat, Boolean isEmptyBadRecords) {
  CarbonColumn column = dataField.getColumn();
  return createComplexType(column, column.getColName(), absoluteTableIdentifier, client,
      useOnePass, localCache, index, nullFormat, isEmptyBadRecords);
}
/**
 * Removes the given segment from the shard map and evicts every
 * (shard, indexed column) bloom cache entry belonging to it.
 */
@Override
public void clear(Segment segment) {
  Set<String> shards = segmentMap.remove(segment.getSegmentNo());
  if (shards == null) {
    // Segment was never registered: nothing cached, nothing to invalidate.
    return;
  }
  for (String shard : shards) {
    for (CarbonColumn indexedColumn : dataMapMeta.getIndexedColumns()) {
      cache.invalidate(new BloomCacheKeyValue.CacheKey(shard, indexedColumn.getColName()));
    }
  }
}
/**
 * Builds a query type per complex column (array or struct), leaving null slots
 * for non-complex columns, and resolves each complex column's children.
 *
 * @throws UnsupportedOperationException for complex types other than array/struct
 * @throws IOException propagated from child resolution
 */
public static GenericQueryType[] getComplexDimensions(CarbonTable carbontable,
    CarbonColumn[] carbonColumns, Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache)
    throws IOException {
  GenericQueryType[] queryTypes = new GenericQueryType[carbonColumns.length];
  for (int i = 0; i < carbonColumns.length; i++) {
    CarbonColumn column = carbonColumns[i];
    if (!column.isComplex()) {
      continue;
    }
    String colName = column.getColName();
    if (DataTypes.isArrayType(column.getDataType())) {
      queryTypes[i] = new ArrayQueryType(colName, colName, i);
    } else if (DataTypes.isStructType(column.getDataType())) {
      queryTypes[i] = new StructQueryType(colName, colName, i);
    } else {
      throw new UnsupportedOperationException(
          column.getDataType().getName() + " is not supported");
    }
    fillChildren(carbontable, queryTypes[i], (CarbonDimension) column, i, cache);
  }
  return queryTypes;
}
/**
 * Further validate whether it is string column and dictionary column.
 * Currently only string and non-dictionary column is supported for Lucene DataMap.
 *
 * @throws MalformedDataMapCommandException if an index column is not STRING
 *         or is dictionary-encoded
 */
@Override
public void validate() throws MalformedDataMapCommandException {
  super.validate();
  List<CarbonColumn> indexColumns = getCarbonTable().getIndexedColumns(getDataMapSchema());
  for (CarbonColumn column : indexColumns) {
    // Lucene indexing only handles plain strings.
    if (column.getDataType() != DataTypes.STRING) {
      throw new MalformedDataMapCommandException(String.format(
          "Only String column is supported, column '%s' is %s type. ",
          column.getColName(), column.getDataType()));
    }
    // Dictionary-encoded strings carry surrogate keys, not the raw text.
    if (column.getEncoder().contains(Encoding.DICTIONARY)) {
      throw new MalformedDataMapCommandException(String.format(
          "Dictionary column is not supported, column '%s' is dictionary column",
          column.getColName()));
    }
  }
}
}
/**
 * Creates the datamap directory (if absent) and, for every indexed column, creates
 * the bloom index file and opens an output stream to it. Successfully opened files
 * and streams are tracked in currentDMFiles / currentDataOutStreams for later
 * writing and cleanup.
 *
 * NOTE(review): if creating/opening fails for a later column, streams already opened
 * for earlier columns are not closed here — presumably released elsewhere via
 * currentDataOutStreams; verify against the caller's cleanup path.
 *
 * @throws IOException if the directory or any index file/stream cannot be created
 */
private void initDataMapFile() throws IOException {
  if (!FileFactory.isFileExist(dataMapPath)) {
    if (!FileFactory.mkdirs(dataMapPath, FileFactory.getFileType(dataMapPath))) {
      throw new IOException("Failed to create directory " + dataMapPath);
    }
  }
  for (int indexColId = 0; indexColId < indexColumns.size(); indexColId++) {
    String dmFile = BloomIndexFileStore.getBloomIndexFile(dataMapPath,
        indexColumns.get(indexColId).getColName());
    DataOutputStream dataOutStream = null;
    try {
      FileFactory.createNewFile(dmFile, FileFactory.getFileType(dmFile));
      dataOutStream = FileFactory.getDataOutputStream(dmFile, FileFactory.getFileType(dmFile));
    } catch (IOException e) {
      // Close the half-opened stream for THIS column before propagating.
      CarbonUtil.closeStreams(dataOutStream);
      throw new IOException(e);
    }
    // Only fully opened file/stream pairs are registered.
    this.currentDMFiles.add(dmFile);
    this.currentDataOutStreams.add(dataOutStream);
  }
}
/** * initialize partitioner for bucket column */ private void initializeBucketColumnPartitioner() { List<Integer> indexes = new ArrayList<>(); List<ColumnSchema> columnSchemas = new ArrayList<>(); DataField[] inputDataFields = getOutput(); BucketingInfo bucketingInfo = configuration.getBucketingInfo(); for (int i = 0; i < inputDataFields.length; i++) { for (int j = 0; j < bucketingInfo.getListOfColumns().size(); j++) { if (inputDataFields[i].getColumn().getColName() .equals(bucketingInfo.getListOfColumns().get(j).getColumnName())) { indexes.add(i); columnSchemas.add(inputDataFields[i].getColumn().getColumnSchema()); break; } } } // hash partitioner to dispatch rows by bucket column this.partitioner = new HashPartitionerImpl(indexes, columnSchemas, bucketingInfo.getNumOfRanges()); }
/**
 * Match the columns for transactional and non transactional tables.
 *
 * @param isTransactionalTable whether the table is transactional; for non
 *        transactional tables a case-insensitive name match is accepted, since
 *        multiple SDK outputs placed in one folder share column names but not IDs
 * @param queryColumn column coming from the query
 * @param tableColumn column from the table schema
 * @return true when the two columns refer to the same column
 */
private static boolean isColumnMatches(boolean isTransactionalTable, CarbonColumn queryColumn,
    CarbonColumn tableColumn) {
  // If it is non transactional table just check the column names, no need to validate
  // column id as multiple sdk's output placed in a single folder doesn't have same
  // column ID but can have same column name
  if (tableColumn.getDataType().isComplexType()
      && !(tableColumn.getDataType().getId() == DataTypes.ARRAY_TYPE_ID)) {
    // Complex non-array (i.e. struct-like) columns: try the ID first, then fall
    // back to a structural comparison of the nested children.
    if (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId())) {
      return true;
    } else {
      return isColumnMatchesStruct(tableColumn, queryColumn);
    }
  } else {
    // Primitive and array columns: ID match always works; name match is accepted
    // only for non transactional tables (see note above).
    return (tableColumn.getColumnId().equalsIgnoreCase(queryColumn.getColumnId()) || (
        !isTransactionalTable && tableColumn.getColName()
        .equalsIgnoreCase(queryColumn.getColName())));
  }
}