public AggregateHashMap(StructType schema, int capacity, double loadFactor, int maxSteps) {
  // We currently only support a single key-value pair, where both key and value are longs
  assert (schema.size() == 2 && schema.fields()[0].dataType() == LongType &&
      schema.fields()[1].dataType() == LongType);

  // capacity should be a power of 2
  assert (capacity > 0 && ((capacity & (capacity - 1)) == 0));

  this.maxSteps = maxSteps;
  numBuckets = (int) (capacity / loadFactor);
  batch = ColumnarBatch.allocate(schema, MemoryMode.ON_HEAP, capacity);
  buckets = new int[numBuckets];
  Arrays.fill(buckets, -1);
}
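A minimal construction sketch for the map above, assuming Spark's StructType/DataTypes builder API. The two-long schema and the power-of-two capacity are the constraints the asserts enforce; the loadFactor and maxSteps values here are illustrative, not defaults from the source:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

// Two-long schema: the only shape the constructor's first assert accepts
StructType schema = new StructType()
    .add("key", DataTypes.LongType)
    .add("value", DataTypes.LongType);

// capacity must be a power of 2; loadFactor 0.5 and maxSteps 2 are illustrative choices
AggregateHashMap map = new AggregateHashMap(schema, 1 << 16, 0.5, 2);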
/**
 * Converts an iterator of rows into a single ColumnarBatch.
 */
public static ColumnarBatch toBatch(
    StructType schema, MemoryMode memMode, Iterator<Row> row) {
  ColumnarBatch batch = ColumnarBatch.allocate(schema, memMode);
  int n = 0;
  while (row.hasNext()) {
    Row r = row.next();
    for (int i = 0; i < schema.fields().length; i++) {
      appendValue(batch.column(i), schema.fields()[i].dataType(), r, i);
    }
    n++;
  }
  batch.setNumRows(n);
  return batch;
}
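A hedged usage sketch for toBatch, assuming the method is reachable on a utility class such as Spark's ColumnVectorUtils, with input rows built via RowFactory:

import java.util.Arrays;
import java.util.List;
import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

StructType schema = new StructType()
    .add("id", DataTypes.LongType)
    .add("name", DataTypes.StringType);

List<Row> rows = Arrays.asList(
    RowFactory.create(1L, "a"),
    RowFactory.create(2L, "b"));

// All rows are appended column-by-column into one batch
ColumnarBatch batch = ColumnVectorUtils.toBatch(schema, MemoryMode.ON_HEAP, rows.iterator());
assert batch.numRows() == 2;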
columnarBatch = ColumnarBatch.allocate(batchSchema, memMode);
if (partitionColumns != null) {
  int partitionIdx = sparkSchema.fields().length;
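  // Hedged continuation, sketching how the truncated fragment above proceeds in
  // Spark 2.x's VectorizedParquetRecordReader: partition columns sit after the
  // file columns and are filled once with constant values. The helper
  // ColumnVectorUtils.populate and setIsConstant() are assumed from that source.
  for (int i = 0; i < partitionColumns.fields().length; i++) {
    ColumnVectorUtils.populate(columnarBatch.column(i + partitionIdx), partitionValues, i);
    columnarBatch.column(i + partitionIdx).setIsConstant();
  }
}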
/**
 * Adapter class which handles columnar vector reading of CarbonData on top of
 * the Spark ColumnVector and ColumnarBatch API. This proxy class hides the
 * complexity of the Spark 2.3 API changes, since the ColumnVector and
 * ColumnarBatch interfaces are still evolving.
 *
 * @param memMode      whether to allocate the vectors on-heap or off-heap.
 * @param outputSchema schema of the table being read.
 * @param rowNum       number of rows to allocate for vector reading.
 * @param useLazyLoad  whether to use lazy loading when getting the data.
 */
public CarbonVectorProxy(MemoryMode memMode, StructType outputSchema, int rowNum,
    boolean useLazyLoad) {
  columnarBatch = ColumnarBatch.allocate(outputSchema, memMode, rowNum);
  columnVectorProxies = new ColumnVectorProxy[columnarBatch.numCols()];
  for (int i = 0; i < columnVectorProxies.length; i++) {
    if (useLazyLoad) {
      columnVectorProxies[i] =
          new ColumnVectorProxyWithLazyLoad(columnarBatch.column(i), rowNum, memMode);
    } else {
      columnVectorProxies[i] = new ColumnVectorProxy(columnarBatch.column(i), rowNum, memMode);
    }
  }
  updateColumnVectors();
}
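A minimal construction sketch for the proxy above; the schema and row count are illustrative, and CarbonVectorProxy itself comes from CarbonData's Spark integration as shown in the constructor:

import org.apache.spark.memory.MemoryMode;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

StructType outputSchema = new StructType()
    .add("id", DataTypes.LongType)
    .add("name", DataTypes.StringType);

// Allocate an on-heap batch of 4096 rows with lazy loading enabled
CarbonVectorProxy proxy = new CarbonVectorProxy(MemoryMode.ON_HEAP, outputSchema, 4096, true);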