/** * Converts an iterator of rows into a single ColumnBatch. */ public static ColumnarBatch toBatch( StructType schema, MemoryMode memMode, Iterator<Row> row) { ColumnarBatch batch = ColumnarBatch.allocate(schema, memMode); int n = 0; while (row.hasNext()) { Row r = row.next(); for (int i = 0; i < schema.fields().length; i++) { appendValue(batch.column(i), schema.fields()[i].dataType(), r, i); } n++; } batch.setNumRows(n); return batch; } }
public static Map<Integer, Integer> toJavaIntMap(ColumnarMap map) { int[] keys = toJavaIntArray(map.keyArray()); int[] values = toJavaIntArray(map.valueArray()); assert keys.length == values.length; Map<Integer, Integer> result = new HashMap<>(); for (int i = 0; i < keys.length; i++) { result.put(keys[i], values[i]); } return result; }
int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); columnVectors[i + partitionIdx].setIsConstant();
int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); columnVectors[i + partitionIdx].setIsConstant();
int partitionIdx = requiredFields.length; for (int i = 0; i < partitionValues.numFields(); i++) { ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); columnVectors[i + partitionIdx].setIsConstant(); DataType dt = partitionSchema.fields()[i].dataType(); OnHeapColumnVector partitionCol = new OnHeapColumnVector(capacity, dt); ColumnVectorUtils.populate(partitionCol, partitionValues, i); partitionCol.setIsConstant(); orcVectorWrappers[partitionIdx + i] = partitionCol;
/** * Converts an iterator of rows into a single ColumnBatch. */ public static ColumnarBatch toBatch( StructType schema, MemoryMode memMode, Iterator<Row> row) { int capacity = 4 * 1024; WritableColumnVector[] columnVectors; if (memMode == MemoryMode.OFF_HEAP) { columnVectors = OffHeapColumnVector.allocateColumns(capacity, schema); } else { columnVectors = OnHeapColumnVector.allocateColumns(capacity, schema); } int n = 0; while (row.hasNext()) { Row r = row.next(); for (int i = 0; i < schema.fields().length; i++) { appendValue(columnVectors[i], schema.fields()[i].dataType(), r, i); } n++; } ColumnarBatch batch = new ColumnarBatch(columnVectors); batch.setNumRows(n); return batch; } }
public static Map<Integer, Integer> toJavaIntMap(ColumnarMap map) { int[] keys = toJavaIntArray(map.keyArray()); int[] values = toJavaIntArray(map.valueArray()); assert keys.length == values.length; Map<Integer, Integer> result = new HashMap<>(); for (int i = 0; i < keys.length; i++) { result.put(keys[i], values[i]); } return result; }
int partitionIdx = requiredFields.length; for (int i = 0; i < partitionValues.numFields(); i++) { ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); columnVectors[i + partitionIdx].setIsConstant(); DataType dt = partitionSchema.fields()[i].dataType(); OnHeapColumnVector partitionCol = new OnHeapColumnVector(capacity, dt); ColumnVectorUtils.populate(partitionCol, partitionValues, i); partitionCol.setIsConstant(); orcVectorWrappers[partitionIdx + i] = partitionCol;
/** * Converts an iterator of rows into a single ColumnBatch. */ public static ColumnarBatch toBatch( StructType schema, MemoryMode memMode, Iterator<Row> row) { int capacity = 4 * 1024; WritableColumnVector[] columnVectors; if (memMode == MemoryMode.OFF_HEAP) { columnVectors = OffHeapColumnVector.allocateColumns(capacity, schema); } else { columnVectors = OnHeapColumnVector.allocateColumns(capacity, schema); } int n = 0; while (row.hasNext()) { Row r = row.next(); for (int i = 0; i < schema.fields().length; i++) { appendValue(columnVectors[i], schema.fields()[i].dataType(), r, i); } n++; } ColumnarBatch batch = new ColumnarBatch(columnVectors); batch.setNumRows(n); return batch; } }
int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(columnarBatch.column(i + partitionIdx), partitionValues, i); columnarBatch.column(i + partitionIdx).setIsConstant();
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) { if (t instanceof ArrayType) { ArrayType at = (ArrayType)t; if (src.isNullAt(fieldIdx)) { dst.appendNull(); } else { List<Object> values = src.getList(fieldIdx); dst.appendArray(values.size()); for (Object o : values) { appendValue(dst.arrayData(), at.elementType(), o); } } } else if (t instanceof StructType) { StructType st = (StructType)t; if (src.isNullAt(fieldIdx)) { dst.appendStruct(true); } else { dst.appendStruct(false); Row c = src.getStruct(fieldIdx); for (int i = 0; i < st.fields().length; i++) { appendValue(dst.getChild(i), st.fields()[i].dataType(), c, i); } } } else { appendValue(dst, t, src.get(fieldIdx)); } }
int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(columnarBatch.column(i + partitionIdx), partitionValues, i); columnarBatch.column(i + partitionIdx).setIsConstant();
private static void appendValue(ColumnVector dst, DataType t, Row src, int fieldIdx) { if (t instanceof ArrayType) { ArrayType at = (ArrayType)t; if (src.isNullAt(fieldIdx)) { dst.appendNull(); } else { List<Object> values = src.getList(fieldIdx); dst.appendArray(values.size()); for (Object o : values) { appendValue(dst.arrayData(), at.elementType(), o); } } } else if (t instanceof StructType) { StructType st = (StructType)t; if (src.isNullAt(fieldIdx)) { dst.appendStruct(true); } else { dst.appendStruct(false); Row c = src.getStruct(fieldIdx); for (int i = 0; i < st.fields().length; i++) { appendValue(dst.getChildColumn(i), st.fields()[i].dataType(), c, i); } } } else { appendValue(dst, t, src.get(fieldIdx)); } }
int partitionIdx = fields.length; for (int i = 0; i < partitionColumns.fields().length; i++) { ColumnVectorUtils.populate(vectorProxy.column(i + partitionIdx), partitionValues, i); vectorProxy.column(i + partitionIdx).setIsConstant();
private static void appendValue(WritableColumnVector dst, DataType t, Row src, int fieldIdx) { if (t instanceof ArrayType) { ArrayType at = (ArrayType)t; if (src.isNullAt(fieldIdx)) { dst.appendNull(); } else { List<Object> values = src.getList(fieldIdx); dst.appendArray(values.size()); for (Object o : values) { appendValue(dst.arrayData(), at.elementType(), o); } } } else if (t instanceof StructType) { StructType st = (StructType)t; if (src.isNullAt(fieldIdx)) { dst.appendStruct(true); } else { dst.appendStruct(false); Row c = src.getStruct(fieldIdx); for (int i = 0; i < st.fields().length; i++) { appendValue(dst.getChild(i), st.fields()[i].dataType(), c, i); } } } else { appendValue(dst, t, src.get(fieldIdx)); } }
/** * Converts an iterator of rows into a single ColumnBatch. */ public static ColumnarBatch toBatch( StructType schema, MemoryMode memMode, Iterator<Row> row) { ColumnarBatch batch = ColumnarBatch.allocate(schema, memMode); int n = 0; while (row.hasNext()) { Row r = row.next(); for (int i = 0; i < schema.fields().length; i++) { appendValue(batch.column(i), schema.fields()[i].dataType(), r, i); } n++; } batch.setNumRows(n); return batch; } }
private static void appendValue(ColumnVector dst, DataType t, Row src, int fieldIdx) { if (t instanceof ArrayType) { ArrayType at = (ArrayType)t; if (src.isNullAt(fieldIdx)) { dst.appendNull(); } else { List<Object> values = src.getList(fieldIdx); dst.appendArray(values.size()); for (Object o : values) { appendValue(dst.arrayData(), at.elementType(), o); } } } else if (t instanceof StructType) { StructType st = (StructType)t; if (src.isNullAt(fieldIdx)) { dst.appendStruct(true); } else { dst.appendStruct(false); Row c = src.getStruct(fieldIdx); for (int i = 0; i < st.fields().length; i++) { appendValue(dst.getChildColumn(i), st.fields()[i].dataType(), c, i); } } } else { appendValue(dst, t, src.get(fieldIdx)); } }