/**
 * Turns the Arrow data carried by {@code writable} into an array of row arrays.
 *
 * <p>Each Arrow field vector is first read into the matching projected column of the
 * shared {@code vectorizedRowBatch}; every row is then extracted from that batch into the
 * {@code rows} buffer, and the batch is reset before returning.
 *
 * @param writable the input; it is cast to {@code ArrowWrapperWritable}
 * @return the {@code rows} buffer. The buffer is only re-allocated when it is missing or
 *     shorter than the incoming row count, so it can be longer than the current batch;
 *     only the first {@code getRowCount()} entries are written by this call.
 * @throws ClassCastException if {@code writable} is not an {@code ArrowWrapperWritable}
 */
public Object deserialize(Writable writable) {
  final VectorSchemaRoot root = ((ArrowWrapperWritable) writable).getVectorSchemaRoot();
  final List<FieldVector> vectors = root.getFieldVectors();
  final int numFields = vectors.size();
  final int numRows = root.getRowCount();

  vectorizedRowBatch.ensureSize(numRows);

  // Keep the existing buffer whenever it is already large enough for this batch.
  final boolean bufferUsable = rows != null && rows.length >= numRows;
  if (!bufferUsable) {
    // One Object[numFields] per row, all allocated up front.
    rows = new Object[numRows][numFields];
  }

  // Stage 1: copy every Arrow vector into its projected column of the row batch.
  for (int f = 0; f < numFields; f++) {
    final FieldVector source = vectors.get(f);
    final ColumnVector target = vectorizedRowBatch.cols[vectorizedRowBatch.projectedColumns[f]];
    final TypeInfo fieldType = serDe.rowTypeInfo.getAllStructFieldTypeInfos().get(f);
    read(source, target, fieldType);
  }

  // Stage 2: materialize each row of the batch into the row buffer.
  int r = 0;
  while (r < numRows) {
    vectorExtractRow.extractRow(vectorizedRowBatch, r, rows[r]);
    r++;
  }

  vectorizedRowBatch.reset();
  return rows;
}
/**
 * Checks that two Arrow {@code VectorSchemaRoot}s are equal.
 *
 * <p>The checks run in this order: schemas (delegated to {@code compareSchemas}), row
 * counts, number of field vectors, and finally each pair of field vectors at the same
 * index (delegated to {@code compareFieldVectors}).
 *
 * @param root1 the 1st root to compare
 * @param root2 the 2nd root to compare
 * @throws IllegalArgumentException if the row counts or the column counts differ
 *     (presumably also raised by the delegated schema and vector comparisons, which are
 *     defined elsewhere -- confirm there)
 */
public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) {
  // NOTE(review): root2's schema is passed first here; kept as-is because the
  // parameter roles of compareSchemas are not visible from this method.
  compareSchemas(root2.getSchema(), root1.getSchema());

  final int rowCount1 = root1.getRowCount();
  final int rowCount2 = root2.getRowCount();
  if (rowCount1 != rowCount2) {
    throw new IllegalArgumentException("Different row count:\n" + rowCount1 + " != " + rowCount2);
  }

  final List<FieldVector> vectors1 = root1.getFieldVectors();
  final List<FieldVector> vectors2 = root2.getFieldVectors();
  final int columnCount = vectors1.size();
  if (columnCount != vectors2.size()) {
    throw new IllegalArgumentException(
        "Different column count:\n" + vectors1 + "\n!=\n" + vectors2);
  }

  // Both lists have the same length at this point, so compare them pairwise by position.
  for (int column = 0; column < columnCount; column++) {
    compareFieldVectors(vectors1.get(column), vectors2.get(column));
  }
}
/**
 * Builds an {@code ArrowRecordBatch} from the current contents of {@code root}.
 *
 * <p>Every field vector of {@code root} is passed to {@code appendNodes}, which receives
 * the shared node and buffer lists to fill. The resulting lists, the root's row count and
 * the {@code alignBuffers} setting are then handed to the {@code ArrowRecordBatch}
 * constructor.
 *
 * @return a new record batch for the root's field vectors
 */
public ArrowRecordBatch getRecordBatch() {
  final List<ArrowFieldNode> fieldNodes = new ArrayList<>();
  final List<ArrowBuf> dataBuffers = new ArrayList<>();

  final List<FieldVector> fieldVectors = root.getFieldVectors();
  for (int i = 0; i < fieldVectors.size(); i++) {
    appendNodes(fieldVectors.get(i), fieldNodes, dataBuffers);
  }

  // The row count is read only after all vectors have been visited, as before.
  final int rowCount = root.getRowCount();
  return new ArrowRecordBatch(rowCount, fieldNodes, dataBuffers, alignBuffers);
}
/**
 * Writes one record batch as a JSON object through {@code generator}.
 *
 * <p>The emitted object has a {@code "count"} field holding the batch's row count and a
 * {@code "columns"} array; one entry per schema field is produced by
 * {@code writeFromVectorIntoJson}, using the vector looked up by the field's name.
 *
 * @param recordBatch the batch to write
 * @throws IOException declared by this method; presumably raised when the underlying
 *     generator fails to write
 */
private void writeBatch(VectorSchemaRoot recordBatch) throws IOException {
  generator.writeStartObject();

  generator.writeObjectField("count", recordBatch.getRowCount());

  generator.writeArrayFieldStart("columns");
  for (Field column : recordBatch.getSchema().getFields()) {
    // Vectors are resolved by field name, in schema order.
    writeFromVectorIntoJson(column, recordBatch.getVector(column.getName()));
  }
  generator.writeEndArray();

  generator.writeEndObject();
}