@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/**
 * Converts an {@link ArrowWrapperWritable} batch into the reusable array of
 * Hive row objects.
 *
 * @param writable an {@code ArrowWrapperWritable} holding the batch to decode
 * @return the shared {@code rows} buffer populated with the batch's rows
 */
public Object deserialize(Writable writable) {
  final ArrowWrapperWritable wrapper = (ArrowWrapperWritable) writable;
  final VectorSchemaRoot schemaRoot = wrapper.getVectorSchemaRoot();
  final List<FieldVector> arrowVectors = schemaRoot.getFieldVectors();
  final int columnCount = arrowVectors.size();
  final int batchRows = schemaRoot.getRowCount();

  vectorizedRowBatch.ensureSize(batchRows);

  // Lazily (re)allocate the reusable row buffer when the batch grows.
  if (rows == null || rows.length < batchRows) {
    rows = new Object[batchRows][];
    for (int r = 0; r < batchRows; r++) {
      rows[r] = new Object[columnCount];
    }
  }

  // Copy each Arrow field vector into its projected Hive column vector.
  for (int col = 0; col < columnCount; col++) {
    final FieldVector arrowVector = arrowVectors.get(col);
    final ColumnVector hiveVector = vectorizedRowBatch.cols[vectorizedRowBatch.projectedColumns[col]];
    final TypeInfo colType = serDe.rowTypeInfo.getAllStructFieldTypeInfos().get(col);
    read(arrowVector, hiveVector, colType);
  }

  // Materialize each row out of the vectorized batch.
  for (int r = 0; r < batchRows; r++) {
    vectorExtractRow.extractRow(vectorizedRowBatch, r, rows[r]);
  }

  vectorizedRowBatch.reset();
  return rows;
}
// Snapshot the top-level column vectors of the current batch.
// NOTE(review): assumes batchData's schema root has already been loaded — confirm against caller.
List<FieldVector> vectors = batchData.getVectorSchemaRoot().getFieldVectors();
@Test public void testPrimitiveCharPadding() throws SerDeException { String[][] schema = { {"char1", "char(10)"}, }; HiveCharWritable[][] rows = new HiveCharWritable[][] { {charW("Hello", 10)}, {charW("world!", 10)}}; ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe(); StructObjectInspector rowOI = initSerDe(serDe, schema); ArrowWrapperWritable serialized = null; for (Object[] row : rows) { serialized = serDe.serialize(row, rowOI); } // Pass null to complete a batch if (serialized == null) { serialized = serDe.serialize(null, rowOI); } VarCharVector varCharVector = (VarCharVector) serialized.getVectorSchemaRoot().getFieldVectors().get(0); for (int i = 0; i < rows.length; i++) { assertEquals(rows[i][0].getPaddedValue().toString(), new String(varCharVector.get(i))); } }
/**
 * Sets the logical row count of this container and propagates it as the
 * value count of every top-level contained {@link FieldVector}.
 *
 * @param rowCount number of records
 */
public void setRowCount(int rowCount) {
  this.rowCount = rowCount;
  getFieldVectors().forEach(vector -> vector.setValueCount(rowCount));
}
/**
 * Evaluates the projector over {@code recordCount} records, writing the
 * results into {@code outVectors}.
 *
 * @param recordCount number of records to process
 * @param outVectors  destination vectors for the projected values
 * @throws Exception if evaluation fails
 */
public void execute(int recordCount, List<ValueVector> outVectors) throws Exception {
  root.setRowCount(recordCount);
  // Gather the raw field buffers that back every input vector.
  final List<ArrowBuf> inputBuffers = Lists.newArrayList();
  for (FieldVector fieldVector : root.getFieldVectors()) {
    inputBuffers.addAll(fieldVector.getFieldBuffers());
  }
  projector.evaluate(recordCount, inputBuffers, outVectors);
}
/**
 * Builds an {@link ArrowRecordBatch} describing the current contents of the
 * root: one field node per vector (including children, via appendNodes) plus
 * the backing buffers.
 *
 * @return a record batch over the root's current row count
 */
public ArrowRecordBatch getRecordBatch() {
  final List<ArrowFieldNode> fieldNodes = new ArrayList<>();
  final List<ArrowBuf> fieldBuffers = new ArrayList<>();
  // Walk every top-level vector; appendNodes recurses into child vectors.
  root.getFieldVectors().forEach(vector -> appendNodes(vector, fieldNodes, fieldBuffers));
  return new ArrowRecordBatch(root.getRowCount(), fieldNodes, fieldBuffers, alignBuffers);
}
/**
 * Validate two arrow vectorSchemaRoot are equal.
 *
 * @param root1 the 1st schema to compare
 * @param root2 the 2nd schema to compare
 * @throws IllegalArgumentException if they are different.
 */
public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) {
  // Fix: compare in (root1, root2) order so mismatch messages read in the
  // same order as the parameters — the original swapped the arguments.
  compareSchemas(root1.getSchema(), root2.getSchema());
  if (root1.getRowCount() != root2.getRowCount()) {
    throw new IllegalArgumentException("Different row count:\n" + root1.getRowCount()
        + " != " + root2.getRowCount());
  }
  List<FieldVector> vectors1 = root1.getFieldVectors();
  List<FieldVector> vectors2 = root2.getFieldVectors();
  if (vectors1.size() != vectors2.size()) {
    throw new IllegalArgumentException("Different column count:\n" + vectors1.toString()
        + "\n!=\n" + vectors2.toString());
  }
  // Column-by-column comparison of data and validity.
  for (int i = 0; i < vectors1.size(); i++) {
    compareFieldVectors(vectors1.get(i), vectors2.get(i));
  }
}
/**
 * Loads the record batch into this loader's vectors.
 * Will not close the record batch.
 *
 * @param recordBatch the batch to load
 * @throws IllegalArgumentException if the batch's nodes/buffers do not match
 *         this root's schema exactly (leftovers remain after loading)
 */
public void load(ArrowRecordBatch recordBatch) {
  final Iterator<ArrowBuf> bufferIterator = recordBatch.getBuffers().iterator();
  final Iterator<ArrowFieldNode> nodeIterator = recordBatch.getNodes().iterator();
  // Each field vector consumes its share of nodes and buffers in schema order.
  for (FieldVector vector : root.getFieldVectors()) {
    loadBuffers(vector, vector.getField(), bufferIterator, nodeIterator);
  }
  root.setRowCount(recordBatch.getLength());
  // Anything left over means the batch did not match this root's schema.
  if (nodeIterator.hasNext() || bufferIterator.hasNext()) {
    throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: "
        + Collections2.toList(nodeIterator).toString()
        + " buffers: " + Collections2.toList(bufferIterator).toString());
  }
}
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
// Snapshot the top-level column vectors of the current batch.
// NOTE(review): assumes batchData's schema root has already been loaded — confirm against caller.
List<FieldVector> vectors = batchData.getVectorSchemaRoot().getFieldVectors();
/** * Filter a batch of records against the expression. * @param recordCount - number of records to consume * @return the number of records that passed the filter * @throws GandivaException on evaluation exception. */ public int filterBatch(int recordCount) throws GandivaException { if (recordCount == 0) { return 0; } root.setRowCount(recordCount); List<ArrowBuf> buffers = Lists.newArrayList(); for (FieldVector v : root.getFieldVectors()) { buffers.addAll(v.getFieldBuffers()); } selectionVector.allocateNew(recordCount); // do not take ownership of the buffer. ArrowBuf svBuffer = selectionVector.getBuffer(false); SelectionVector selectionVectorGandiva = new SelectionVectorInt16(svBuffer); filter.evaluate(recordCount, buffers, selectionVectorGandiva); selectionVector.setRecordCount(selectionVectorGandiva.getRecordCount()); return selectionVector.getCount(); }