/**
 * Builds an empty (zero-row) Arrow batch: one allocated vector per output
 * field is added to the root struct vector, then the whole root is wrapped
 * for writing. Used to emit schema without any rows.
 */
public ArrowWrapperWritable emptyBatch() {
  rootVector.setValueCount(0);
  final int fieldCount = fieldTypeInfos.size();
  for (int i = 0; i < fieldCount; i++) {
    final TypeInfo typeInfo = fieldTypeInfos.get(i);
    final String name = fieldNames.get(i);
    final FieldType fieldType = toFieldType(typeInfo);
    final FieldVector fieldVector = rootVector.addOrGet(name, fieldType, FieldVector.class);
    // Zero capacity: the batch carries the schema only, no row data.
    fieldVector.setInitialCapacity(0);
    fieldVector.allocateNew();
  }
  final VectorSchemaRoot schemaRoot = new VectorSchemaRoot(rootVector);
  return new ArrowWrapperWritable(schemaRoot, allocator, rootVector);
}
/**
 * Closes the Arrow stream writer and tears down this task's Arrow memory,
 * verifying that no direct memory was leaked.
 *
 * <p>The root vector is released even if the writer fails to close. After
 * release, the per-task child allocator must account zero bytes; if it does
 * not, the leak is logged and surfaced as an exception (the allocator is then
 * deliberately left unclosed so the failure is not masked).
 *
 * @param reporter unused
 * @throws IOException if closing the Arrow stream writer fails
 * @throws IllegalStateException if the child allocator still holds bytes
 */
@Override
public void close(Reporter reporter) throws IOException {
  try {
    arrowStreamWriter.close();
  } finally {
    rootVector.close();
    //bytesLeaked should always be 0
    long bytesLeaked = allocator.getAllocatedMemory();
    if(bytesLeaked != 0) {
      LOG.error("Arrow memory leaked bytes: {}", bytesLeaked);
      throw new IllegalStateException("Arrow memory leaked bytes:" + bytesLeaked);
    }
    allocator.close();
  }
}
/**
 * Non-serde constructor: creates a per-task child Arrow allocator (accounting
 * only — zero reservation) under the shared root allocator, and an empty root
 * struct vector that batches are built into.
 *
 * @param conf      Hive configuration; supplies the per-task allocator limit
 * @param attemptId task attempt id, used as the child allocator's name
 * @param typeInfos Hive type info for each output field
 * @param fieldNames output field names, parallel to {@code typeInfos}
 */
public Serializer(Configuration conf, String attemptId, List<TypeInfo> typeInfos, List<String> fieldNames) {
  this.fieldTypeInfos = typeInfos;
  this.fieldNames = fieldNames;
  long childAllocatorLimit = HiveConf.getLongVar(conf, HIVE_ARROW_BATCH_ALLOCATOR_LIMIT);
  //Use per-task allocator for accounting only, no need to reserve per-task memory
  long childAllocatorReservation = 0L;
  //Break out accounting of direct memory per-task, so we can check no memory is leaked when task is completed
  allocator = RootAllocatorFactory.INSTANCE.getRootAllocator(conf).newChildAllocator(
      attemptId,
      childAllocatorReservation,
      childAllocatorLimit);
  rootVector = StructVector.empty(null, allocator);
  //These last fields are unused in non-serde usage
  vectorizedRowBatch = null;
  vectorAssignRow = null;
  MAX_BUFFERED_ROWS = 0;
}
public ArrowWrapperWritable serializeBatch(VectorizedRowBatch vectorizedRowBatch, boolean isNative) { rootVector.setValueCount(0); if(rootVector.getChild(fieldName) != null) { fieldExists = true; final FieldVector arrowVector = rootVector.addOrGet(fieldName, fieldType, FieldVector.class); if(fieldExists) { arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize); rootVector.setValueCount(batchSize); } else { rootVector.setValueCount(vectorizedRowBatch.size);
/**
 * Writes a Hive list column into an Arrow {@link ListVector}: first the
 * flattened element vector is written recursively, then the list offset
 * buffer is filled row by row from the Hive lengths array.
 *
 * <p>NOTE(review): element data is written for the first
 * {@code hiveVector.childCount} child values and offsets are accumulated from
 * zero — this assumes the Hive child vector's entries are densely packed in
 * row order; confirm against the callers.
 *
 * <p>Fix: the previous local {@code OFFSET_WIDTH = 4} constant duplicated
 * {@code ListVector.OFFSET_WIDTH} (used by the read path); the shared constant
 * is used now. The identical offset write in both branches is also hoisted.
 *
 * @param size number of rows (lists) to write
 */
private void writeList(ListVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo, int size,
    VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
  final ColumnVector hiveElementVector = hiveVector.child;
  final FieldVector arrowElementVector =
      (FieldVector) arrowVector.addOrGetVector(toFieldType(elementTypeInfo)).getVector();
  arrowElementVector.setInitialCapacity(hiveVector.childCount);
  arrowElementVector.allocateNew();
  write(arrowElementVector, hiveElementVector, elementTypeInfo, hiveVector.childCount, vectorizedRowBatch, isNative);
  final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
  int nextOffset = 0;
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    // Every row records its start offset; null rows get a zero-length span
    // (their validity bit is simply never set).
    offsetBuffer.setInt(rowIndex * ListVector.OFFSET_WIDTH, nextOffset);
    if (!hiveVector.isNull[rowIndex]) {
      nextOffset += (int) hiveVector.lengths[rowIndex];
      arrowVector.setNotNull(rowIndex);
    }
  }
  // Trailing offset closes the last list.
  offsetBuffer.setInt(size * ListVector.OFFSET_WIDTH, nextOffset);
}
// Wraps an Arrow ListVector: keeps the vector itself for offset/validity
// lookups, and exposes its flattened element (data) vector as a child
// ArrowColumnVector so per-row slices can be taken from it.
ArrayAccessor(ListVector vector) {
  super(vector);
  this.accessor = vector;
  this.arrayData = new ArrowColumnVector(vector.getDataVector());
}
@Override final boolean isNullAt(int rowId) { // TODO: Workaround if vector has all non-null values, see ARROW-1948 if (accessor.getValueCount() > 0 && accessor.getValidityBuffer().capacity() == 0) { return false; } else { return super.isNullAt(rowId); } }
accessor = new StructAccessor(structVector); childColumns = new ArrowColumnVector[structVector.size()]; for (int i = 0; i < childColumns.length; ++i) { childColumns[i] = new ArrowColumnVector(structVector.getVectorById(i));
/**
 * Reads this row's list by slicing the flattened element vector between the
 * start and end offsets stored in the list vector's offset buffer.
 */
@Override
final ColumnarArray getArray(int rowId) {
  final ArrowBuf offsetBuffer = accessor.getOffsetBuffer();
  final int offsetIndex = rowId * ListVector.OFFSET_WIDTH;
  final int startOffset = offsetBuffer.getInt(offsetIndex);
  final int endOffset = offsetBuffer.getInt(offsetIndex + ListVector.OFFSET_WIDTH);
  return new ColumnarArray(arrayData, startOffset, endOffset - startOffset);
}
}
/**
 * Writes a Hive struct column into an Arrow struct vector: each Hive field
 * vector is recursively written into the matching (added-or-fetched) Arrow
 * child vector, then the struct's own validity bits are set from the Hive
 * null flags.
 *
 * @param size number of rows to write
 */
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector,
    StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final ColumnVector[] hiveFieldVectors = hiveVector.fields;
  final int fieldSize = fieldTypeInfos.size();
  for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
    final ColumnVector hiveFieldVector = hiveFieldVectors[fieldIndex];
    final String fieldName = fieldNames.get(fieldIndex);
    final FieldVector arrowFieldVector =
        arrowVector.addOrGet(fieldName, toFieldType(fieldTypeInfos.get(fieldIndex)), FieldVector.class);
    arrowFieldVector.setInitialCapacity(size);
    arrowFieldVector.allocateNew();
    write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
  }
  // Patch the struct-level validity bits directly: 0 = null row, 1 = present.
  final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    if (hiveVector.isNull[rowIndex]) {
      BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
    }
  }
}
/**
 * Writes a Hive map column into an Arrow ListVector by converting it to a
 * list-of-struct(key, value) representation and delegating to the generic
 * write path, then overwriting the list-level validity bits from the map's
 * own null flags (the converted list vector does not carry them).
 *
 * @param size number of rows to write
 */
private void writeMap(ListVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo, int size,
    VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final ListTypeInfo structListTypeInfo = toStructListTypeInfo(typeInfo);
  final ListColumnVector structListVector = toStructListVector(hiveVector);
  write(arrowVector, structListVector, structListTypeInfo, size, vectorizedRowBatch, isNative);
  // Patch validity directly from the original map vector: 0 = null, 1 = present.
  final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    if (hiveVector.isNull[rowIndex]) {
      BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
    }
  }
}
// Clears the parent's buffers, then releases this vector's own validity
// buffer (releaseBuffer returns the replacement/empty buffer to hold) and
// resets the last-set index.
@Override
public void clear() {
  super.clear();
  validityBuffer = releaseBuffer(validityBuffer);
  lastSet = 0;
}
// Grows this vector: the validity buffer is reallocated first so it stays in
// step with the data buffers grown by super.reAlloc().
@Override
public void reAlloc() {
  /* reallocate the validity buffer */
  reallocValidityBuffer();
  super.reAlloc();
}
// Wraps an Arrow ListVector: keeps the vector itself for offset/validity
// lookups, and exposes its flattened element (data) vector as a child
// ArrowColumnVector so per-row slices can be taken from it.
ArrayAccessor(ListVector vector) {
  super(vector);
  this.accessor = vector;
  this.arrayData = new ArrowColumnVector(vector.getDataVector());
}
@Override final boolean isNullAt(int rowId) { // TODO: Workaround if vector has all non-null values, see ARROW-1948 if (accessor.getValueCount() > 0 && accessor.getValidityBuffer().capacity() == 0) { return false; } else { return super.isNullAt(rowId); } }
accessor = new StructAccessor(structVector); childColumns = new ArrowColumnVector[structVector.size()]; for (int i = 0; i < childColumns.length; ++i) { childColumns[i] = new ArrowColumnVector(structVector.getVectorById(i));
/**
 * Reads this row's list by slicing the flattened element vector between the
 * start and end offsets stored in the list vector's offset buffer.
 */
@Override
final ColumnarArray getArray(int rowId) {
  final ArrowBuf offsetBuffer = accessor.getOffsetBuffer();
  final int offsetIndex = rowId * ListVector.OFFSET_WIDTH;
  final int startOffset = offsetBuffer.getInt(offsetIndex);
  final int endOffset = offsetBuffer.getInt(offsetIndex + ListVector.OFFSET_WIDTH);
  return new ColumnarArray(arrayData, startOffset, endOffset - startOffset);
}
}
fieldSize = fieldTypeInfos.size(); rootVector = StructVector.empty(null, allocator);