/**
 * Serializes one Hive struct column into an Arrow struct vector.
 *
 * <p>Each struct field is written recursively via {@code write(...)} into a child vector
 * obtained with {@code addOrGet}, then the struct-level validity buffer is populated from
 * the Hive vector's {@code isNull} flags.
 *
 * @param arrowVector        target Arrow struct vector
 * @param hiveVector         source Hive struct column (children in {@code fields})
 * @param typeInfo           Hive struct type describing field names/types
 * @param size               number of rows to write
 * @param vectorizedRowBatch enclosing batch, passed through to nested writes
 * @param isNative           passed through to nested writes
 */
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector,
    StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final ColumnVector[] hiveFieldVectors = hiveVector.fields;
  final int fieldSize = fieldTypeInfos.size();
  for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
    final ColumnVector hiveFieldVector = hiveFieldVectors[fieldIndex];
    final String fieldName = fieldNames.get(fieldIndex);
    // Reuse the fieldTypeInfo local instead of performing a second list lookup.
    final FieldVector arrowFieldVector =
        arrowVector.addOrGet(fieldName, toFieldType(fieldTypeInfo), FieldVector.class);
    arrowFieldVector.setInitialCapacity(size);
    arrowFieldVector.allocateNew();
    write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
  }
  // Mirror Hive's per-row null flags into the Arrow struct's validity bitmap.
  final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    if (hiveVector.isNull[rowIndex]) {
      BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
    }
  }
}
/**
 * Closes the Arrow stream writer, releases the root vector, and verifies that the
 * allocator holds no leaked memory before closing it.
 *
 * @param reporter Hadoop progress reporter (unused here)
 * @throws IOException if closing the stream writer fails
 * @throws IllegalStateException if the allocator still reports allocated bytes after
 *         the root vector is closed (a vector was not released)
 */
@Override public void close(Reporter reporter) throws IOException {
  try {
    arrowStreamWriter.close();
  } finally {
    rootVector.close();
    // bytesLeaked should always be 0; anything else means a buffer was never released.
    long bytesLeaked = allocator.getAllocatedMemory();
    if(bytesLeaked != 0) {
      LOG.error("Arrow memory leaked bytes: {}", bytesLeaked);
      // NOTE(review): allocator.close() is deliberately skipped on the leak path —
      // closing an allocator with outstanding allocations would itself throw. Confirm intended.
      throw new IllegalStateException("Arrow memory leaked bytes:" + bytesLeaked);
    }
    allocator.close();
  }
}
@Override public TinyIntWriter tinyInt(String name) { FieldWriter writer = fields.get(handleCase(name)); if(writer == null) { ValueVector vector; ValueVector currentVector = container.getChild(name); TinyIntVector v = container.addOrGet(name, FieldType.nullable( MinorType.TINYINT.getType() ), TinyIntVector.class); writer = new PromotableWriter(v, container, getNullableStructWriterFactory()); vector = v; if (currentVector == null || currentVector != vector) { if(this.initialCapacity > 0) { vector.setInitialCapacity(this.initialCapacity); } vector.allocateNewSafe(); } writer.setPosition(idx()); fields.put(handleCase(name), writer); } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized ((PromotableWriter)writer).getWriter(MinorType.TINYINT); } } return writer; }
// Build an anonymous root struct vector and a complex writer rooted at "col".
final NonNullableStructVector structVector = new NonNullableStructVector("", allocator, null);
final ComplexWriterImpl writer = new ComplexWriterImpl("col", structVector);
final StructWriter struct = writer.rootAsStruct();
// Dump the first two rows as maps; `ow` is presumably a Jackson ObjectWriter — TODO confirm.
System.out.println("Map of Object[0]: " + ow.writeValueAsString(structVector.getObject(0)));
System.out.println("Map of Object[1]: " + ow.writeValueAsString(structVector.getObject(1)));
// Read row 0 of the "col" child back through a FieldReader and stream it to jsonWriter.
final FieldReader reader = structVector.getChild("col", StructVector.class).getReader();
reader.setPosition(0);
jsonWriter.write(reader);
/**
 * Adds {@code v} as a child of the internal struct, keyed by its lower-cased minor-type
 * name, and transfers the contents of {@code v} into the newly added vector.
 *
 * @param v vector to absorb; its buffers are transferred (v is left empty)
 * @return the child vector now owned by the internal struct
 * @throws IllegalStateException if a child with the same minor-type name already exists
 */
public FieldVector addVector(FieldVector v) {
  String name = v.getMinorType().name().toLowerCase();
  // Use the lazy message-template overload so the message is only formatted on failure,
  // instead of eagerly calling String.format on every invocation.
  Preconditions.checkState(internalStruct.getChild(name) == null, "%s vector already exists", name);
  final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
  v.makeTransferPair(newVector).transfer();
  internalStruct.putChild(name, newVector);
  if (callBack != null) {
    // Notify the schema-change callback that a new child was added.
    callBack.doWork();
  }
  return newVector;
}
/**
 * Builds a transfer pair between two struct vectors by pairing each child of
 * {@code from} with a same-named, same-typed child created (or found) on {@code to}.
 *
 * @param from     source struct vector
 * @param to       destination struct vector
 * @param allocate if true, allocate buffers for any child vector newly created on {@code to}
 */
protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) {
  this.from = from;
  this.to = to;
  this.pairs = new TransferPair[from.size()];
  // Reset any cached ephemeral pair on the destination; it would reference stale children.
  this.to.ephPair = null;
  int i = 0;
  FieldVector vector;
  for (String child : from.getChildFieldNames()) {
    int preSize = to.size();
    vector = from.getChild(child);
    if (vector == null) {
      continue;
    }
    //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector,
    // the child fields may be nested fields of the top level child. For example if the structure
    // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab.
    // But the children member of a Materialized field is a HashSet. If the fields are added in the
    // children HashSet, and the hashCode of the Materialized field includes the hash code of the
    // children, the hashCode value of oa changes *after* the field has been added to the HashSet.
    // (This is similar to what happens in ScanBatch where the children cannot be added till they are
    // read). To take care of this, we ensure that the hashCode of the MaterializedField does not
    // include the hashCode of the children but is based only on MaterializedField$key.
    final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass());
    // A size increase means addOrGet created the child, so it may need fresh buffers.
    if (allocate && to.size() != preSize) {
      newVector.allocateNew();
    }
    pairs[i++] = vector.makeTransferPair(newVector);
  }
}
/**
 * Creates a transfer pair targeting a fresh struct vector on the given allocator.
 * Pass {@code allocate=false}: child buffers are only allocated lazily by the pair.
 */
@Override
public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
  // NOTE(review): `ref` is ignored — the target vector reuses this vector's name. Confirm intended.
  final NonNullableStructVector target =
      new NonNullableStructVector(name, allocator, fieldType, callBack);
  return new StructTransferPair(this, target, false);
}
/**
 * Builds the Drill-style serialized metadata for this struct vector: name, STRUCT major
 * type, total buffer length, value count, and one child entry per child vector (in order).
 */
public SerializedField getMetadata() {
  final SerializedField.Builder builder = SerializedField.newBuilder()
      .setNamePart(NamePart.newBuilder().setName(structVector.getField().getName()))
      .setMajorType(Types.optional(MinorType.STRUCT))
      .setBufferLength(structVector.getBufferSize())
      .setValueCount(structVector.valueCount);
  for (final ValueVector child : structVector.getChildren()) {
    builder.addChild(TypeHelper.getMetadata(child));
  }
  return builder.build();
}
}
/**
 * Creates a struct vector named {@code name} on the shared test allocator and sets its
 * value count to COUNT. Caller is responsible for closing the returned vector.
 */
private NonNullableStructVector structVector(String name) {
  NonNullableStructVector vector = new NonNullableStructVector(name, allocator, null);
  vector.setValueCount(COUNT);
  return vector;
/** Returns the field children; for this vector the child list and the fields coincide. */
public List<FieldVector> getChildrenFromFields() {
  return getChildren();
}
}
/**
 * Builds a sparse-union Field from the internal struct's children: one child Field and
 * one type id (the child's minor-type ordinal) per child vector, in child order.
 */
@Override
public Field getField() {
  final List<FieldVector> children = internalStruct.getChildren();
  final List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>(children.size());
  final int[] typeIds = new int[children.size()];
  int position = 0;
  for (final ValueVector child : children) {
    typeIds[position++] = child.getMinorType().ordinal();
    childFields.add(child.getField());
  }
  return new Field(name, FieldType.nullable(new ArrowType.Union(Sparse, typeIds)), childFields);
}
@Override public ListWriter list(String name) { String finalName = handleCase(name); FieldWriter writer = fields.get(finalName); int vectorCount = container.size(); if(writer == null) { writer = new PromotableWriter(container.addOrGet(name, FieldType.nullable(MinorType.LIST.getType()), ListVector.class), container, getNullableStructWriterFactory()); if (container.size() > vectorCount) { writer.allocate(); } writer.setPosition(idx()); fields.put(finalName, writer); } else { if (writer instanceof PromotableWriter) { // ensure writers are initialized ((PromotableWriter)writer).getWriter(MinorType.LIST); } } return writer; }
/**
 * Materializes row {@code index} as a map of child-field name to child value.
 * Children that are absent, out of range for the index, or null are omitted.
 */
@Override
public Object getObject(int index) {
  final Map<String, Object> result = new JsonStringHashMap<>();
  for (final String childName : getChildFieldNames()) {
    final ValueVector child = getChild(childName);
    if (child == null || index >= child.getValueCount()) {
      continue;
    }
    final Object childValue = child.getObject(index);
    if (childValue != null) {
      result.put(childName, childValue);
    }
  }
  return result;
}
/**
 * Sets this union's value count, growing the type buffer as needed, and propagates
 * the count to the internal struct of child vectors.
 */
public void setValueCount(int valueCount) {
  this.valueCount = valueCount;
  // Double the type buffer until it can hold valueCount entries.
  while (getTypeBufferValueCapacity() < valueCount) {
    reallocTypeBuffer();
  }
  internalStruct.setValueCount(valueCount);
}
/** Returns the Field metadata of the wrapped delegate vector. */
@Override public Field getField() { return vector.getField(); }
// Re-create writers for children already present on the container's field.
for (Field child : container.getField().getChildren()) {
  MinorType minorType = Types.getMinorTypeForArrowType(child.getType());
  switch (minorType) {
    // Removed an illegal stray `break;` that sat directly inside the switch body before
    // the first case label — statements there do not compile in Java.
    case UNION:
      UnionWriter writer = new UnionWriter(
          container.addOrGet(child.getName(),
              FieldType.nullable(MinorType.UNION.getType()), UnionVector.class),
          getNullableStructWriterFactory());
      fields.put(handleCase(child.getName()), writer);
      break;
/**
 * Returns (caching on first use) a reader for the named child. Missing children map to
 * {@code NullReader.INSTANCE}; newly created readers are positioned at the current index.
 */
@Override
public FieldReader reader(String name) {
  final FieldReader cached = fields.get(name);
  if (cached != null) {
    return cached;
  }
  final ValueVector child = vector.getChild(name);
  final FieldReader reader = (child == null) ? NullReader.INSTANCE : child.getReader();
  fields.put(name, reader);
  reader.setPosition(idx());
  return reader;
}
/** Returns null for unset slots, otherwise delegates to the non-nullable implementation. */
@Override
public Object getObject(int index) {
  return isSet(index) == 0 ? null : super.getObject(index);
}
/**
 * Get the size (number of bytes) of underlying buffers used by this vector.
 *
 * @return size of underlying buffers.
 */
@Override
public int getBufferSize() {
  if (valueCount == 0) {
    return 0;
  }
  // Parent's buffers plus this vector's validity bitmap.
  final int validityBytes = BitVectorHelper.getValidityBufferSize(valueCount);
  return validityBytes + super.getBufferSize();
}
if (${uncappedName}Vector == null) {
  // Record the child count so we can tell whether addOrGet created a new vector below.
  int vectorCount = internalStruct.size();
  ${uncappedName}Vector = addOrGet(MinorType.${name?upper_case}, ${name}Vector.class);
  // A size increase means the vector is brand new and needs its buffers allocated.
  if (internalStruct.size() > vectorCount) {
    ${uncappedName}Vector.allocateNew();