/**
 * Reads an Arrow list vector into a Hive ListColumnVector: converts the child
 * (element) vector recursively, then copies per-row offsets/lengths out of the
 * Arrow offset buffer.
 */
private void readList(FieldVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo) {
  // Width in bytes of one entry in Arrow's list offset buffer.
  final int OFFSET_WIDTH = 4;
  final int rowCount = arrowVector.getValueCount();
  final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();

  // Convert the single element child vector first.
  read(arrowVector.getChildrenFromFields().get(0), hiveVector.child, typeInfo.getListElementTypeInfo());

  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
    } else {
      hiveVector.isNull[row] = false;
      // Arrow stores (start, end) as consecutive offsets; Hive wants (start, length).
      final int start = offsetBuffer.getInt(row * OFFSET_WIDTH);
      hiveVector.offsets[row] = start;
      hiveVector.lengths[row] = offsetBuffer.getInt((row + 1) * OFFSET_WIDTH) - start;
    }
  }
}
arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize); } else { arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize); arrowVector.allocateNew();
/**
 * Reads an Arrow struct vector into a Hive StructColumnVector: converts each
 * member column recursively, then propagates the parent-level null flags.
 */
private void readStruct(FieldVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo) {
  final int rowCount = arrowVector.getValueCount();
  final List<TypeInfo> memberTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final List<FieldVector> memberVectors = arrowVector.getChildrenFromFields();

  // Convert every struct member column, pairing by position.
  for (int member = 0; member < memberVectors.size(); member++) {
    read(memberVectors.get(member), hiveVector.fields[member], memberTypeInfos.get(member));
  }

  // Copy per-row null flags onto the Hive struct vector itself.
  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
    } else {
      hiveVector.isNull[row] = false;
    }
  }
}
/**
 * Builds a writable wrapping a zero-row batch: every schema column is still
 * created and allocated (with capacity 0) so downstream readers see the full
 * schema even when there is no data.
 */
public ArrowWrapperWritable emptyBatch() {
  rootVector.setValueCount(0);
  for (int i = 0; i < fieldTypeInfos.size(); i++) {
    final FieldType arrowFieldType = toFieldType(fieldTypeInfos.get(i));
    // addOrGet materializes the column on the root if it is not already there.
    final FieldVector childVector = rootVector.addOrGet(fieldNames.get(i), arrowFieldType, FieldVector.class);
    childVector.setInitialCapacity(0);
    childVector.allocateNew();
  }
  return new ArrowWrapperWritable(new VectorSchemaRoot(rootVector), allocator, rootVector);
}
/**
 * Reads an Arrow map vector into a Hive MapColumnVector by going through the
 * map's list-of-struct(key, value) representation, then copying the list-level
 * metadata (offsets, lengths, nulls) back onto the map vector.
 */
private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo) {
  final int rowCount = arrowVector.getValueCount();
  final ListTypeInfo entryListTypeInfo = toStructListTypeInfo(typeInfo);
  final ListColumnVector entryListVector = toStructListVector(hiveVector);
  final StructColumnVector entryVector = (StructColumnVector) entryListVector.child;

  // Delegate the actual conversion to the list reader.
  read(arrowVector, entryListVector, entryListTypeInfo);

  hiveVector.isRepeating = entryListVector.isRepeating;
  hiveVector.childCount = entryListVector.childCount;
  hiveVector.noNulls = entryListVector.noNulls;
  // Entry struct field 0 holds keys, field 1 holds values.
  hiveVector.keys = entryVector.fields[0];
  hiveVector.values = entryVector.fields[1];
  System.arraycopy(entryListVector.offsets, 0, hiveVector.offsets, 0, rowCount);
  System.arraycopy(entryListVector.lengths, 0, hiveVector.lengths, 0, rowCount);
  System.arraycopy(entryListVector.isNull, 0, hiveVector.isNull, 0, rowCount);
}
/**
 * Recursively appends this vector's field node and buffers, then those of its
 * children, keeping nodes/buffers in depth-first schema order.
 *
 * @throws IllegalArgumentException if the vector exposes a different number of
 *     buffers than its type layout requires
 */
private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
  // A null count of -1 signals "not included" when includeNullCount is off.
  final int nullCount = includeNullCount ? vector.getNullCount() : -1;
  nodes.add(new ArrowFieldNode(vector.getValueCount(), nullCount));

  final List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
  final List<BufferType> expectedBuffers = TypeLayout.getTypeLayout(vector.getField().getType()).getBufferTypes();
  if (fieldBuffers.size() != expectedBuffers.size()) {
    throw new IllegalArgumentException(String.format(
        "wrong number of buffers for field %s in vector %s. found: %s",
        vector.getField(), vector.getClass().getSimpleName(), fieldBuffers));
  }
  buffers.addAll(fieldBuffers);

  for (FieldVector childVector : vector.getChildrenFromFields()) {
    appendNodes(childVector, nodes, buffers);
  }
}
validateType(vector.getMinorType()); Map<Object, Integer> lookUps = new HashMap<>(dictionary.getVector().getValueCount()); for (int i = 0; i < dictionary.getVector().getValueCount(); i++) { lookUps.put(dictionary.getVector().getObject(i), i); indices.allocateNew(); indices.setValueCount(count);
/**
 * Emits the "dictionaries" JSON array: one object per used dictionary id, each
 * containing the dictionary's backing vector serialized as a record batch.
 *
 * NOTE(review): the single-column VectorSchemaRoot is deliberately not closed
 * here — closing it would release the provider-owned dictionary vector.
 */
private void writeDictionaryBatches(JsonGenerator generator, Set<Long> dictionaryIdsUsed, DictionaryProvider provider)
    throws IOException {
  generator.writeArrayFieldStart("dictionaries");
  for (Long dictionaryId : dictionaryIdsUsed) {
    generator.writeStartObject();
    generator.writeObjectField("id", dictionaryId);
    generator.writeFieldName("data");

    // Wrap the dictionary's vector in a one-column root and reuse the batch writer.
    final FieldVector dictionaryVector = provider.lookup(dictionaryId).getVector();
    final List<Field> schemaFields = Collections.singletonList(dictionaryVector.getField());
    final List<FieldVector> columns = Collections.singletonList(dictionaryVector);
    writeBatch(new VectorSchemaRoot(schemaFields, columns, dictionaryVector.getValueCount()));

    generator.writeEndObject();
  }
  generator.writeEndArray();
}
private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { List<BufferType> vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List<ArrowBuf> vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " + int valueCount = vector.getValueCount(); generator.writeObjectField("count", valueCount); final int bufferValueCount = (bufferType.equals(OFFSET)) ? valueCount + 1 : valueCount; for (int i = 0; i < bufferValueCount; i++) { if (bufferType.equals(DATA) && (vector.getMinorType() == MinorType.VARCHAR || vector.getMinorType() == MinorType.VARBINARY)) { writeValueToGenerator(bufferType, vectorBuffer, vectorBuffers.get(v - 1), vector, i); } else { List<FieldVector> children = vector.getChildrenFromFields(); if (fields.size() != children.size()) { throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " +
/**
 * Rewinds and reinitializes the single accumulator vector without releasing
 * its memory; only valid when exactly one accumulator batch exists.
 */
private void resetFirstAccumulatorVector() {
  Preconditions.checkArgument(accumulators.length == 1, "Error: incorrect number of batches in accumulator");
  final FieldVector accumulator = accumulators[0];
  Preconditions.checkArgument(accumulator != null, "Error: expecting a valid accumulator");

  // Reset reader/writer positions on both buffers; the allocations are kept.
  final ArrowBuf validity = accumulator.getValidityBuffer();
  validity.readerIndex(0);
  validity.writerIndex(0);
  final ArrowBuf data = accumulator.getDataBuffer();
  data.readerIndex(0);
  data.writerIndex(0);

  initialize(accumulator);
  accumulator.setValueCount(0);
}
/**
 * Constructs a root over the children of a single parent vector: the schema
 * fields come from the parent field's children, the column vectors from the
 * parent's child vectors, and the row count from the parent's value count.
 *
 * @param parent the vector whose children become this root's columns
 */
public VectorSchemaRoot(FieldVector parent) {
  this(parent.getField().getChildren(), parent.getChildrenFromFields(), parent.getValueCount());
}
/**
 * Validate two arrow FieldVectors are equal.
 *
 * @param vector1 the 1st VectorField to compare
 * @param vector2 the 2nd VectorField to compare
 * @throws IllegalArgumentException if they are different
 */
public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) {
  // Fields (name, type, children, metadata) must match first.
  final Field field1 = vector1.getField();
  if (!field1.equals(vector2.getField())) {
    throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" + vector2.getField());
  }
  final int valueCount = vector1.getValueCount();
  if (valueCount != vector2.getValueCount()) {
    throw new IllegalArgumentException("Different value count for field " + field1 + " : " + valueCount
        + " != " + vector2.getValueCount());
  }
  // Element-wise comparison via the type-aware equality helper.
  for (int index = 0; index < valueCount; index++) {
    final Object left = vector1.getObject(index);
    final Object right = vector2.getObject(index);
    if (!equals(field1.getType(), left, right)) {
      throw new IllegalArgumentException(
          "Different values in column:\n" + field1 + " at index " + index + ": " + left + " != " + right);
    }
  }
}
vector.setInitialCapacity(valueCount); vectorBuffers[v] = readIntoBuffer(allocator, bufferType, vector.getMinorType(), innerBufferValueCount); vector.loadFieldBuffers(fieldNode, Arrays.asList(vectorBuffers)); List<FieldVector> vectorChildren = vector.getChildrenFromFields(); if (fields.size() != vectorChildren.size()) { throw new IllegalArgumentException(
/**
 * Do an adaptive allocation of each vector for memory purposes. Sizes will be based on previously
 * defined initial allocation for each vector (and subsequent size learnings).
 */
public void allocateNew() {
  fieldVectors.forEach(FieldVector::allocateNew);
}
/**
 * Loads one field node and its buffers into {@code vector}, consuming exactly
 * as many buffers as the field's type layout requires, then recurses into the
 * children, pairing schema fields with child vectors positionally.
 *
 * @throws IllegalArgumentException if nodes run out, buffer loading fails, or
 *     the child counts of schema and vector disagree
 */
private static void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers,
    Iterator<ArrowFieldNode> nodes) {
  checkArgument(nodes.hasNext(), "no more field nodes for for field " + field + " and vector " + vector);
  final ArrowFieldNode fieldNode = nodes.next();

  // Pull exactly one buffer off the stream per layout entry for this type.
  final List<BufferLayout> layouts = TypeLayout.getTypeLayout(field.getType()).getBufferLayouts();
  final List<ArrowBuf> ownBuffers = new ArrayList<>(layouts.size());
  for (int n = 0; n < layouts.size(); n++) {
    ownBuffers.add(buffers.next());
  }
  try {
    vector.loadFieldBuffers(fieldNode, ownBuffers);
  } catch (RuntimeException e) {
    throw new IllegalArgumentException("Could not load buffers for field " + field + ". error message: "
        + e.getMessage(), e);
  }

  final List<Field> children = field.getChildren();
  if (children.size() > 0) {
    final List<FieldVector> childVectors = vector.getChildrenFromFields();
    checkArgument(children.size() == childVectors.size(),
        "should have as many children as in the schema: found " + childVectors.size() + " expected "
            + children.size());
    for (int i = 0; i < childVectors.size(); i++) {
      loadBuffers(childVectors.get(i), children.get(i), buffers, nodes);
    }
  }
}
public void output(final int batchIndex) { final FieldVector accumulationVector = accumulators[batchIndex]; final TransferPair transferPair= accumulationVector.makeTransferPair(transferVector); transferPair.transfer(); if (batchIndex == 0) { ((FixedWidthVector) accumulationVector).allocateNew(maxValuesPerBatch); accumulationVector.setValueCount(0); initialize(accumulationVector); bitAddresses[batchIndex] = accumulationVector.getValidityBufferAddress(); valueAddresses[batchIndex] = accumulationVector.getDataBufferAddress();
/**
 * Convert a field vector to a column vector.
 *
 * @param fieldVector the field vector to convert
 * @param type the type of the column vector
 * @return the converted ndarray, shaped {@code valueCount x 1}
 * @throws IllegalArgumentException if {@code type} is not one of the numeric
 *     types supported here (Integer, Float, Double, Long)
 */
public static INDArray convertArrowVector(FieldVector fieldVector, ColumnType type) {
  final int cols = fieldVector.getValueCount();

  // Copy the arrow data buffer into a native-ordered direct buffer Nd4j can wrap.
  // NOTE(review): this copies the buffer's full capacity, which may exceed
  // cols * elementSize — presumably harmless since only cols elements are read.
  ByteBuffer direct = ByteBuffer.allocateDirect(fieldVector.getDataBuffer().capacity());
  direct.order(ByteOrder.nativeOrder());
  fieldVector.getDataBuffer().getBytes(0, direct);
  direct.rewind();

  final DataBuffer buffer;
  switch (type) {
    case Integer:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.INT, cols, 0);
      break;
    case Float:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.FLOAT, cols);
      break;
    case Double:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.DOUBLE, cols);
      break;
    case Long:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.LONG, cols);
      break;
    default:
      // Fix: previously an unsupported type fell through with buffer == null
      // and failed later inside Nd4j.create with an opaque NPE.
      throw new IllegalArgumentException("Unsupported column type for arrow conversion: " + type);
  }
  return Nd4j.create(buffer, new int[] {cols, 1});
}
/**
 * Builds a transfer pair for every child of {@code from}, mirroring the child
 * structure onto {@code to} (creating missing child vectors via addOrGet).
 *
 * @param from     source struct vector
 * @param to       destination struct vector
 * @param allocate whether to allocate child vectors newly created on {@code to}
 */
protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) {
  this.from = from;
  this.to = to;
  this.pairs = new TransferPair[from.size()];
  this.to.ephPair = null;
  int i = 0;
  FieldVector vector;
  for (String child : from.getChildFieldNames()) {
    // Remember the target's size so we can tell whether addOrGet created a new child below.
    int preSize = to.size();
    vector = from.getChild(child);
    if (vector == null) {
      continue;
    }
    //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector,
    // the child fields may be nested fields of the top level child. For example if the structure
    // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab.
    // But the children member of a Materialized field is a HashSet. If the fields are added in the
    // children HashSet, and the hashCode of the Materialized field includes the hash code of the
    // children, the hashCode value of oa changes *after* the field has been added to the HashSet.
    // (This is similar to what happens in ScanBatch where the children cannot be added till they are
    // read). To take care of this, we ensure that the hashCode of the MaterializedField does not
    // include the hashCode of the children but is based only on MaterializedField$key.
    final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass());
    // Only allocate when addOrGet actually grew the target (i.e. the child did not already exist).
    if (allocate && to.size() != preSize) {
      newVector.allocateNew();
    }
    pairs[i++] = vector.makeTransferPair(newVector);
  }
}
final Reallocator realloc = this.realloc; VariableLengthValidator.validateVariable(source, source.getValueCount()); final long srcOffsetAddr = source.getOffsetBufferAddress(); final long srcDataAddr = source.getDataBufferAddress(); long dstOffsetAddr = target.getOffsetBufferAddress() + 4; long curDataAddr = realloc.addr(); // start address for next copy in target long maxDataAddr = realloc.max(); // max bytes we can copy to target before we need to reallocate
/**
 * Sets the number of valid values on the accumulator vector of the given batch.
 *
 * @param batchIndex index of the accumulator batch to update
 * @param valueCount number of values recorded in that batch
 */
public void setValueCount(final int batchIndex, final int valueCount) {
  accumulators[batchIndex].setValueCount(valueCount);
}