/**
 * Builds an empty Arrow batch: resets the root's value count to zero and
 * (re)creates every field vector with zero capacity, then wraps the root in a
 * fresh {@link VectorSchemaRoot} for the writable.
 */
public ArrowWrapperWritable emptyBatch() {
  rootVector.setValueCount(0);
  final int fieldCount = fieldTypeInfos.size();
  for (int i = 0; i < fieldCount; i++) {
    final TypeInfo typeInfo = fieldTypeInfos.get(i);
    final String name = fieldNames.get(i);
    // addOrGet is idempotent: reuses the existing child vector when present.
    final FieldVector child = rootVector.addOrGet(name, toFieldType(typeInfo), FieldVector.class);
    child.setInitialCapacity(0);
    child.allocateNew();
  }
  return new ArrowWrapperWritable(new VectorSchemaRoot(rootVector), allocator, rootVector);
}
/**
 * Serializes a Hive struct column into an Arrow struct vector: each Hive child
 * field is written into the corresponding Arrow child vector, then the Hive
 * per-row null flags are mirrored into the Arrow validity buffer.
 *
 * @param arrowVector destination Arrow struct vector
 * @param hiveVector source Hive struct column
 * @param typeInfo struct type describing the child field names/types
 * @param size number of rows to write
 * @param vectorizedRowBatch batch context passed through to child writers
 * @param isNative whether the batch came from a native vectorized reader
 */
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector,
    StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final ColumnVector[] hiveFieldVectors = hiveVector.fields;
  final int fieldSize = fieldTypeInfos.size();
  for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
    final ColumnVector hiveFieldVector = hiveFieldVectors[fieldIndex];
    final String fieldName = fieldNames.get(fieldIndex);
    // Reuse the already-fetched fieldTypeInfo (was a redundant fieldTypeInfos.get(fieldIndex)).
    final FieldVector arrowFieldVector =
        arrowVector.addOrGet(fieldName, toFieldType(fieldTypeInfo), FieldVector.class);
    arrowFieldVector.setInitialCapacity(size);
    arrowFieldVector.allocateNew();
    write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
  }
  // Mirror Hive's isNull flags into the Arrow validity buffer (1 = valid, 0 = null).
  final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    if (hiveVector.isNull[rowIndex]) {
      BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
    }
  }
}
} else { arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize); arrowVector.allocateNew();
/**
 * Serializes a Hive list column into an Arrow list vector: writes the flattened
 * element vector, then rebuilds the Arrow offset buffer from the Hive per-row
 * lengths. Null rows keep the running offset unchanged (zero-length entry) and
 * leave their validity bit unset.
 *
 * @param arrowVector destination Arrow list vector
 * @param hiveVector source Hive list column (child holds the flattened elements)
 * @param typeInfo list type describing the element type
 * @param size number of rows to write
 * @param vectorizedRowBatch batch context passed through to the element writer
 * @param isNative whether the batch came from a native vectorized reader
 */
private void writeList(ListVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo,
    int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final int OFFSET_WIDTH = 4; // Arrow list offsets are 32-bit ints.
  final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
  final ColumnVector hiveElementVector = hiveVector.child;
  final FieldVector arrowElementVector =
      (FieldVector) arrowVector.addOrGetVector(toFieldType(elementTypeInfo)).getVector();
  arrowElementVector.setInitialCapacity(hiveVector.childCount);
  arrowElementVector.allocateNew();
  write(arrowElementVector, hiveElementVector, elementTypeInfo, hiveVector.childCount,
      vectorizedRowBatch, isNative);

  final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
  int nextOffset = 0;
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    // The current offset is written for every row; this was duplicated verbatim
    // in both branches of the original if/else, so it is hoisted out.
    offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);
    if (!hiveVector.isNull[rowIndex]) {
      nextOffset += (int) hiveVector.lengths[rowIndex];
      arrowVector.setNotNull(rowIndex);
    }
  }
  // Trailing offset marks the end of the last list.
  offsetBuffer.setInt(size * OFFSET_WIDTH, nextOffset);
}
/**
 * Do an adaptive allocation of each vector for memory purposes. Sizes will be
 * based on previously defined initial allocation for each vector (and
 * subsequent size learnings).
 */
public void allocateNew() {
  fieldVectors.forEach(FieldVector::allocateNew);
}
/** * Project the build data (including keys from the probe) * @param offsetAddr * @param count */ private void projectBuild(final long offsetAddr, final int count){ buildCopyWatch.start(); if (buildCopiers.size() == 0) { // No data in build side final List<FieldVector> buildOutputs = this.buildOutputs; for (FieldVector fieldVector : buildOutputs) { fieldVector.allocateNew(); } } else { for (FieldBufferCopier c : buildCopiers) { c.copy(offsetAddr, count); } } buildCopyWatch.stop(); }
/**
 * Copies {@code count} values into the destination vector. Each entry at
 * {@code offsetAddr} packs a batch index (high bits) and an in-batch offset
 * (low bits) into one int.
 *
 * @param offsetAddr address of the first packed (batch, offset) entry
 * @param count number of entries to copy
 */
@Override
public void copy(long offsetAddr, int count) {
  dst.allocateNew();
  // Widen before multiplying: count * STEP_SIZE would otherwise be evaluated
  // in 32-bit int arithmetic and could overflow before the long addition.
  final long max = offsetAddr + (long) count * STEP_SIZE;
  int target = 0;
  for (long addr = offsetAddr; addr < max; addr += STEP_SIZE) {
    final int batchNOFF = PlatformDependent.getInt(addr);
    transfer[batchNOFF >>> BATCH_BITS].copyValueSafe(batchNOFF & MAX_BATCH, target);
    target++;
  }
}
/**
 * Copies {@code count} values into the destination vector. Entries at
 * {@code offsetAddr} are unsigned 16-bit indices into the single source batch.
 *
 * @param offsetAddr address of the first 16-bit index
 * @param count number of entries to copy
 */
@Override
public void copy(long offsetAddr, int count) {
  dst.allocateNew();
  // Widen before multiplying: count * STEP_SIZE in int arithmetic could
  // overflow before the long addition.
  final long max = offsetAddr + (long) count * STEP_SIZE;
  int target = 0;
  for (long addr = offsetAddr; addr < max; addr += STEP_SIZE) {
    // Indices are stored as unsigned shorts.
    final int index = Short.toUnsignedInt(PlatformDependent.getShort(addr));
    transfer.copyValueSafe(index, target);
    target++;
  }
}
fieldVector.allocateNew();
/**
 * Copies {@code count} values into the destination vector. Each record link at
 * {@code offsetAddr} holds a 4-byte batch index followed by an unsigned 2-byte
 * in-batch offset.
 *
 * @param offsetAddr address of the first build record link
 * @param count number of links to copy
 */
@Override
public void copy(long offsetAddr, int count) {
  dst.allocateNew();
  // Widen before multiplying: count * BUILD_RECORD_LINK_SIZE in int arithmetic
  // could overflow before the long addition.
  final long max = offsetAddr + (long) count * BUILD_RECORD_LINK_SIZE;
  int target = 0;
  for (long addr = offsetAddr; addr < max; addr += BUILD_RECORD_LINK_SIZE) {
    final int batchIndex = PlatformDependent.getInt(addr);
    final int batchOffset = Short.toUnsignedInt(PlatformDependent.getShort(addr + 4));
    transfer[batchIndex].copyValueSafe(batchOffset, target);
    target++;
  }
}
/**
 * Copies {@code count} values into the destination vector, skipping links whose
 * batch index equals {@code SKIP}. Each record link holds a 4-byte batch index
 * followed by an unsigned 2-byte in-batch offset. Note that {@code target} only
 * advances for copied (non-skipped) links, so output positions stay dense.
 *
 * @param offsetAddr address of the first build record link
 * @param count number of links to examine
 */
@Override
public void copy(long offsetAddr, int count) {
  dst.allocateNew();
  // Widen before multiplying: count * BUILD_RECORD_LINK_SIZE in int arithmetic
  // could overflow before the long addition.
  final long max = offsetAddr + (long) count * BUILD_RECORD_LINK_SIZE;
  int target = 0;
  for (long addr = offsetAddr; addr < max; addr += BUILD_RECORD_LINK_SIZE) {
    final int batchIndex = PlatformDependent.getInt(addr);
    if (batchIndex != SKIP) {
      final int batchOffset = Short.toUnsignedInt(PlatformDependent.getShort(addr + 4));
      transfer[batchIndex].copyValueSafe(batchOffset, target);
      target++;
    }
  }
}
childVector.allocateNew(); childVector.setValueCount(metadata.getValueCount());
/**
 * Builds a transfer pair for every child of {@code from}, mirroring each child
 * into {@code to} (creating it there if absent) and optionally allocating
 * newly-created children.
 *
 * NOTE(review): {@code pairs} is sized to {@code from.size()} but entries are
 * only filled for children that resolve non-null, so trailing slots may stay
 * null when a child lookup returns null — presumably callers tolerate this;
 * verify before relying on pairs.length.
 *
 * @param from source struct vector whose children are transferred
 * @param to destination struct vector receiving matching children
 * @param allocate whether to allocate children newly created in {@code to}
 */
protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) {
  this.from = from;
  this.to = to;
  this.pairs = new TransferPair[from.size()];
  this.to.ephPair = null;
  int i = 0;
  FieldVector vector;
  for (String child : from.getChildFieldNames()) {
    // Capture the child count BEFORE addOrGet so we can tell below whether
    // addOrGet created a new vector (count grew) or reused an existing one.
    int preSize = to.size();
    vector = from.getChild(child);
    if (vector == null) {
      continue;
    }
    //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector,
    // the child fields may be nested fields of the top level child. For example if the structure
    // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab.
    // But the children member of a Materialized field is a HashSet. If the fields are added in the
    // children HashSet, and the hashCode of the Materialized field includes the hash code of the
    // children, the hashCode value of oa changes *after* the field has been added to the HashSet.
    // (This is similar to what happens in ScanBatch where the children cannot be added till they are
    // read). To take care of this, we ensure that the hashCode of the MaterializedField does not
    // include the hashCode of the children but is based only on MaterializedField$key.
    final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass());
    if (allocate && to.size() != preSize) {
      // Only allocate when addOrGet actually created the vector in `to`.
      newVector.allocateNew();
    }
    pairs[i++] = vector.makeTransferPair(newVector);
  }
}