/**
 * Reads an Arrow list vector into a Hive ListColumnVector: converts the child
 * (element) vector recursively, then copies per-row offsets/lengths out of the
 * Arrow offset buffer.
 */
private void readList(FieldVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo) {
  // Width in bytes of one entry in Arrow's list offset buffer.
  final int OFFSET_WIDTH = 4;
  final int rowCount = arrowVector.getValueCount();
  final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();

  // Convert the single element child vector first.
  read(arrowVector.getChildrenFromFields().get(0), hiveVector.child, typeInfo.getListElementTypeInfo());

  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
    } else {
      hiveVector.isNull[row] = false;
      // Arrow stores (start, end) as consecutive offsets; Hive wants (start, length).
      final int start = offsetBuffer.getInt(row * OFFSET_WIDTH);
      hiveVector.offsets[row] = start;
      hiveVector.lengths[row] = offsetBuffer.getInt((row + 1) * OFFSET_WIDTH) - start;
    }
  }
}
arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize); } else { arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize); arrowVector.allocateNew();
/**
 * Reads an Arrow struct vector into a Hive StructColumnVector: converts each
 * member column recursively, then propagates the parent-level null flags.
 */
private void readStruct(FieldVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo) {
  final int rowCount = arrowVector.getValueCount();
  final List<TypeInfo> memberTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final List<FieldVector> memberVectors = arrowVector.getChildrenFromFields();

  // Convert every struct member column, pairing by position.
  for (int member = 0; member < memberVectors.size(); member++) {
    read(memberVectors.get(member), hiveVector.fields[member], memberTypeInfos.get(member));
  }

  // Copy per-row null flags onto the Hive struct vector itself.
  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
    } else {
      hiveVector.isNull[row] = false;
    }
  }
}
/**
 * Builds a writable wrapping a zero-row batch: every schema column is still
 * created and allocated (with capacity 0) so downstream readers see the full
 * schema even when there is no data.
 */
public ArrowWrapperWritable emptyBatch() {
  rootVector.setValueCount(0);
  for (int i = 0; i < fieldTypeInfos.size(); i++) {
    final FieldType arrowFieldType = toFieldType(fieldTypeInfos.get(i));
    // addOrGet materializes the column on the root if it is not already there.
    final FieldVector childVector = rootVector.addOrGet(fieldNames.get(i), arrowFieldType, FieldVector.class);
    childVector.setInitialCapacity(0);
    childVector.allocateNew();
  }
  return new ArrowWrapperWritable(new VectorSchemaRoot(rootVector), allocator, rootVector);
}
/**
 * Reads an Arrow map vector into a Hive MapColumnVector by going through the
 * map's list-of-struct(key, value) representation, then copying the list-level
 * metadata (offsets, lengths, nulls) back onto the map vector.
 */
private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo) {
  final int rowCount = arrowVector.getValueCount();
  final ListTypeInfo entryListTypeInfo = toStructListTypeInfo(typeInfo);
  final ListColumnVector entryListVector = toStructListVector(hiveVector);
  final StructColumnVector entryVector = (StructColumnVector) entryListVector.child;

  // Delegate the actual conversion to the list reader.
  read(arrowVector, entryListVector, entryListTypeInfo);

  hiveVector.isRepeating = entryListVector.isRepeating;
  hiveVector.childCount = entryListVector.childCount;
  hiveVector.noNulls = entryListVector.noNulls;
  // Entry struct field 0 holds keys, field 1 holds values.
  hiveVector.keys = entryVector.fields[0];
  hiveVector.values = entryVector.fields[1];
  System.arraycopy(entryListVector.offsets, 0, hiveVector.offsets, 0, rowCount);
  System.arraycopy(entryListVector.lengths, 0, hiveVector.lengths, 0, rowCount);
  System.arraycopy(entryListVector.isNull, 0, hiveVector.isNull, 0, rowCount);
}
/**
 * Recursively appends this vector's field node and buffers, then those of its
 * children, keeping nodes/buffers in depth-first schema order.
 *
 * @throws IllegalArgumentException if the vector exposes a different number of
 *     buffers than its type layout requires
 */
private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
  // A null count of -1 signals "not included" when includeNullCount is off.
  final int nullCount = includeNullCount ? vector.getNullCount() : -1;
  nodes.add(new ArrowFieldNode(vector.getValueCount(), nullCount));

  final List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
  final List<BufferType> expectedBuffers = TypeLayout.getTypeLayout(vector.getField().getType()).getBufferTypes();
  if (fieldBuffers.size() != expectedBuffers.size()) {
    throw new IllegalArgumentException(String.format(
        "wrong number of buffers for field %s in vector %s. found: %s",
        vector.getField(), vector.getClass().getSimpleName(), fieldBuffers));
  }
  buffers.addAll(fieldBuffers);

  for (FieldVector childVector : vector.getChildrenFromFields()) {
    appendNodes(childVector, nodes, buffers);
  }
}
validateType(vector.getMinorType()); Map<Object, Integer> lookUps = new HashMap<>(dictionary.getVector().getValueCount()); for (int i = 0; i < dictionary.getVector().getValueCount(); i++) { lookUps.put(dictionary.getVector().getObject(i), i); indices.allocateNew(); indices.setValueCount(count);
/**
 * Emits the "dictionaries" JSON array: one object per used dictionary id, each
 * containing the dictionary's backing vector serialized as a record batch.
 *
 * NOTE(review): the single-column VectorSchemaRoot is deliberately not closed
 * here — closing it would release the provider-owned dictionary vector.
 */
private void writeDictionaryBatches(JsonGenerator generator, Set<Long> dictionaryIdsUsed, DictionaryProvider provider)
    throws IOException {
  generator.writeArrayFieldStart("dictionaries");
  for (Long dictionaryId : dictionaryIdsUsed) {
    generator.writeStartObject();
    generator.writeObjectField("id", dictionaryId);
    generator.writeFieldName("data");

    // Wrap the dictionary's vector in a one-column root and reuse the batch writer.
    final FieldVector dictionaryVector = provider.lookup(dictionaryId).getVector();
    final List<Field> schemaFields = Collections.singletonList(dictionaryVector.getField());
    final List<FieldVector> columns = Collections.singletonList(dictionaryVector);
    writeBatch(new VectorSchemaRoot(schemaFields, columns, dictionaryVector.getValueCount()));

    generator.writeEndObject();
  }
  generator.writeEndArray();
}
private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { List<BufferType> vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List<ArrowBuf> vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " + int valueCount = vector.getValueCount(); generator.writeObjectField("count", valueCount); final int bufferValueCount = (bufferType.equals(OFFSET)) ? valueCount + 1 : valueCount; for (int i = 0; i < bufferValueCount; i++) { if (bufferType.equals(DATA) && (vector.getMinorType() == MinorType.VARCHAR || vector.getMinorType() == MinorType.VARBINARY)) { writeValueToGenerator(bufferType, vectorBuffer, vectorBuffers.get(v - 1), vector, i); } else { List<FieldVector> children = vector.getChildrenFromFields(); if (fields.size() != children.size()) { throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " +
/**
 * Rewinds and reinitializes the single accumulator vector without releasing
 * its memory; only valid when exactly one accumulator batch exists.
 */
private void resetFirstAccumulatorVector() {
  Preconditions.checkArgument(accumulators.length == 1, "Error: incorrect number of batches in accumulator");
  final FieldVector accumulator = accumulators[0];
  Preconditions.checkArgument(accumulator != null, "Error: expecting a valid accumulator");

  // Reset reader/writer positions on both buffers; the allocations are kept.
  final ArrowBuf validity = accumulator.getValidityBuffer();
  validity.readerIndex(0);
  validity.writerIndex(0);
  final ArrowBuf data = accumulator.getDataBuffer();
  data.readerIndex(0);
  data.writerIndex(0);

  initialize(accumulator);
  accumulator.setValueCount(0);
}
/**
 * Constructs a root over the children of a single parent vector: the schema
 * fields come from the parent field's children, the column vectors from the
 * parent's child vectors, and the row count from the parent's value count.
 *
 * @param parent the vector whose children become this root's columns
 */
public VectorSchemaRoot(FieldVector parent) {
  this(parent.getField().getChildren(), parent.getChildrenFromFields(), parent.getValueCount());
}
/**
 * Validate two arrow FieldVectors are equal.
 *
 * @param vector1 the 1st VectorField to compare
 * @param vector2 the 2nd VectorField to compare
 * @throws IllegalArgumentException if they are different
 */
public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) {
  // Fields (name, type, children, metadata) must match first.
  final Field field1 = vector1.getField();
  if (!field1.equals(vector2.getField())) {
    throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" + vector2.getField());
  }
  final int valueCount = vector1.getValueCount();
  if (valueCount != vector2.getValueCount()) {
    throw new IllegalArgumentException("Different value count for field " + field1 + " : " + valueCount
        + " != " + vector2.getValueCount());
  }
  // Element-wise comparison via the type-aware equality helper.
  for (int index = 0; index < valueCount; index++) {
    final Object left = vector1.getObject(index);
    final Object right = vector2.getObject(index);
    if (!equals(field1.getType(), left, right)) {
      throw new IllegalArgumentException(
          "Different values in column:\n" + field1 + " at index " + index + ": " + left + " != " + right);
    }
  }
}
vector.setInitialCapacity(valueCount); vectorBuffers[v] = readIntoBuffer(allocator, bufferType, vector.getMinorType(), innerBufferValueCount); vector.loadFieldBuffers(fieldNode, Arrays.asList(vectorBuffers)); List<FieldVector> vectorChildren = vector.getChildrenFromFields(); if (fields.size() != vectorChildren.size()) { throw new IllegalArgumentException(
/**
 * Do an adaptive allocation of each vector for memory purposes. Sizes will be based on previously
 * defined initial allocation for each vector (and subsequent size learnings).
 */
public void allocateNew() {
  fieldVectors.forEach(FieldVector::allocateNew);
}
/**
 * Loads one field node and its buffers into {@code vector}, consuming exactly
 * as many buffers as the field's type layout requires, then recurses into the
 * children, pairing schema fields with child vectors positionally.
 *
 * @throws IllegalArgumentException if nodes run out, buffer loading fails, or
 *     the child counts of schema and vector disagree
 */
private static void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers,
    Iterator<ArrowFieldNode> nodes) {
  checkArgument(nodes.hasNext(), "no more field nodes for for field " + field + " and vector " + vector);
  final ArrowFieldNode fieldNode = nodes.next();

  // Pull exactly one buffer off the stream per layout entry for this type.
  final List<BufferLayout> layouts = TypeLayout.getTypeLayout(field.getType()).getBufferLayouts();
  final List<ArrowBuf> ownBuffers = new ArrayList<>(layouts.size());
  for (int n = 0; n < layouts.size(); n++) {
    ownBuffers.add(buffers.next());
  }
  try {
    vector.loadFieldBuffers(fieldNode, ownBuffers);
  } catch (RuntimeException e) {
    throw new IllegalArgumentException("Could not load buffers for field " + field + ". error message: "
        + e.getMessage(), e);
  }

  final List<Field> children = field.getChildren();
  if (children.size() > 0) {
    final List<FieldVector> childVectors = vector.getChildrenFromFields();
    checkArgument(children.size() == childVectors.size(),
        "should have as many children as in the schema: found " + childVectors.size() + " expected "
            + children.size());
    for (int i = 0; i < childVectors.size(); i++) {
      loadBuffers(childVectors.get(i), children.get(i), buffers, nodes);
    }
  }
}
public void output(final int batchIndex) { final FieldVector accumulationVector = accumulators[batchIndex]; final TransferPair transferPair= accumulationVector.makeTransferPair(transferVector); transferPair.transfer(); if (batchIndex == 0) { ((FixedWidthVector) accumulationVector).allocateNew(maxValuesPerBatch); accumulationVector.setValueCount(0); initialize(accumulationVector); bitAddresses[batchIndex] = accumulationVector.getValidityBufferAddress(); valueAddresses[batchIndex] = accumulationVector.getDataBufferAddress();
/**
 * Convert a field vector to a column vector.
 *
 * @param fieldVector the field vector to convert
 * @param type the type of the column vector
 * @return the converted ndarray, shaped {@code valueCount x 1}
 * @throws IllegalArgumentException if {@code type} is not one of the numeric
 *     types supported here (Integer, Float, Double, Long)
 */
public static INDArray convertArrowVector(FieldVector fieldVector, ColumnType type) {
  final int cols = fieldVector.getValueCount();

  // Copy the arrow data buffer into a native-ordered direct buffer Nd4j can wrap.
  // NOTE(review): this copies the buffer's full capacity, which may exceed
  // cols * elementSize — presumably harmless since only cols elements are read.
  ByteBuffer direct = ByteBuffer.allocateDirect(fieldVector.getDataBuffer().capacity());
  direct.order(ByteOrder.nativeOrder());
  fieldVector.getDataBuffer().getBytes(0, direct);
  direct.rewind();

  final DataBuffer buffer;
  switch (type) {
    case Integer:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.INT, cols, 0);
      break;
    case Float:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.FLOAT, cols);
      break;
    case Double:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.DOUBLE, cols);
      break;
    case Long:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.LONG, cols);
      break;
    default:
      // Fix: previously an unsupported type fell through with buffer == null
      // and failed later inside Nd4j.create with an opaque NPE.
      throw new IllegalArgumentException("Unsupported column type for arrow conversion: " + type);
  }
  return Nd4j.create(buffer, new int[] {cols, 1});
}
/**
 * Builds a transfer pair for every child of {@code from}, mirroring the child
 * structure onto {@code to} (creating missing child vectors via addOrGet).
 *
 * @param from     source struct vector
 * @param to       destination struct vector
 * @param allocate whether to allocate child vectors newly created on {@code to}
 */
protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) {
  this.from = from;
  this.to = to;
  this.pairs = new TransferPair[from.size()];
  this.to.ephPair = null;
  int i = 0;
  FieldVector vector;
  for (String child : from.getChildFieldNames()) {
    // Remember the target's size so we can tell whether addOrGet created a new child below.
    int preSize = to.size();
    vector = from.getChild(child);
    if (vector == null) {
      continue;
    }
    //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector,
    // the child fields may be nested fields of the top level child. For example if the structure
    // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab.
    // But the children member of a Materialized field is a HashSet. If the fields are added in the
    // children HashSet, and the hashCode of the Materialized field includes the hash code of the
    // children, the hashCode value of oa changes *after* the field has been added to the HashSet.
    // (This is similar to what happens in ScanBatch where the children cannot be added till they are
    // read). To take care of this, we ensure that the hashCode of the MaterializedField does not
    // include the hashCode of the children but is based only on MaterializedField$key.
    final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass());
    // Only allocate when addOrGet actually grew the target (i.e. the child did not already exist).
    if (allocate && to.size() != preSize) {
      newVector.allocateNew();
    }
    pairs[i++] = vector.makeTransferPair(newVector);
  }
}
final Reallocator realloc = this.realloc; VariableLengthValidator.validateVariable(source, source.getValueCount()); final long srcOffsetAddr = source.getOffsetBufferAddress(); final long srcDataAddr = source.getDataBufferAddress(); long dstOffsetAddr = target.getOffsetBufferAddress() + 4; long curDataAddr = realloc.addr(); // start address for next copy in target long maxDataAddr = realloc.max(); // max bytes we can copy to target before we need to reallocate
/**
 * Sets the number of valid values on the accumulator vector of the given batch.
 *
 * @param batchIndex index of the accumulator batch to update
 * @param valueCount number of values recorded in that batch
 */
public void setValueCount(final int batchIndex, final int valueCount) {
  accumulators[batchIndex].setValueCount(valueCount);
}