/** Allocates fresh backing buffers for the wrapped vector. */
public void allocate() {
  vector.allocateNew();
}
/**
 * Writes the current {@code value} into the first {@code count} slots of the vector and
 * finalizes the value count. When {@code value} is null nothing is written, but the value
 * count is still set so the vector reports {@code count} rows.
 */
public void populate(final int count) {
  if (value != null) {
    final VarBinaryVector binaryVector = (VarBinaryVector) vector;
    for (int row = 0; row < count; row++) {
      binaryVector.setSafe(row, value, 0, value.length);
    }
  }
  vector.setValueCount(count);
}
/**
 * Creates an empty {@link VarBinaryVector} intended to hold {@link INDArray} structs
 * serialized in the {@link org.apache.arrow.flatbuf.Tensor} format.
 *
 * @param allocator the allocator to use
 * @param name the name of the vector
 * @param length the number of entries the vector will report
 * @return the allocated vector with its value count set to {@code length}
 */
public static VarBinaryVector ndarrayVectorOf(BufferAllocator allocator, String name, int length) {
  final VarBinaryVector vector = new VarBinaryVector(name, allocator);
  vector.allocateNewSafe();
  vector.setValueCount(length);
  return vector;
}
static void populate(VarBinaryVector vector, byte[][] values){ vector.allocateNew(); Random r = new Random(); for(int i =0; i < values.length; i++){ byte[] val = values[i]; if(val != null){ vector.setSafe(i, val, 0, val.length); } else { // add noise. this confirms that after pivot, noise is gone. byte[] bytes = new byte[r.nextInt(15)]; r.nextBytes(bytes); vector.setSafe(i, bytes, 0, bytes.length); vector.setNull(i); } } vector.setValueCount(values.length); }
/**
 * Reads the next batch of key/value pairs from the sequence-file reader into the
 * key/value VarBinary vectors, stopping at the row or byte budget, and returns the
 * number of records written.
 */
@Override
public int next() {
  final Stopwatch watch = Stopwatch.createStarted();
  // Reset both vectors so this batch starts from empty buffers.
  if (keyVector != null) {
    keyVector.clear();
    keyVector.allocateNew();
  }
  if (valueVector != null) {
    valueVector.clear();
    valueVector.allocateNew();
  }
  int recordCount = 0;
  int batchSize = 0;
  try {
    // NOTE(review): keyVector/valueVector are null-checked above but dereferenced
    // unconditionally below — presumably they are always non-null by this point; confirm.
    while (recordCount < numRowsPerBatch && batchSize < numBytesPerBatch && reader.next(key, value)) {
      keyVector.setSafe(recordCount, key.getBytes(), 0, key.getLength());
      valueVector.setSafe(recordCount, value.getBytes(), 0, value.getLength());
      // Byte budget tracks the raw key+value payload sizes, not Arrow buffer usage.
      batchSize += (key.getLength() + value.getLength());
      ++recordCount;
    }
    keyVector.setValueCount(recordCount);
    valueVector.setValueCount(recordCount);
    logger.debug("Read {} records in {} ms", recordCount, watch.elapsed(TimeUnit.MILLISECONDS));
    return recordCount;
  } catch (IOException ioe) {
    // Release reader/vector resources before surfacing the failure with file context.
    close();
    throw UserException.dataReadError(ioe).addContext("File Path", split.getPath().toString()).build(logger);
  }
}
/**
 * Builds a container holding the merged global dictionary for a binary column: the union
 * of every entry in {@code dictionaries} plus any values already present in
 * {@code existingDict}, written in ascending order into a single VarBinaryVector.
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.Binary(), null);
  final VectorContainer output = new VectorContainer(bufferAllocator);
  final VarBinaryVector dictionaryVector = output.addOrGet(field);
  dictionaryVector.allocateNew();
  // TreeSet provides both de-duplication and the sorted write order required below.
  final SortedSet<Binary> merged = new TreeSet<>();
  for (Dictionary dictionary : dictionaries) {
    for (int id = 0; id <= dictionary.getMaxId(); ++id) {
      merged.add(dictionary.decodeToBinary(id));
    }
  }
  if (existingDict != null) {
    final VarBinaryVector previousValues = existingDict.getValueAccessorById(VarBinaryVector.class, 0).getValueVector();
    for (int i = 0; i < existingDict.getRecordCount(); ++i) {
      merged.add(Binary.fromConstantByteArray(previousValues.get(i)));
    }
  }
  int recordCount = 0;
  for (Binary entry : merged) {
    final byte[] data = entry.getBytes();
    dictionaryVector.setSafe(recordCount++, data, 0, data.length);
  }
  dictionaryVector.setValueCount(recordCount);
  output.setRecordCount(recordCount);
  output.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return output;
}
// NOTE(review): this method continues past the visible excerpt — the ResultVerifier
// construction and the Pair return are not shown here.
/**
 * Builds a fixture VarBinaryVector of 5 entries mixing short, long, and null binary
 * values for result-verification tests.
 */
private static Pair<VarBinaryVector, ResultVerifier> testVarBinaryVector(final int startIndexInCurrentOutput, final int startIndexInJob) {
  VarBinaryVector colVarBinaryV = new VarBinaryVector("colVarBinary", allocator);
  // Reserve 500 bytes of data space for 5 values.
  colVarBinaryV.allocateNew(500, 5);
  colVarBinaryV.set(0, "value1".getBytes());
  colVarBinaryV.set(1, "long long long long long long long long long long long long long long long value".getBytes() );
  colVarBinaryV.set(2, "long long long long value".getBytes());
  // Index 3 deliberately left null to exercise null handling.
  colVarBinaryV.setNull(3);
  colVarBinaryV.set(4, "l".getBytes());
// Fills the dictionary vector with five fixed 3-byte UTF-8 values ("abc".."efg"),
// then finalizes the container: record count 5, schema with no selection vector.
// NOTE(review): fragment — `binaryVector`, `dict2`, and `UTF8` are declared outside
// the visible span.
binaryVector.allocateNew();
binaryVector.setSafe(0, "abc".getBytes(UTF8), 0, 3);
binaryVector.setSafe(1, "bcd".getBytes(UTF8), 0, 3);
binaryVector.setSafe(2, "cde".getBytes(UTF8), 0, 3);
binaryVector.setSafe(3, "def".getBytes(UTF8), 0, 3);
binaryVector.setSafe(4, "efg".getBytes(UTF8), 0, 3);
binaryVector.setValueCount(5);
dict2.setRecordCount(5);
dict2.buildSchema(BatchSchema.SelectionVectorMode.NONE);
new VarBinaryVector("binary", allocator)) {
  // Populate 4 rows of paired int/binary test data.
  AllocationHelper.allocate(intVector, 4, 4);
  AllocationHelper.allocate(binVector, 4, 5);
  // NOTE(review): intVector index 0 is never set — presumably intentional (null row);
  // confirm against the assertions that consume this fixture.
  binVector.setSafe(0, "ZERO".getBytes(), 0, "ZERO".getBytes().length);
  intVector.setSafe(1, 1);
  binVector.setSafe(1, "ONE".getBytes(), 0, "ONE".getBytes().length);
  intVector.setSafe(2, 2);
  binVector.setSafe(2, "TWO".getBytes(), 0, "TWO".getBytes().length);
  intVector.setSafe(3, 3);
  // Fix: the length previously came from "TWO".getBytes().length (copy-paste error),
  // which truncated the stored value to its first 3 bytes ("THR").
  binVector.setSafe(3, "THREE".getBytes(), 0, "THREE".getBytes().length);
  intVector.setValueCount(4);
  binVector.setValueCount(4);
/** * Returns a vector representing a tensor view * of each ndarray. * Each ndarray will be a "row" represented as a tensor object * with in the return {@link VarBinaryVector} * @param bufferAllocator the buffer allocator to use * @param name the name of the column * @param data the input arrays * @return */ public static VarBinaryVector vectorFor(BufferAllocator bufferAllocator,String name,INDArray[] data) { VarBinaryVector ret = new VarBinaryVector(name,bufferAllocator); ret.allocateNew(); for(int i = 0; i < data.length; i++) { //slice the databuffer to use only the needed portion of the buffer //for proper offset ByteBuffer byteBuffer = BinarySerde.toByteBuffer(data[i]); ret.set(i,byteBuffer,0,byteBuffer.capacity()); } return ret; }
// NOTE(review): excerpt stitched from a larger record-reader method — `cell`,
// `rowCount`, `valueOffset`, and `v` are defined outside the visible span, and the
// enclosing braces do not close within it.
Stopwatch watch = Stopwatch.createStarted();
if (rowKeyVector != null) {
  rowKeyVector.clear();
  rowKeyVector.allocateNew();
  // Row key comes from the first cell of the row (Cell-style accessors).
  rowKeyVector.setSafe(rowCount, cells[0].getRowArray(), cells[0].getRowOffset(), cells[0].getRowLength());
  final int valueLength = cell.getValueLength();
  final byte[] valueArray = cell.getValueArray();
  v.setSafe(rowCount, valueArray, valueOffset, valueLength);
/**
 * Copies a Hive BINARY field value into the output VarBinaryVector at {@code outputIndex}.
 * NOTE(review): assumes the inspected value is non-null — a null primitive object would
 * NPE on the length read; presumably callers filter nulls beforehand. Confirm.
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final BinaryObjectInspector inspector = (BinaryObjectInspector) oi;
  final byte[] bytes = inspector.getPrimitiveJavaObject(hiveFieldValue);
  final VarBinaryVector target = (VarBinaryVector) outputVV;
  target.setSafe(outputIndex, bytes, 0, bytes.length);
}
}
/**
 * Reads the value at the reader's current row from the given field's vector into
 * {@code holder}.
 */
public void getFieldById(int fieldId, NullableVarBinaryHolder holder) {
  final VarBinaryVector fieldVector = (VarBinaryVector) vectors[fieldId];
  fieldVector.get(currentIndex, holder);
}
/**
 * Writes one binary value into the target vector — either the bytes of the current
 * dictionary value or the raw range of {@code value}. Returns false when {@code index}
 * is beyond the current capacity (so the caller can flush and retry), true otherwise.
 */
@Override
public boolean setSafe(int index, ArrowBuf value, int start, int length) {
  if (index >= nullableVarBinaryVector.getValueCapacity()) {
    return false;
  }
  if (!usingDictionary) {
    // Plain path: copy [start, start+length) out of the ArrowBuf, marking the slot set.
    nullableVarBinaryVector.setSafe(index, 1, start, start + length, value);
  } else {
    // Dictionary path: the pending dictionary value, not `value`, supplies the bytes.
    final ByteBuffer dictionaryBytes = currDictValToWrite.toByteBuffer();
    nullableVarBinaryVector.setSafe(index, dictionaryBytes, dictionaryBytes.position(), currDictValToWrite.length());
  }
  return true;
}
/**
 * Reads fixed-width binary values for this pass: decodes dictionary ids directly, or
 * delegates to the parent reader and then back-fills the per-value lengths.
 */
@Override
protected void readField(long recordsToReadInThisPass) {
  this.bytebuf = pageReader.pageData;
  if (usingDictionary) {
    // NOTE(review): the dictionary branch loops over recordsReadInThisIteration while
    // the non-dictionary branch uses recordsToReadInThisPass — confirm both counts
    // agree for a single page.
    Binary currDictValToWrite;
    for (int i = 0; i < recordsReadInThisIteration; i++){
      currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
      ByteBuffer buf = currDictValToWrite.toByteBuffer();
      valueVec.setSafe(valuesReadInCurrentPass + i, buf, buf.position(), currDictValToWrite.length());
    }
    // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding
    // and we will go into the else condition below. The readField method of the parent class requires the
    // writer index to be set correctly.
    int writerIndex = valueVec.getDataBuffer().writerIndex();
    valueVec.getDataBuffer().setIndex(0, writerIndex + (int)readLength);
  } else {
    super.readField(recordsToReadInThisPass);
    // TODO - replace this with fixed binary type in Dremio
    // for now we need to write the lengths of each value
    int byteLength = dataTypeLengthInBits / 8;
    for (int i = 0; i < recordsToReadInThisPass; i++) {
      valueVec.setValueLengthSafe(valuesReadInCurrentPass + i, byteLength);
    }
  }
}
}
// Returns the raw bytes for the given row by delegating to the column accessor.
@Override
final byte[] getBinary(int rowId) {
  return accessor.getObject(rowId);
}
}
/**
 * Rehydrates this vector from a serialized batch: validity bits come from the metadata's
 * first child, offsets plus data from the second, all backed by {@code buffer}.
 */
public void loadData(SerializedField metadata, ArrowBuf buffer) {
  /* clear the current buffers (if any) */
  vector.clear();
  /* get the metadata children */
  final SerializedField bitsField = metadata.getChild(0);
  final SerializedField valuesField = metadata.getChild(1);
  final int valuesLength = buffer.capacity();
  vector.allocateNew(valuesLength, metadata.getValueCount());
  /* set inner validity buffer */
  setValidityBuffer(bitsField);
  /* load inner offset and value buffers */
  // Release the freshly-allocated offset/value buffers before swapping in buffers
  // sliced from the incoming ArrowBuf, so the allocations above do not leak.
  vector.offsetBuffer.close();
  vector.valueBuffer.close();
  loadOffsetAndDataBuffer(valuesField, buffer.slice(0, valuesLength));
  // Variable-width vectors track the highest populated index via lastSet.
  vector.setLastSet(metadata.getValueCount() - 1);
}
/**
 * Factory callback: builds a fresh VarBinaryVector for the given field description.
 * The schema-change callback is not needed for this vector type and is ignored.
 */
@Override
public FieldVector getNewVector(
    String name, FieldType fieldType, BufferAllocator allocator, CallBack schemaChangeCallback) {
  final VarBinaryVector created = new VarBinaryVector(name, fieldType, allocator);
  return created;
}