/**
 * Fills {@code vector} with {@code valueCount} entries that alternate between two fixed payloads.
 *
 * <p>Even indexes receive the UTF-8 bytes of {@code "AAAAA1"} and odd indexes receive the UTF-8
 * bytes of {@code "BBBBBBBBB2"}. After the last write the vector's value count is set to
 * {@code valueCount}.
 *
 * @param vector destination vector, written through {@code setSafe}
 * @param valueCount number of entries to write, starting at index 0
 */
private static void writeVarBinaryData(VarBinaryVector vector, int valueCount) {
  final Charset charset = Charset.forName("UTF-8");
  final byte[] evenPayload = "AAAAA1".getBytes(charset);
  final byte[] oddPayload = "BBBBBBBBB2".getBytes(charset);

  int index = 0;
  while (index < valueCount) {
    final boolean evenIndex = (index % 2 == 0);
    vector.setSafe(index, evenIndex ? evenPayload : oddPayload);
    index++;
  }

  vector.setValueCount(valueCount);
}
/**
 * Writes the {@code value} field into slots {@code 0 .. count-1} of the {@code vector} field and
 * then sets the vector's value count to {@code count}.
 *
 * <p>While {@code value} is null no slot is written, but the value count is still set.
 *
 * @param count number of slots to cover
 */
public void populate(final int count) {
  int row = 0;
  while (row < count) {
    if (value != null) {
      // The cast is performed per write, exactly where the data is stored.
      final VarBinaryVector target = (VarBinaryVector) vector;
      target.setSafe(row, value, 0, value.length);
    }
    row++;
  }
  vector.setValueCount(count);
}
/**
 * Writes the {@code obj} field into slot {@code index} of {@code v}.
 *
 * <p>Does nothing when {@code obj} is null; in that case {@code v} is not touched or cast.
 *
 * @param v destination vector; cast to {@code VarBinaryVector} before writing
 * @param index slot to write
 */
@Override
public void set(ValueVector v, int index) {
  if (obj == null) {
    return;
  }
  final VarBinaryVector target = (VarBinaryVector) v;
  target.setSafe(index, obj, 0, obj.length);
}
} // closes a scope that was opened before this excerpt
/**
 * Extracts a byte array from {@code hiveFieldValue} via {@code oi} and writes all of it into slot
 * {@code outputIndex} of {@code outputVV}.
 *
 * <p>NOTE(review): the extracted array is dereferenced without a null check, so this assumes the
 * inspector never yields null here - confirm against callers.
 *
 * @param oi inspector for the field; cast to {@code BinaryObjectInspector}
 * @param hiveFieldValue raw field value handed to the inspector
 * @param outputVV destination vector; cast to {@code VarBinaryVector}
 * @param outputIndex slot to write
 */
@Override
public void setSafeValue(ObjectInspector oi, Object hiveFieldValue, ValueVector outputVV, int outputIndex) {
  final BinaryObjectInspector inspector = (BinaryObjectInspector) oi;
  final byte[] value = inspector.getPrimitiveJavaObject(hiveFieldValue);
  final VarBinaryVector target = (VarBinaryVector) outputVV;
  target.setSafe(outputIndex, value, 0, value.length);
}
} // closes a scope that was opened before this excerpt
/**
 * Writes one value into {@code nullableVarBinaryVector} at {@code index}.
 *
 * <p>When {@code usingDictionary} is set, the bytes come from {@code currDictValToWrite} and the
 * {@code value}/{@code start}/{@code length} arguments are ignored. Otherwise the range
 * {@code [start, start + length)} of {@code value} is written.
 *
 * @param index slot to write
 * @param value source buffer for the non-dictionary path
 * @param start start offset within {@code value}
 * @param length number of bytes to take from {@code value}
 * @return {@code false}, without writing, when {@code index} is at or beyond the vector's value
 *     capacity; {@code true} otherwise
 */
@Override
public boolean setSafe(int index, ArrowBuf value, int start, int length) {
  final boolean withinCapacity = index < nullableVarBinaryVector.getValueCapacity();
  if (!withinCapacity) {
    return false;
  }

  if (!usingDictionary) {
    // The literal 1 is passed as the second argument (presumably the "is set" flag - confirm).
    nullableVarBinaryVector.setSafe(index, 1, start, start + length, value);
    return true;
  }

  final ByteBuffer dictBytes = currDictValToWrite.toByteBuffer();
  nullableVarBinaryVector.setSafe(index, dictBytes, dictBytes.position(), currDictValToWrite.length());
  return true;
}
/**
 * Writes one value into {@code varBinaryVector} at {@code index}.
 *
 * <p>When {@code usingDictionary} is set, the next value is read from
 * {@code pageReader.dictionaryValueReader}, stored in {@code currDictValToWrite} and written; the
 * {@code value}/{@code start}/{@code length} arguments are ignored. Otherwise the range
 * {@code [start, start + length)} of {@code value} is written.
 *
 * @param index slot to write
 * @param value source buffer for the non-dictionary path
 * @param start start offset within {@code value}
 * @param length number of bytes to take from {@code value}
 * @return {@code false}, without writing or reading, when {@code index} is at or beyond the
 *     vector's value capacity; {@code true} otherwise
 */
@Override
public boolean setSafe(int index, ArrowBuf value, int start, int length) {
  final boolean withinCapacity = index < varBinaryVector.getValueCapacity();
  if (!withinCapacity) {
    return false;
  }

  if (!usingDictionary) {
    // The literal 1 is passed as the second argument (presumably the "is set" flag - confirm).
    varBinaryVector.setSafe(index, 1, start, start + length, value);
    return true;
  }

  currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
  final ByteBuffer dictBytes = currDictValToWrite.toByteBuffer();
  varBinaryVector.setSafe(index, dictBytes, dictBytes.position(), currDictValToWrite.length());
  return true;
}
/**
 * Stores holder {@code h} at the position returned by {@code idx()} and then sets the vector's
 * value count to {@code idx() + 1}.
 *
 * @param h holder to store
 */
public void write(NullableVarBinaryHolder h) {
  vector.setSafe(idx(), h);
  final int valueCount = idx() + 1;
  vector.setValueCount(valueCount);
}
/**
 * Stores holder {@code h} at the position returned by {@code idx()} and then sets the vector's
 * value count to {@code idx() + 1}.
 *
 * @param h holder to store
 */
public void write(VarBinaryHolder h) {
  vector.setSafe(idx(), h);
  final int valueCount = idx() + 1;
  vector.setValueCount(valueCount);
}
/**
 * Stores the {@code start}/{@code end} range of {@code buffer} at the position returned by
 * {@code idx()} and then sets the vector's value count to {@code idx() + 1}.
 *
 * @param start start offset forwarded to {@code setSafe}
 * @param end end offset forwarded to {@code setSafe}
 * @param buffer source buffer
 */
public void writeVarBinary(int start, int end, ArrowBuf buffer) {
  // The literal 1 is passed as the second argument (presumably the "is set" flag - confirm).
  vector.setSafe(idx(), 1, start, end, buffer);
  final int valueCount = idx() + 1;
  vector.setValueCount(valueCount);
}
static void populate(VarBinaryVector vector, byte[][] values){ vector.allocateNew(); Random r = new Random(); for(int i =0; i < values.length; i++){ byte[] val = values[i]; if(val != null){ vector.setSafe(i, val, 0, val.length); } else { // add noise. this confirms that after pivot, noise is gone. byte[] bytes = new byte[r.nextInt(15)]; r.nextBytes(bytes); vector.setSafe(i, bytes, 0, bytes.length); vector.setNull(i); } } vector.setValueCount(values.length); }
/**
 * Reads the next batch of key/value records from {@code reader} into {@code keyVector} and
 * {@code valueVector}.
 *
 * <p>Both vectors are cleared and re-allocated first (when non-null). Records are then read until
 * the row limit ({@code numRowsPerBatch}) or byte limit ({@code numBytesPerBatch}) is reached, or
 * the reader has no more records. The limits are checked before each read, so the byte limit can
 * be exceeded by the last record read.
 *
 * @return number of records written into the vectors
 * @throws UserException a data-read error, after calling {@code close()}, if the reader throws
 *     an {@code IOException}
 */
@Override
public int next() {
  final Stopwatch watch = Stopwatch.createStarted();

  if (keyVector != null) {
    keyVector.clear();
    keyVector.allocateNew();
  }
  if (valueVector != null) {
    valueVector.clear();
    valueVector.allocateNew();
  }

  int recordCount = 0;
  int batchSize = 0;
  try {
    // The condition order matters: the reader is only advanced while both limits still hold.
    for (;
        recordCount < numRowsPerBatch && batchSize < numBytesPerBatch && reader.next(key, value);
        recordCount++) {
      keyVector.setSafe(recordCount, key.getBytes(), 0, key.getLength());
      valueVector.setSafe(recordCount, value.getBytes(), 0, value.getLength());
      batchSize += (key.getLength() + value.getLength());
    }

    keyVector.setValueCount(recordCount);
    valueVector.setValueCount(recordCount);
    logger.debug("Read {} records in {} ms", recordCount, watch.elapsed(TimeUnit.MILLISECONDS));
    return recordCount;
  } catch (IOException ioe) {
    close();
    throw UserException.dataReadError(ioe)
        .addContext("File Path", split.getPath().toString())
        .build(logger);
  }
}
// Write e.metadata into slot i of metadataVector, passing 0 as the start offset and the
// array's full length so the whole array is handed over.
metadataVector.setSafe(i, e.metadata, 0, e.metadata.length);
@Override protected void readField(long recordsToReadInThisPass) { recordsReadInThisIteration = Math.min(pageReader.currentPageCount - pageReader.valuesRead, recordsToReadInThisPass - valuesReadInCurrentPass); readLengthInBits = recordsReadInThisIteration * dataTypeLengthInBits; readLength = (int) Math.ceil(readLengthInBits / 8.0); if (usingDictionary) { Binary currDictValToWrite = null; for (int i = 0; i < recordsReadInThisIteration; i++){ currDictValToWrite = pageReader.dictionaryValueReader.readBytes(); valueVec.setSafe(valuesReadInCurrentPass + i, currDictValToWrite.toByteBuffer(), 0, currDictValToWrite.length()); } // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding // and we will go into the else condition below. The readField method of the parent class requires the // writer index to be set correctly. int writerIndex = valueVec.getDataBuffer().writerIndex(); valueVec.getDataBuffer().setIndex(0, writerIndex + (int)readLength); } else { super.readField(recordsToReadInThisPass); } // TODO - replace this with fixed binary type in Dremio // now we need to write the lengths of each value int byteLength = dataTypeLengthInBits / 8; for (int i = 0; i < recordsToReadInThisPass; i++) { valueVec.setValueLengthSafe(valuesReadInCurrentPass + i, byteLength); } } }
/**
 * Builds a container holding one binary vector with the sorted, de-duplicated union of all
 * dictionary values.
 *
 * <p>Values are gathered from every entry (ids {@code 0 .. getMaxId()}) of each dictionary in
 * {@code dictionaries} and, when {@code existingDict} is non-null, from the first
 * {@code getRecordCount()} entries of its vector with id 0. They are collected in a
 * {@code TreeSet}, so they are written to the new vector in the set's iteration order.
 *
 * @param dictionaries dictionaries whose values are merged
 * @param existingDict previously built dictionary to merge in, or null
 * @param columnDescriptor column whose path provides the field name
 * @param bufferAllocator allocator for the returned container
 * @return a container with its record count set and its schema built (no selection vector)
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final Field field = new Field(
      SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(),
      true,
      new ArrowType.Binary(),
      null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final VarBinaryVector binaryVector = input.addOrGet(field);
  binaryVector.allocateNew();

  // Gather every value; the sorted set removes duplicates and fixes the output order.
  final SortedSet<Binary> values = new TreeSet<>();
  for (Dictionary dictionary : dictionaries) {
    int id = 0;
    while (id <= dictionary.getMaxId()) {
      values.add(dictionary.decodeToBinary(id));
      ++id;
    }
  }

  if (existingDict != null) {
    final VarBinaryVector existingDictValues =
        existingDict.getValueAccessorById(VarBinaryVector.class, 0).getValueVector();
    int row = 0;
    while (row < existingDict.getRecordCount()) {
      values.add(Binary.fromConstantByteArray(existingDictValues.get(row)));
      ++row;
    }
  }

  int recordCount = 0;
  for (Binary entry : values) {
    final byte[] data = entry.getBytes();
    binaryVector.setSafe(recordCount, data, 0, data.length);
    recordCount++;
  }

  binaryVector.setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
@Override protected void readField(long recordsToReadInThisPass) { this.bytebuf = pageReader.pageData; if (usingDictionary) { Binary currDictValToWrite; for (int i = 0; i < recordsReadInThisIteration; i++){ currDictValToWrite = pageReader.dictionaryValueReader.readBytes(); ByteBuffer buf = currDictValToWrite.toByteBuffer(); valueVec.setSafe(valuesReadInCurrentPass + i, buf, buf.position(), currDictValToWrite.length()); } // Set the write Index. The next page that gets read might be a page that does not use dictionary encoding // and we will go into the else condition below. The readField method of the parent class requires the // writer index to be set correctly. int writerIndex = valueVec.getDataBuffer().writerIndex(); valueVec.getDataBuffer().setIndex(0, writerIndex + (int)readLength); } else { super.readField(recordsToReadInThisPass); // TODO - replace this with fixed binary type in Dremio // for now we need to write the lengths of each value int byteLength = dataTypeLengthInBits / 8; for (int i = 0; i < recordsToReadInThisPass; i++) { valueVec.setValueLengthSafe(valuesReadInCurrentPass + i, byteLength); } } } }
// Fill rows 0-3 of the paired vectors: intVector row n holds n, binVector row n holds the
// bytes of the number's English name. (The write to intVector row 0 is not part of this excerpt.)
binVector.setSafe(0, "ZERO".getBytes(), 0, "ZERO".getBytes().length);
intVector.setSafe(1, 1);
binVector.setSafe(1, "ONE".getBytes(), 0, "ONE".getBytes().length);
intVector.setSafe(2, 2);
binVector.setSafe(2, "TWO".getBytes(), 0, "TWO".getBytes().length);
intVector.setSafe(3, 3);
// The length must come from the value being written. It was previously taken from "TWO",
// which stored only the first three bytes of "THREE".
// NOTE(review): if any later assertion was written against the truncated value, update it.
binVector.setSafe(3, "THREE".getBytes(), 0, "THREE".getBytes().length);
intVector.setValueCount(4);
binVector.setValueCount(4);
// Write byteBuffer into slot `row` of nd4jArrayVector, starting at offset 0 and using the
// buffer's capacity as the length, then leave the enclosing switch/loop.
// NOTE(review): capacity() can exceed limit(); confirm the buffer is always completely filled.
nd4jArrayVector.setSafe(row,byteBuffer,0,byteBuffer.capacity()); break;
// Only write when pv carries a binary value; the value is converted to a byte array and the
// whole array is stored at `index`. (The rest of this if-block lies outside this excerpt.)
if(pv.getBinaryValue() != null){
  byte[] bytes = pv.getBinaryValue().toByteArray();
  varBinaryVector.setSafe(index, bytes, 0, bytes.length);
// Obtain (or create) the binary vector for field2 in dict2 and fill rows 0-4 with five
// three-character strings encoded with UTF8 (a constant defined outside this excerpt,
// presumably the UTF-8 charset).
final VarBinaryVector binaryVector = dict2.addOrGet(field2);
binaryVector.allocateNew();
// The hard-coded length 3 relies on every literal below being three single-byte characters.
binaryVector.setSafe(0, "abc".getBytes(UTF8), 0, 3);
binaryVector.setSafe(1, "bcd".getBytes(UTF8), 0, 3);
binaryVector.setSafe(2, "cde".getBytes(UTF8), 0, 3);
binaryVector.setSafe(3, "def".getBytes(UTF8), 0, 3);
binaryVector.setSafe(4, "efg".getBytes(UTF8), 0, 3);
// Keep the vector's value count and the container's record count in step with the rows written.
binaryVector.setValueCount(5);
dict2.setRecordCount(5);