public void write(VectorSchemaRoot recordBatch) throws IOException {
  if (!recordBatch.getSchema().equals(schema)) {
    throw new IllegalArgumentException("record batches must have the same schema: " + schema);
  }
  writeBatch(recordBatch);
}
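// A minimal usage sketch for the write() method above, assuming it belongs to
// Arrow's JSON writer (org.apache.arrow.vector.ipc.JsonFileWriter); the file
// name, allocator setup, and row population are illustrative only.
try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
     VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
  // ... populate the vectors in root and set the row count ...
  JsonFileWriter writer = new JsonFileWriter(new File("batches.json"));
  writer.start(root.getSchema(), null); // null provider, assuming no dictionary-encoded fields
  writer.write(root);                   // throws if the schema does not match
  writer.close();
}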
public void build() throws GandivaException {
  root = GandivaUtils.getSchemaRoot(incoming, referencedFields);
  projector = Projector.make(root.getSchema(), columnExprList);
}
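// Hedged sketch of using the projector built above. Projector.make/evaluate come
// from Arrow's Gandiva bindings (org.apache.arrow.gandiva.evaluator.Projector);
// outputVectors is an assumed pre-allocated List<ValueVector>, one vector per
// expression in columnExprList, each with capacity for the batch's rows.
VectorUnloader unloader = new VectorUnloader(root);
try (ArrowRecordBatch batch = unloader.getRecordBatch()) {
  projector.evaluate(batch, outputVectors); // fills outputVectors with the projected results
}
projector.close(); // native resources must be released explicitly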
private void writeBatch(VectorSchemaRoot recordBatch) throws IOException {
  generator.writeStartObject();
  {
    generator.writeObjectField("count", recordBatch.getRowCount());
    generator.writeArrayFieldStart("columns");
    for (Field field : recordBatch.getSchema().getFields()) {
      FieldVector vector = recordBatch.getVector(field.getName());
      writeFromVectorIntoJson(field, vector);
    }
    generator.writeEndArray();
  }
  generator.writeEndObject();
}
/**
 * Validate that two Arrow VectorSchemaRoots are equal.
 *
 * @param root1 the 1st VectorSchemaRoot to compare
 * @param root2 the 2nd VectorSchemaRoot to compare
 * @throws IllegalArgumentException if they are different.
 */
public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) {
  compareSchemas(root1.getSchema(), root2.getSchema());
  if (root1.getRowCount() != root2.getRowCount()) {
    throw new IllegalArgumentException("Different row count:\n" + root1.getRowCount() + " != " + root2.getRowCount());
  }
  List<FieldVector> vectors1 = root1.getFieldVectors();
  List<FieldVector> vectors2 = root2.getFieldVectors();
  if (vectors1.size() != vectors2.size()) {
    throw new IllegalArgumentException("Different column count:\n" + vectors1.toString() + "\n!=\n" + vectors2.toString());
  }
  for (int i = 0; i < vectors1.size(); i++) {
    compareFieldVectors(vectors1.get(i), vectors2.get(i));
  }
}
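// Short usage sketch: the comparison throws IllegalArgumentException instead of
// returning a boolean, so equality checks are typically wrapped. The enclosing
// class is assumed to be Arrow's org.apache.arrow.vector.util.Validator.
try {
  Validator.compareVectorSchemaRoot(expected, actual);
} catch (IllegalArgumentException e) {
  // the message names the first mismatch: schema, row count, or a column
  System.err.println("roots differ: " + e.getMessage());
}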
/**
 * Builds a Gandiva filter for a given condition.
 *
 * @param expr the filter expression
 * @param input the input container
 * @param selectionVector the output selection vector
 * @return instance of NativeFilter
 * @throws GandivaException when we fail to make the Gandiva filter
 */
public static NativeFilter build(LogicalExpression expr, VectorAccessible input, SelectionVector2 selectionVector) throws GandivaException {
  Set referencedFields = Sets.newHashSet();
  Condition condition = GandivaExpressionBuilder.serializeExprToCondition(input, expr, referencedFields);
  VectorSchemaRoot root = GandivaUtils.getSchemaRoot(input, referencedFields);
  Filter filter = Filter.make(root.getSchema(), condition);
  return new NativeFilter(filter, root, selectionVector);
}
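// Hedged sketch of what evaluating the wrapped filter looks like with Arrow's
// Gandiva API directly (Filter.evaluate plus a SelectionVectorInt16); the
// allocator, input batch, and buffer sizing are assumptions, not NativeFilter's API.
try (ArrowBuf selectionBuf = allocator.buffer(batch.getLength() * 2L)) {
  SelectionVectorInt16 selection = new SelectionVectorInt16(selectionBuf);
  filter.evaluate(batch, selection);        // records indices of matching rows
  int matched = selection.getRecordCount(); // number of rows passing the condition
}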
this.out = new WriteChannel(out);
List<Field> fields = new ArrayList<>(root.getSchema().getFields().size());
Set<Long> dictionaryIdsUsed = new HashSet<>();
for (Field field : root.getSchema().getFields()) {
  fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed));
}
this.schema = new Schema(fields, root.getSchema().getCustomMetadata());
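// The fragment above matches the constructor of Arrow's ArrowWriter base class.
// A hedged end-to-end sketch with its concrete ArrowFileWriter subclass; the
// file name and dictionary provider are illustrative.
try (FileOutputStream fos = new FileOutputStream("data.arrow")) {
  ArrowFileWriter writer = new ArrowFileWriter(root, provider, fos.getChannel());
  writer.start();      // writes the converted schema
  writer.writeBatch(); // writes the current contents of root as one record batch
  writer.end();        // writes the file footer
  writer.close();
}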
public boolean read(VectorSchemaRoot root) throws IOException {
  JsonToken t = parser.nextToken();
  if (t == START_OBJECT) {
    {
      int count = readNextField("count", Integer.class);
      root.setRowCount(count);
      nextFieldIs("columns");
      readToken(START_ARRAY);
      {
        for (Field field : root.getSchema().getFields()) {
          FieldVector vector = root.getVector(field.getName());
          readFromJsonIntoVector(field, vector);
        }
      }
      readToken(END_ARRAY);
    }
    readToken(END_OBJECT);
    return true;
  } else if (t == END_ARRAY) {
    root.setRowCount(0);
    return false;
  } else {
    throw new IllegalArgumentException("Invalid token: " + t);
  }
}
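// Usage sketch: read(root) returns false at the trailing END_ARRAY, so all
// batches can be drained in a loop. This assumes Arrow's JsonFileReader,
// whose start() returns the schema used to allocate the root.
Schema schema = reader.start();
try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
  while (reader.read(root)) {
    consume(root); // hypothetical per-batch consumer
  }
}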
/**
 * Read a DataVec schema and record set
 * from the given Arrow file.
 *
 * @param input the input to read
 * @return the associated DataVec schema and record batch
 * @throws IOException if reading the file fails
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException {
  BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
  Schema retSchema = null;
  ArrowWritableRecordBatch ret = null;
  SeekableReadChannel channel = new SeekableReadChannel(input.getChannel());
  ArrowFileReader reader = new ArrowFileReader(channel, allocator);
  reader.loadNextBatch();
  retSchema = toDatavecSchema(reader.getVectorSchemaRoot().getSchema());
  // load the batch
  VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot());
  VectorLoader vectorLoader = new VectorLoader(reader.getVectorSchemaRoot());
  ArrowRecordBatch recordBatch = unloader.getRecordBatch();
  vectorLoader.load(recordBatch);
  ret = asDataVecBatch(recordBatch, retSchema, reader.getVectorSchemaRoot());
  ret.setUnloader(unloader);
  return Pair.of(retSchema, ret);
}
/**
 * Read a DataVec schema and record set
 * from the given bytes (usually expected to be an Arrow format file).
 *
 * @param input the input to read
 * @return the associated DataVec schema and record batch
 * @throws IOException if reading the bytes fails
 */
public static Pair<Schema, ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException {
  BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
  Schema retSchema = null;
  ArrowWritableRecordBatch ret = null;
  SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input));
  ArrowFileReader reader = new ArrowFileReader(channel, allocator);
  reader.loadNextBatch();
  retSchema = toDatavecSchema(reader.getVectorSchemaRoot().getSchema());
  // load the batch
  VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot());
  VectorLoader vectorLoader = new VectorLoader(reader.getVectorSchemaRoot());
  ArrowRecordBatch recordBatch = unloader.getRecordBatch();
  vectorLoader.load(recordBatch);
  ret = asDataVecBatch(recordBatch, retSchema, reader.getVectorSchemaRoot());
  ret.setUnloader(unloader);
  return Pair.of(retSchema, ret);
}
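// Usage sketch for the two readers above; the entry class is assumed to be
// DataVec's ArrowConverter, and Pair is assumed to expose getFirst()/getSecond()
// (as nd4j's Pair does). The file path is illustrative.
byte[] bytes = Files.readAllBytes(Paths.get("records.arrow"));
Pair<Schema, ArrowWritableRecordBatch> loaded = ArrowConverter.readFromBytes(bytes);
Schema dataVecSchema = loaded.getFirst();
ArrowWritableRecordBatch records = loaded.getSecond();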