/**
 * Deserialize a message that is either an ArrowDictionaryBatch or ArrowRecordBatch.
 *
 * <p>Convenience overload: wraps the channel and allocator in a {@code MessageChannelReader}
 * and hands that reader to the reader-based overload of this method.
 *
 * @param in ReadChannel to read messages from
 * @param alloc Allocator for message data
 * @return the message produced by the reader-based overload
 * @throws IOException propagated from the reader-based overload
 */
public static ArrowMessage deserializeMessageBatch(ReadChannel in, BufferAllocator alloc)
    throws IOException {
  final MessageChannelReader channelReader = new MessageChannelReader(in, alloc);
  return deserializeMessageBatch(channelReader);
}
/**
 * Closes the read source by closing the {@code messageReader} this reader pulls messages from.
 *
 * @throws IOException if closing the message reader fails
 */
@Override
protected void closeReadSource() throws IOException {
  messageReader.close();
}
/**
 * Reports how many bytes have been read, as counted by the underlying {@code messageReader}.
 *
 * @return the byte count reported by the message reader
 */
@Override
public long bytesRead() {
  final long consumed = messageReader.bytesRead();
  return consumed;
}
/** * Read a message from the ReadChannel and return a MessageResult containing the Message * metadata and optional message body data. Once the end-of-stream has been reached, a null * value will be returned. If the message has no body, then MessageResult.getBodyBuffer() * returns null. * * @return MessageResult or null if reached end-of-stream * @throws IOException on error */ public MessageResult readNext() throws IOException { // Read the flatbuf message and check for end-of-stream MessageMetadataResult result = MessageSerializer.readMessage(in); if (result == null) { return null; } Message message = result.getMessage(); ArrowBuf bodyBuffer = null; // Read message body data if defined in message if (result.messageHasBody()) { int bodyLength = (int) result.getMessageBodyLength(); bodyBuffer = MessageSerializer.readMessageBody(in, bodyLength, allocator); } return new MessageResult(message, bodyBuffer); }
/**
 * Serializes a record batch to {@code out} and logs where it was written.
 *
 * @param batch the record batch to serialize
 * @return the block returned by the serializer for the written batch
 * @throws IOException if serialization fails
 */
protected ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException {
  final ArrowBlock written = MessageSerializer.serialize(out, batch);
  LOGGER.debug(
      "RecordBatch at {}, metadata: {}, body: {}",
      written.getOffset(),
      written.getMetadataLength(),
      written.getBodyLength());
  return written;
}
/**
 * Positions the channel at the given block and deserializes the dictionary batch stored there.
 *
 * @param in seekable channel to read from; its position is moved to the block offset
 * @param block block describing where the dictionary batch lives
 * @param allocator allocator passed to the deserializer
 * @return the deserialized dictionary batch, never null
 * @throws IOException if the deserializer yields no batch at the block offset, or on read error
 */
private ArrowDictionaryBatch readDictionaryBatch(
    SeekableReadChannel in, ArrowBlock block, BufferAllocator allocator) throws IOException {
  LOGGER.debug(
      "DictionaryRecordBatch at {}, metadata: {}, body: {}",
      block.getOffset(),
      block.getMetadataLength(),
      block.getBodyLength());
  in.setPosition(block.getOffset());
  final ArrowDictionaryBatch dictionaryBatch =
      MessageSerializer.deserializeDictionaryBatch(in, block, allocator);
  if (dictionaryBatch != null) {
    return dictionaryBatch;
  }
  throw new IOException("Invalid file. No batch at offset: " + block.getOffset());
}
/** * Closes this object by calling close() on its {@code dictionary} member. */ @Override public void close() { dictionary.close(); } }
/**
 * Reads the next message from the message reader and deserializes it as the schema.
 *
 * @return the deserialized arrow schema
 * @throws IOException if no message is available, or if the message header is not a schema
 */
@Override
protected Schema readSchema() throws IOException {
  final MessageResult schemaResult = messageReader.readNext();

  if (schemaResult == null) {
    throw new IOException("Unexpected end of input. Missing schema.");
  }

  if (schemaResult.getMessage().headerType() == MessageHeader.Schema) {
    return MessageSerializer.deserializeSchema(schemaResult.getMessage());
  }

  throw new IOException(
      "Expected schema but header was " + schemaResult.getMessage().headerType());
}
private void ensureStarted() throws IOException { if (!started) { started = true; startInternal(out); // write the schema - for file formats this is duplicated in the footer, but matches // the streaming format MessageSerializer.serialize(out, schema); // write out any dictionaries for (ArrowDictionaryBatch batch : dictionaries) { try { writeDictionaryBatch(batch); } finally { batch.close(); } } } }
/**
 * Writes this dictionary batch into the given FlatBuffer builder.
 *
 * @param builder builder receiving the serialized data
 * @return the offset returned by {@code DictionaryBatch.endDictionaryBatch}
 */
@Override
public int writeTo(FlatBufferBuilder builder) {
  // Serialize the dictionary payload first; its offset is then referenced from the
  // DictionaryBatch table built below.
  final int dictionaryOffset = dictionary.writeTo(builder);

  DictionaryBatch.startDictionaryBatch(builder);
  DictionaryBatch.addId(builder, dictionaryId);
  DictionaryBatch.addData(builder, dictionaryOffset);
  final int batchOffset = DictionaryBatch.endDictionaryBatch(builder);
  return batchOffset;
}
/**
 * Dispatches this message to the given visitor.
 *
 * @param visitor visitor whose {@code visit} method is invoked with this object
 * @param <T> result type produced by the visitor
 * @return whatever the visitor returns
 */
@Override
public <T> T accepts(ArrowMessageVisitor<T> visitor) {
  final T outcome = visitor.visit(this);
  return outcome;
}
/**
 * Computes the body length by delegating to the {@code dictionary} member.
 *
 * @return the body length reported by {@code dictionary}
 */
@Override
public int computeBodyLength() {
  final int bodyLength = dictionary.computeBodyLength();
  return bodyLength;
}
/**
 * Returns the dictionary blocks recorded in the footer, after making sure the reader has been
 * initialized.
 *
 * @return the footer's dictionary blocks
 * @throws IOException if initialization fails
 */
public List<ArrowBlock> getDictionaryBlocks() throws IOException {
  ensureInitialized();
  final List<ArrowBlock> dictionaryBlocks = footer.getDictionaries();
  return dictionaryBlocks;
}
/**
 * Returns the record batch blocks recorded in the footer, after making sure the reader has
 * been initialized.
 *
 * @return the footer's record batch blocks
 * @throws IOException if initialization fails
 */
public List<ArrowBlock> getRecordBlocks() throws IOException {
  ensureInitialized();
  final List<ArrowBlock> recordBlocks = footer.getRecordBatches();
  return recordBlocks;
}
/**
 * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
 *
 * <p>The channel is wrapped in a {@code ReadChannel}, which in turn backs a
 * {@code MessageChannelReader}; construction then continues in the constructor that accepts
 * a message reader and an allocator.
 *
 * @param in ReadableByteChannel to read messages from
 * @param allocator to allocate new buffers
 */
public ArrowStreamReader(ReadableByteChannel in, BufferAllocator allocator) {
  this(
      new MessageChannelReader(new ReadChannel(in), allocator),
      allocator);
}
/**
 * Serializes a dictionary batch to {@code out} and logs where it was written.
 *
 * @param batch the dictionary batch to serialize
 * @return the block returned by the serializer for the written batch
 * @throws IOException if serialization fails
 */
protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException {
  final ArrowBlock written = MessageSerializer.serialize(out, batch);
  LOGGER.debug(
      "DictionaryRecordBatch at {}, metadata: {}, body: {}",
      written.getOffset(),
      written.getMetadataLength(),
      written.getBodyLength());
  return written;
}
/**
 * Closes {@code arrowRecordBatch} and then {@code vectorLoader}, skipping whichever is null.
 *
 * <p>The loader is closed in a {@code finally} block so that a failure while closing the
 * batch cannot leave the loader open.
 *
 * @throws IOException if closing either resource fails
 */
@Override
public void close() throws IOException {
  try {
    if (arrowRecordBatch != null) {
      arrowRecordBatch.close();
    }
  } finally {
    // Runs even when closing the batch threw, so the loader is never leaked.
    if (vectorLoader != null) {
      vectorLoader.close();
    }
  }
}
/**
 * Dispatches this message to the given visitor.
 *
 * @param visitor visitor whose {@code visit} method is invoked with this object
 * @param <T> result type produced by the visitor
 * @return whatever the visitor returns
 */
@Override
public <T> T accepts(ArrowMessageVisitor<T> visitor) {
  final T outcome = visitor.visit(this);
  return outcome;
}
/**
 * Closes {@code arrowRecordBatch} and then {@code vectorLoader}, skipping whichever is null.
 *
 * <p>The loader is closed in a {@code finally} block so that a failure while closing the
 * batch cannot leave the loader open.
 *
 * @throws IOException if closing either resource fails
 */
@Override
public void close() throws IOException {
  try {
    if (arrowRecordBatch != null) {
      arrowRecordBatch.close();
    }
  } finally {
    // Runs even when closing the batch threw, so the loader is never leaked.
    if (vectorLoader != null) {
      vectorLoader.close();
    }
  }
}
/**
 * Loads the given record batch through {@code loader} and then closes the batch.
 *
 * <p>The batch is closed whether or not loading succeeds, so callers must not use it after
 * this call.
 *
 * @param batch the record batch to load; always closed before this method returns
 */
protected void loadRecordBatch(ArrowRecordBatch batch) {
  try {
    loader.load(batch);
  } finally {
    // Close unconditionally, including when load() throws.
    batch.close();
  }
}