public LlapArrowBatchRecordReader(InputStream in, Schema schema, Class<ArrowWrapperWritable> clazz, JobConf job, Closeable client, Socket socket, long arrowAllocatorLimit) throws IOException { super(in, schema, clazz, job, client, socket); allocator = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(arrowAllocatorLimit); this.arrowStreamReader = new ArrowStreamReader(socket.getInputStream(), allocator); }
/**
 * Closes the Arrow stream reader, then releases the base reader's resources.
 *
 * The original implementation closed only {@code arrowStreamReader} and never called
 * {@code super.close()}, leaking the socket and client that the constructor hands to
 * the base reader. The try/finally guarantees the base resources are released even if
 * closing the Arrow reader throws.
 *
 * @throws IOException if either close fails
 */
@Override
public void close() throws IOException {
  try {
    arrowStreamReader.close();
  } finally {
    // The base reader owns the socket/client passed to super(...) in the constructor;
    // close them as well so nothing leaks.
    super.close();
  }
}
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/** * Load the next ArrowRecordBatch to the vector schema root if available. * * @return true if a batch was read, false on EOS * @throws IOException on error */ public boolean loadNextBatch() throws IOException { prepareLoadNextBatch(); MessageResult result = messageReader.readNext(); // Reached EOS if (result == null) { return false; } if (result.getMessage().headerType() != MessageHeader.RecordBatch) { throw new IOException("Expected RecordBatch but header was " + result.getMessage().headerType()); } ArrowBuf bodyBuffer = result.getBodyBuffer(); // For zero-length batches, need an empty buffer to deserialize the batch if (bodyBuffer == null) { bodyBuffer = allocator.getEmpty(); } ArrowRecordBatch batch = MessageSerializer.deserializeRecordBatch(result.getMessage(), bodyBuffer); loadRecordBatch(batch); return true; }
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/**
 * Creates a record reader that reads Arrow batches from the supplied socket.
 *
 * @param in input stream forwarded to the base reader
 * @param schema Arrow schema expected on the stream
 * @param clazz writable class produced by this reader
 * @param job job configuration
 * @param client closeable client held by the base reader
 * @param socket socket carrying the Arrow stream
 * @param arrowAllocatorLimit byte cap for the Arrow root allocator
 * @throws IOException if obtaining the socket input stream fails
 */
public LlapArrowBatchRecordReader(InputStream in, Schema schema, Class<ArrowWrapperWritable> clazz,
    JobConf job, Closeable client, Socket socket, long arrowAllocatorLimit) throws IOException {
  super(in, schema, clazz, job, client, socket);
  // Allocator is obtained (or reused) from the shared factory, bounded by the given limit.
  this.allocator = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(arrowAllocatorLimit);
  this.arrowStreamReader = new ArrowStreamReader(socket.getInputStream(), allocator);
}
/**
 * Closes the Arrow stream reader, then the base reader's resources.
 *
 * Fixes a resource leak: the original body closed only {@code arrowStreamReader},
 * never calling {@code super.close()}, so the socket and client given to the base
 * reader in the constructor were never released. The finally block ensures they are
 * released even when the Arrow reader's close throws.
 *
 * @throws IOException if either close fails
 */
@Override
public void close() throws IOException {
  try {
    arrowStreamReader.close();
  } finally {
    // Base reader holds the socket/client from super(...); always release them.
    super.close();
  }
}