/**
 * Closes this record reader by closing the underlying Arrow stream reader.
 *
 * @throws IOException if closing the Arrow stream reader fails
 */
@Override
public void close() throws IOException {
  this.arrowStreamReader.close();
}
public LlapArrowBatchRecordReader(InputStream in, Schema schema, Class<ArrowWrapperWritable> clazz, JobConf job, Closeable client, Socket socket, long arrowAllocatorLimit) throws IOException { super(in, schema, clazz, job, client, socket); allocator = RootAllocatorFactory.INSTANCE.getOrCreateRootAllocator(arrowAllocatorLimit); this.arrowStreamReader = new ArrowStreamReader(socket.getInputStream(), allocator); }
/**
 * Writes one Arrow batch to the output stream.
 *
 * <p>On the first call the Arrow stream writer is created from the vector schema root
 * carried by {@code value}; the allocator and root vector carried by {@code value} are
 * remembered, and the allocator is also handed to the output channel. Every call then
 * writes a batch through the stream writer.
 *
 * @param key not used by this method
 * @param value expected to be an {@link ArrowWrapperWritable}
 * @throws IOException if writing the batch fails
 */
@Override
public void write(K key, V value) throws IOException {
  final ArrowWrapperWritable wrapper = (ArrowWrapperWritable) value;
  final boolean firstBatch = (arrowStreamWriter == null);
  if (firstBatch) {
    // Lazy set-up: the schema root is only available once the first value arrives.
    final VectorSchemaRoot schemaRoot = wrapper.getVectorSchemaRoot();
    arrowStreamWriter = new ArrowStreamWriter(schemaRoot, null, out);
    allocator = wrapper.getAllocator();
    this.out.setAllocator(allocator);
    rootVector = wrapper.getRootVector();
  }
  arrowStreamWriter.writeBatch();
}
} // closes the enclosing class (its header is outside this chunk)
/**
 * Finishes the output by writing the footer, the footer length and the trailing magic.
 *
 * @param out channel the footer, its length and the magic are written to
 * @throws IOException if writing to the channel fails
 * @throws InvalidArrowFileException if the written footer has a non-positive length
 */
@Override
protected void endInternal(WriteChannel out) throws IOException {
  final long footerStart = out.getCurrentPosition();
  final ArrowFooter footer = new ArrowFooter(schema, dictionaryBlocks, recordBlocks);
  out.write(footer, false);

  // The footer length is derived from how far the channel advanced while writing it.
  final long footerEnd = out.getCurrentPosition();
  final int footerLength = (int) (footerEnd - footerStart);
  if (footerLength <= 0) {
    throw new InvalidArrowFileException("invalid footer");
  }

  out.writeIntLittleEndian(footerLength);
  LOGGER.debug("Footer starts at {}, length: {}", footerStart, footerLength);

  ArrowMagic.writeMagic(out, false);
  LOGGER.debug("magic written, now at {}", out.getCurrentPosition());
}
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/**
 * Ends the writer: first makes sure it has been started, then makes sure it has been ended.
 *
 * @throws IOException if starting or ending fails
 */
public void end() throws IOException {
  this.ensureStarted();
  this.ensureEnded();
}
/**
 * Returns the dictionaries that were loaded along with ArrowRecordBatches.
 *
 * <p>The reader is initialized first if that has not happened yet.
 *
 * @return map from dictionary id to dictionary; empty if no dictionaries were loaded
 * @throws IOException if reading of schema fails
 */
public Map<Long, Dictionary> getDictionaryVectors() throws IOException {
  this.ensureInitialized();
  return this.dictionaries;
}
@Override public void close(Reporter reporter) throws IOException { try { arrowStreamWriter.close(); } finally { rootVector.close(); //bytesLeaked should always be 0 long bytesLeaked = allocator.getAllocatedMemory(); if(bytesLeaked != 0) { LOG.error("Arrow memory leaked bytes: {}", bytesLeaked); throw new IllegalStateException("Arrow memory leaked bytes:" + bytesLeaked); } allocator.close(); } }
/**
 * Finishes the stream by writing the end-of-stream marker to the channel.
 *
 * @param out channel the end-of-stream marker is written to
 * @throws IOException if writing to the channel fails
 */
@Override
protected void endInternal(WriteChannel out) throws IOException {
  this.writeEndOfStream(out);
}
} // closes the enclosing class (its header is outside this chunk)
/**
 * Reports how many bytes have been read from the ReadChannel.
 *
 * @return number of bytes read, as reported by the underlying channel
 */
public long bytesRead() {
  final long consumed = this.in.bytesRead();
  return consumed;
}
/**
 * Runs the end-of-output step the first time it is called; later calls do nothing.
 *
 * @throws IOException if {@code endInternal} fails
 */
private void ensureEnded() throws IOException {
  if (ended) {
    return;
  }
  // The flag is set before endInternal runs, so a failure there is not retried.
  ended = true;
  endInternal(out);
}
/**
 * Starts the output by writing the Arrow magic to the channel.
 *
 * @param out channel the magic is written to
 * @throws IOException if writing to the channel fails
 */
@Override
protected void startInternal(WriteChannel out) throws IOException {
  final boolean align = true;
  ArrowMagic.writeMagic(out, align);
}
/**
 * Runs {@code initialize()} unless it has already completed.
 *
 * @throws IOException on error
 */
protected void ensureInitialized() throws IOException {
  if (initialized) {
    return;
  }
  initialize();
  // The flag is only set after initialize() returns normally.
  initialized = true;
}
/**
 * Reads one buffer that is enclosed between a START_ARRAY and an END_ARRAY token.
 *
 * @param allocator allocator passed through to {@code read}
 * @param count count passed through to {@code read}
 * @return the buffer produced by {@code read}
 * @throws IOException if reading a token or the buffer contents fails
 */
ArrowBuf readBuffer(BufferAllocator allocator, int count) throws IOException {
  readToken(START_ARRAY);
  final ArrowBuf contents = read(allocator, count);
  readToken(END_ARRAY);
  return contents;
}
} // closes the enclosing class (its header is outside this chunk)
public long align() throws IOException { if (currentPosition % 8 != 0) { // align on 8 byte boundaries return writeZeros(8 - (int) (currentPosition % 8)); } return 0; }
/**
 * Creates a new {@code JSONWriteConfig} constructed with the given pretty flag.
 *
 * @param pretty value passed to the new configuration's constructor
 * @return a newly constructed configuration
 */
public JSONWriteConfig pretty(boolean pretty) {
  final JSONWriteConfig updated = new JSONWriteConfig(pretty);
  return updated;
}
} // closes the enclosing class (its header is outside this chunk)
/**
 * Closes resources, including the vector schema root and dictionary vectors, as well as
 * the underlying read source. Delegates to {@code close(true)}.
 *
 * @throws IOException on error
 */
@Override
public void close() throws IOException {
  final boolean closeReadSource = true;
  close(closeReadSource);
}
/**
 * Returns the vector schema root, initializing the reader first if necessary. The root is
 * loaded with new values on every call to loadNextBatch.
 *
 * @return the vector schema root
 * @throws IOException if reading of schema fails
 */
public VectorSchemaRoot getVectorSchemaRoot() throws IOException {
  this.ensureInitialized();
  return this.root;
}