/**
 * Positions the reader on the given block and loads it.
 *
 * <p>The block is looked up in the footer's record-batch list. When found, its
 * index becomes the current record-batch position and loading is delegated to
 * {@link #loadNextBatch()}.
 *
 * @param block the record-batch block to load; must be present in the footer's
 *              record-batch list
 * @return whatever {@link #loadNextBatch()} returns for the selected position
 * @throws IllegalArgumentException if {@code block} is not in the footer's
 *                                  record-batch list
 * @throws IOException declared by the initialization and loading calls
 */
public boolean loadRecordBatch(ArrowBlock block) throws IOException {
    ensureInitialized();
    int blockIndex = footer.getRecordBatches().indexOf(block);
    if (blockIndex == -1) {
        throw new IllegalArgumentException("Arrow block does not exist in record batches: " + block);
    }
    // loadNextBatch() reads the block at currentRecordBatch and then advances past it.
    currentRecordBatch = blockIndex;
    return loadNextBatch();
}
/**
 * Loads the record batch at the current position, if there is one.
 *
 * <p>After {@code prepareLoadNextBatch()} runs, the current position is compared
 * against the number of record batches listed in the footer. If a block is
 * available, the position is advanced, the block is read via
 * {@code readRecordBatch} and the result is passed to {@code loadRecordBatch}.
 *
 * @return {@code true} if a batch was read and loaded, {@code false} if the
 *         current position is at or past the end of the footer's batch list
 * @throws IOException declared by the preparation and read calls
 */
@Override
public boolean loadNextBatch() throws IOException {
    prepareLoadNextBatch();

    // Nothing left to load once the position reaches the end of the footer's list.
    if (currentRecordBatch >= footer.getRecordBatches().size()) {
        return false;
    }

    // The position is advanced before the block is read, as with a post-increment.
    final int batchIndex = currentRecordBatch++;
    ArrowBlock nextBlock = footer.getRecordBatches().get(batchIndex);
    ArrowRecordBatch loaded = readRecordBatch(in, nextBlock, allocator);
    loadRecordBatch(loaded);
    return true;
}
/** * Read a datavec schema and record set * from the given arrow file. * @param input the input to read * @return the associated datavec schema and record */ public static Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException { BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); Schema retSchema = null; ArrowWritableRecordBatch ret = null; SeekableReadChannel channel = new SeekableReadChannel(input.getChannel()); ArrowFileReader reader = new ArrowFileReader(channel, allocator); reader.loadNextBatch(); retSchema = toDatavecSchema(reader.getVectorSchemaRoot().getSchema()); //load the batch VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot()); VectorLoader vectorLoader = new VectorLoader(reader.getVectorSchemaRoot()); ArrowRecordBatch recordBatch = unloader.getRecordBatch(); vectorLoader.load(recordBatch); ret = asDataVecBatch(recordBatch,retSchema,reader.getVectorSchemaRoot()); ret.setUnloader(unloader); return Pair.of(retSchema,ret); }
/**
 * Returns the dictionary blocks listed in the footer.
 *
 * <p>{@code ensureInitialized()} is called first; the list obtained from the
 * footer is returned without being copied here.
 *
 * @return the footer's dictionary blocks
 * @throws IOException declared by the initialization call
 */
public List<ArrowBlock> getDictionaryBlocks() throws IOException {
    ensureInitialized();
    final List<ArrowBlock> dictionaryBlocks = footer.getDictionaries();
    return dictionaryBlocks;
}
/**
 * Reads the dictionary batch at the current dictionary position and advances
 * that position by one.
 *
 * @return the dictionary batch read from the block at the current position
 * @throws IOException if the current position is at or beyond the number of
 *                     dictionaries listed in the footer, or as declared by the
 *                     underlying read
 */
@Override
public ArrowDictionaryBatch readDictionary() throws IOException {
    final int definedCount = footer.getDictionaries().size();
    if (currentDictionaryBatch >= definedCount) {
        throw new IOException("Requested more dictionaries than defined in footer: " + currentDictionaryBatch);
    }

    // The position is advanced before the block is read, as with a post-increment.
    final int dictionaryIndex = currentDictionaryBatch++;
    ArrowBlock dictionaryBlock = footer.getDictionaries().get(dictionaryIndex);
    return readDictionaryBatch(in, dictionaryBlock, allocator);
}
/** * Read a datavec schema and record set * from the given bytes (usually expected to be an arrow format file) * @param input the input to read * @return the associated datavec schema and record */ public static Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException { BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); Schema retSchema = null; ArrowWritableRecordBatch ret = null; SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(input)); ArrowFileReader reader = new ArrowFileReader(channel, allocator); reader.loadNextBatch(); retSchema = toDatavecSchema(reader.getVectorSchemaRoot().getSchema()); //load the batch VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot()); VectorLoader vectorLoader = new VectorLoader(reader.getVectorSchemaRoot()); ArrowRecordBatch recordBatch = unloader.getRecordBatch(); vectorLoader.load(recordBatch); ret = asDataVecBatch(recordBatch,retSchema,reader.getVectorSchemaRoot()); ret.setUnloader(unloader); return Pair.of(retSchema,ret); }
/**
 * Returns the record-batch blocks listed in the footer.
 *
 * <p>{@code ensureInitialized()} is called first; the list obtained from the
 * footer is returned without being copied here.
 *
 * @return the footer's record-batch blocks
 * @throws IOException declared by the initialization call
 */
public List<ArrowBlock> getRecordBlocks() throws IOException {
    ensureInitialized();
    final List<ArrowBlock> recordBlocks = footer.getRecordBatches();
    return recordBlocks;
}