/**
 * Returns the ColumnarBatch object that will be used for all rows returned by this reader.
 * This object is reused. Calling this enables the vectorized reader. This should be called
 * before any calls to nextKeyValue/nextBatch.
 */
public ColumnarBatch resultBatch() {
  if (columnarBatch == null) initBatch();
  return columnarBatch;
}
@Override
public boolean nextKeyValue() throws IOException {
  resultBatch();

  if (returnColumnarBatch) return nextBatch();

  if (batchIdx >= numBatched) {
    if (!nextBatch()) return false;
  }
  ++batchIdx;
  return true;
}
/**
 * Utility API that will read all the data in path. This circumvents the need to create Hadoop
 * objects to use this class. `columns` can contain the list of columns to project.
 */
@Override
public void initialize(String path, List<String> columns) throws IOException,
    UnsupportedOperationException {
  super.initialize(path, columns);
  initializeInternal();
}
/**
 * Advances to the next batch of rows. Returns false if there are no more.
 */
public boolean nextBatch() throws IOException {
  for (WritableColumnVector vector : columnVectors) {
    vector.reset();
  }
  columnarBatch.setNumRows(0);
  if (rowsReturned >= totalRowCount) return false;
  checkEndOfRowGroup();

  int num = (int) Math.min((long) capacity, totalCountLoadedSoFar - rowsReturned);
  for (int i = 0; i < columnReaders.length; ++i) {
    if (columnReaders[i] == null) continue;
    columnReaders[i].readBatch(num, columnVectors[i]);
  }
  rowsReturned += num;
  columnarBatch.setNumRows(num);
  numBatched = num;
  batchIdx = 0;
  return true;
}
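Taken together, the methods above form a simple read loop: initialize the reader with a path and projection, grab the reused batch once via resultBatch(), then call nextBatch() until it returns false. The sketch below is illustrative only; the enclosing class is assumed to be Spark's VectorizedParquetRecordReader, the no-arg constructor and the ColumnarBatch package vary across Spark versions, and the file path and column names are made up.

import java.util.Arrays;
// In newer Spark versions ColumnarBatch lives in org.apache.spark.sql.vectorized instead.
import org.apache.spark.sql.execution.vectorized.ColumnarBatch;
import org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader;

public class VectorizedReadSketch {
  public static void main(String[] args) throws Exception {
    // Constructor arguments differ between Spark versions; a no-arg form is assumed here.
    VectorizedParquetRecordReader reader = new VectorizedParquetRecordReader();
    try {
      // The utility initialize(path, columns) avoids Hadoop InputSplit/TaskAttemptContext setup.
      reader.initialize("/tmp/example.parquet", Arrays.asList("id", "name"));
      // Enables the vectorized path; must be called before nextKeyValue/nextBatch.
      ColumnarBatch batch = reader.resultBatch();
      // Each call refills the same reused batch with the next chunk of rows.
      while (reader.nextBatch()) {
        System.out.println("rows in this batch: " + batch.numRows());
      }
    } finally {
      reader.close();
    }
  }
}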
/**
 * Implementation of RecordReader API.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException, UnsupportedOperationException {
  super.initialize(inputSplit, taskAttemptContext);
  initializeInternal();
}
/**
 * Advances to the next batch of rows. Returns false if there are no more.
 */
public boolean nextBatch() throws IOException {
  columnarBatch.reset();
  if (rowsReturned >= totalRowCount) return false;
  checkEndOfRowGroup();

  int num = (int) Math.min((long) columnarBatch.capacity(), totalCountLoadedSoFar - rowsReturned);
  for (int i = 0; i < columnReaders.length; ++i) {
    if (columnReaders[i] == null) continue;
    columnReaders[i].readBatch(num, columnarBatch.column(i));
  }
  rowsReturned += num;
  columnarBatch.setNumRows(num);
  numBatched = num;
  batchIdx = 0;
  return true;
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  resultBatch();

  if (returnColumnarBatch) return nextBatch();

  if (batchIdx >= numBatched) {
    if (!nextBatch()) return false;
  }
  ++batchIdx;
  return true;
}
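The RecordReader-style initialize() above is the entry point task execution would use instead of the path-based utility. Below is a rough, hedged sketch of that wiring; the FileSplit/TaskAttemptContextImpl setup, the split length, and the getCurrentValue() call are assumptions from the generic Hadoop RecordReader contract rather than anything shown in these snippets, and the no-arg reader constructor is again version-dependent.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader;

public class RecordReaderPathSketch {
  public static void main(String[] args) throws Exception {
    // Hadoop objects the RecordReader-style initialize() expects; the values are illustrative.
    Configuration conf = new Configuration();
    long splitLength = 1024 * 1024; // illustrative; normally the actual file length
    FileSplit split =
        new FileSplit(new Path("/tmp/example.parquet"), 0, splitLength, new String[0]);
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Constructor arguments differ between Spark versions; a no-arg form is assumed here.
    VectorizedParquetRecordReader reader = new VectorizedParquetRecordReader();
    reader.initialize(split, context);
    try {
      // Row-at-a-time consumption through the generic RecordReader contract.
      while (reader.nextKeyValue()) {
        Object value = reader.getCurrentValue(); // a row, or the whole batch in columnar mode
      }
    } finally {
      reader.close();
    }
  }
}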
// Creates the reused batch with the configured memory mode and no partition columns.
private void initBatch() {
  initBatch(MEMORY_MODE, null, null);
}
// Returns the reused ColumnarBatch, creating it lazily on first call.
public ColumnarBatch resultBatch() {
  if (columnarBatch == null) initBatch();
  return columnarBatch;
}
// Creates the reused batch with the default memory mode and no partition columns.
public void initBatch() {
  initBatch(DEFAULT_MEMORY_MODE, null, null);
}
// Creates the reused batch with the given partition columns appended and filled with the
// constant partition values.
public void initBatch(StructType partitionColumns, InternalRow partitionValues) {
  initBatch(DEFAULT_MEMORY_MODE, partitionColumns, partitionValues);
}
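The last overload lets a caller append partition columns whose values are constant for the whole file. The helper below is a hypothetical sketch of supplying that schema and value row; the reader type name is assumed as in the earlier sketches, GenericInternalRow and UTF8String are assumptions about the internal-row representation expected here, and the column name `dt` is invented.

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.unsafe.types.UTF8String;

public class PartitionBatchSketch {
  // Hypothetical helper: attach one constant partition column `dt` to an already-created reader.
  static void attachPartitionColumn(VectorizedParquetRecordReader reader, String dtValue) {
    StructType partitionSchema = new StructType().add("dt", DataTypes.StringType);
    InternalRow partitionValues =
        new GenericInternalRow(new Object[] { UTF8String.fromString(dtValue) });
    // initBatch appends `dt` to the batch schema and fills it with the constant value.
    reader.initBatch(partitionSchema, partitionValues);
  }
}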