/**
 * Utility API that will read all the data in path. This circumvents the need to create Hadoop
 * objects to use this class. `columns` can contain the list of columns to project.
 */
@Override
public void initialize(String path, List<String> columns)
    throws IOException, UnsupportedOperationException {
  super.initialize(path, columns);
  initializeInternal();
}
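// A minimal usage sketch for the path-based entry point (an assumption, not from the source).
// The no-arg constructor, the file path, and the projected column names are hypothetical;
// resultBatch() and nextBatch() are the batch API shown later in this section. Imports are
// elided to match the surrounding snippets.
static void scan() throws IOException {
  VectorizedParquetRecordReader reader = new VectorizedParquetRecordReader();  // assumed ctor
  try {
    reader.initialize("/tmp/example.parquet", Arrays.asList("id", "value"));
    ColumnarBatch batch = reader.resultBatch();  // reused across all nextBatch() calls
    while (reader.nextBatch()) {
      System.out.println("rows in batch: " + batch.numRows());
    }
  } finally {
    reader.close();
  }
}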
public VectorizedRleValuesReader(int bitWidth) {
  this.fixedWidth = true;
  // A non-zero bit width implies a 4-byte length header precedes the encoded data.
  this.readLength = bitWidth != 0;
  init(bitWidth);
}
/**
 * Returns the ColumnarBatch object that will be used for all rows returned by this reader.
 * This object is reused. Calling this enables the vectorized reader. This should be called
 * before any calls to nextKeyValue/nextBatch.
 */
public ColumnarBatch resultBatch() {
  if (columnarBatch == null) initBatch();
  return columnarBatch;
}
@Override
public boolean nextKeyValue() throws IOException {
  resultBatch();
  if (returnColumnarBatch) return nextBatch();
  if (batchIdx >= numBatched) {
    if (!nextBatch()) return false;
  }
  ++batchIdx;
  return true;
}
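// A hedged sketch of row-at-a-time consumption, assuming batch returns are not enabled: each
// nextKeyValue() advances batchIdx within the current batch and transparently loads the next
// batch when the previous one is exhausted. That getCurrentValue() hands back a row view at
// batchIdx - 1 is an assumption about this API, not confirmed by the snippet above.
static void consumeRows(VectorizedParquetRecordReader reader) throws IOException {
  while (reader.nextKeyValue()) {
    Object row = reader.getCurrentValue();  // assumed: a row view into the reused batch
    // Consume `row` before the next call; the backing batch memory is recycled.
  }
}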
/**
 * For all the read*Batch functions, reads `num` values from this columnReader into column. It
 * is guaranteed that num is smaller than the number of values left in the current page.
 */
private void readBooleanBatch(int rowId, int num, WritableColumnVector column)
    throws IOException {
  if (column.dataType() != DataTypes.BooleanType) {
    throw constructConvertNotSupportedException(descriptor, column);
  }
  defColumn.readBooleans(
      num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
}
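// A sketch of how a caller can honor the page-boundary contract above. The helper names
// (valuesRead, endOfPageValueCount, readPage) are assumptions for illustration; the point is
// that each request is capped at the number of values remaining in the current page.
private void readBatch(int total, WritableColumnVector column) throws IOException {
  int rowId = 0;
  while (total > 0) {
    if (valuesRead >= endOfPageValueCount) readPage();  // assumed helper: load the next page
    int leftInPage = (int) (endOfPageValueCount - valuesRead);
    int num = Math.min(total, leftInPage);  // never read past the current page
    readBooleanBatch(rowId, num, column);
    valuesRead += num;
    rowId += num;
    total -= num;
  }
}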
private void readFloatBatch(int rowId, int num, WritableColumnVector column) throws IOException {
  // This is where we implement support for the valid type conversions.
  // TODO: support implicit cast to double?
  if (column.dataType() == DataTypes.FloatType) {
    defColumn.readFloats(
        num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
  } else {
    throw constructConvertNotSupportedException(descriptor, column);
  }
}
private void readDoubleBatch(int rowId, int num, WritableColumnVector column) throws IOException {
  // This is where we implement support for the valid type conversions.
  // TODO: implement remaining type conversions
  if (column.dataType() == DataTypes.DoubleType) {
    defColumn.readDoubles(
        num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn);
  } else {
    throw constructConvertNotSupportedException(descriptor, column);
  }
}
@Override
public int readValueDictionaryId() {
  // Dictionary ids use the same RLE/bit-packed integer encoding as plain integer values.
  return readInteger();
}
/**
 * Implementation of RecordReader API.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException, UnsupportedOperationException {
  super.initialize(inputSplit, taskAttemptContext);
  initializeInternal();
}
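// A hedged sketch of the standard Hadoop wiring for this RecordReader entry point (assumed,
// not from the source); compare with the string-path utility initialize earlier in this
// section, which avoids constructing these objects. The file path is hypothetical and the
// reader constructor arguments are elided as version-dependent.
Configuration conf = new Configuration();
Path file = new Path("/tmp/example.parquet");  // hypothetical input
long length = FileSystem.get(conf).getFileStatus(file).getLen();
FileSplit split = new FileSplit(file, 0, length, null);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
VectorizedParquetRecordReader reader = new VectorizedParquetRecordReader();  // assumed ctor
reader.initialize(split, context);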
// Unlike the variant above, this constructor leaves readLength unset.
public VectorizedRleValuesReader(int bitWidth) {
  fixedWidth = true;
  init(bitWidth);
}
@Override
public boolean readBoolean() {
  // Booleans are RLE/bit-packed as 0 or 1, so decode through the integer path.
  return this.readInteger() != 0;
}
// Same as the variant above, but declaring InterruptedException to match the Hadoop
// RecordReader contract.
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  resultBatch();
  if (returnColumnarBatch) return nextBatch();
  if (batchIdx >= numBatched) {
    if (!nextBatch()) return false;
  }
  ++batchIdx;
  return true;
}
public VectorizedRleValuesReader(int bitWidth, boolean readLength) {
  this.fixedWidth = true;
  // If true, a 4-byte length header is consumed before the encoded data when a page is read.
  this.readLength = readLength;
  init(bitWidth);
}