@Override public void initFromPage(int valueCount, byte[] page, int start) { this.offset = start; this.in = page; if (fixedWidth) { if (bitWidth != 0) { int length = readIntLittleEndian(); this.end = this.offset + length; } } else { this.end = page.length; if (this.end != this.offset) init(page[this.offset++] & 255); } if (bitWidth == 0) { // 0 bit width, treat this as an RLE run of valueCount number of 0's. this.mode = MODE.RLE; this.currentCount = valueCount; this.currentValue = 0; } else { this.currentCount = 0; } }
/** * For all the read*Batch functions, reads `num` values from this columnReader into column. It * is guaranteed that num is smaller than the number of values left in the current page. */ private void readBooleanBatch(int rowId, int num, ColumnVector column) throws IOException { assert(column.dataType() == DataTypes.BooleanType); defColumn.readBooleans( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); }
private void readDoubleBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: implement remaining type conversions if (column.dataType() == DataTypes.DoubleType) { defColumn.readDoubles( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } }
int header = readUnsignedVarInt(); this.mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; switch (mode) { case RLE: this.currentCount = header >>> 1; this.currentValue = readIntLittleEndianPaddedOnBitWidth(); return; case PACKED: int numGroups = header >>> 1; this.currentCount = numGroups * 8; int bytesToRead = ceil8(this.currentCount * this.bitWidth);
private void readIntBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: implement remaining type conversions if (column.dataType() == DataTypes.IntegerType || column.dataType() == DataTypes.DateType || DecimalType.is32BitDecimalType(column.dataType())) { defColumn.readIntegers( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else if (column.dataType() == DataTypes.ByteType) { defColumn.readBytes( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else if (column.dataType() == DataTypes.ShortType) { defColumn.readShorts( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } }
@Override public int readValueDictionaryId() { return readInteger(); }
private void readBinaryBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: implement remaining type conversions VectorizedValuesReader data = (VectorizedValuesReader) dataColumn; if (column.isArray()) { defColumn.readBinarys(num, column, rowId, maxDefLevel, data); } else if (column.dataType() == DataTypes.TimestampType) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putLong(rowId + i, // Read 12 bytes for INT96 ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12))); } else { column.putNull(rowId + i); } } } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } }
private void readLongBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. if (column.dataType() == DataTypes.LongType || DecimalType.is64BitDecimalType(column.dataType())) { defColumn.readLongs( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else if (column.dataType() == DataTypes.TimestampType) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putLong(rowId + i, DateTimeUtils.fromMillis(dataColumn.readLong())); } else { column.putNull(rowId + i); } } } else { throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType()); } }
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException { this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { throw new IOException( "could not read page in col " + descriptor + " as the dictionary was missing for encoding " + dataEncoding); } @SuppressWarnings("deprecation") Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedRleValuesReader(); this.isCurrentPageDictionaryEncoded = true; } else { if (dataEncoding != Encoding.PLAIN) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedPlainValuesReader(); this.isCurrentPageDictionaryEncoded = false; } try { dataColumn.initFromPage(pageValueCount, in); } catch (IOException e) { throw new IOException("could not read page in col " + descriptor, e); } }
int header = readUnsignedVarInt(); this.mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; switch (mode) { case RLE: this.currentCount = header >>> 1; this.currentValue = readIntLittleEndianPaddedOnBitWidth(); return; case PACKED:
public VectorizedRleValuesReader(int bitWidth) { this.fixedWidth = true; this.readLength = bitWidth != 0; init(bitWidth); }
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromBuffer( this.pageValueCount, page.getDefinitionLevels().toByteArray()); try { initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
private void readFloatBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: support implicit cast to double? if (column.dataType() == DataTypes.FloatType) { defColumn.readFloats( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else { throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType()); } }
/** * Reads the next byteWidth little endian int. */ private int readIntLittleEndianPaddedOnBitWidth() { switch (bytesWidth) { case 0: return 0; case 1: return in[offset++] & 255; case 2: { int ch2 = in[offset] & 255; int ch1 = in[offset + 1] & 255; offset += 2; return (ch1 << 8) + ch2; } case 3: { int ch3 = in[offset] & 255; int ch2 = in[offset + 1] & 255; int ch1 = in[offset + 2] & 255; offset += 3; return (ch1 << 16) + (ch2 << 8) + (ch3 << 0); } case 4: { return readIntLittleEndian(); } } throw new RuntimeException("Unreachable"); }
private void readIntBatch(int rowId, int num, WritableColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: implement remaining type conversions if (column.dataType() == DataTypes.IntegerType || column.dataType() == DataTypes.DateType || DecimalType.is32BitDecimalType(column.dataType())) { defColumn.readIntegers( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else if (column.dataType() == DataTypes.ByteType) { defColumn.readBytes( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else if (column.dataType() == DataTypes.ShortType) { defColumn.readShorts( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else { throw constructConvertNotSupportedException(descriptor, column); } }
@Override public int readValueDictionaryId() { return readInteger(); }
if (column.dataType() == DataTypes.StringType || column.dataType() == DataTypes.BinaryType || DecimalType.isByteArrayDecimalType(column.dataType())) { defColumn.readBinarys(num, column, rowId, maxDefLevel, data); } else if (column.dataType() == DataTypes.TimestampType) { if (!shouldConvertTimestamps()) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { if (defColumn.readInteger() == maxDefLevel) {
int header = readUnsignedVarInt(); this.mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; switch (mode) { case RLE: this.currentCount = header >>> 1; this.currentValue = readIntLittleEndianPaddedOnBitWidth(); return; case PACKED: int numGroups = header >>> 1; this.currentCount = numGroups * 8; int bytesToRead = ceil8(this.currentCount * this.bitWidth);
private void readLongBatch(int rowId, int num, WritableColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. if (column.dataType() == DataTypes.LongType || DecimalType.is64BitDecimalType(column.dataType()) || originalType == OriginalType.TIMESTAMP_MICROS) { defColumn.readLongs( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else if (originalType == OriginalType.TIMESTAMP_MILLIS) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putLong(rowId + i, DateTimeUtils.fromMillis(dataColumn.readLong())); } else { column.putNull(rowId + i); } } } else { throw constructConvertNotSupportedException(descriptor, column); } }