/**
 * Creates the level reader for RLE-encoded levels stored in {@code slice}.
 *
 * When {@code maxLevel} is 0 a {@link LevelNullReader} is returned and the slice
 * is never touched. Otherwise the slice bytes are wrapped in a
 * {@link RunLengthBitPackingHybridDecoder} whose bit width is derived from
 * {@code maxLevel}.
 *
 * @param maxLevel maximum level value for the column
 * @param slice    slice holding the encoded levels
 * @return the reader for the levels
 */
private LevelReader buildLevelRLEReader(int maxLevel, Slice slice) {
  if (maxLevel != 0) {
    final int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final ByteArrayInputStream levelBytes = new ByteArrayInputStream(slice.getBytes());
    return new LevelRLEReader(new RunLengthBitPackingHybridDecoder(bitWidth, levelBytes));
  }
  return new LevelNullReader();
}
/**
 * Builds the dictionary entries from the raw bytes of a dictionary page.
 *
 * @param dictionaryPage page providing the encoding, the raw bytes and the entry count
 * @param length         byte length shared by every entry, or {@code null} when each
 *                       entry is preceded by its own length (read with
 *                       {@code readIntLittleEndian}, 4 bytes wide)
 * @throws IOException declared by the constructor signature
 */
public BinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException {
  super(dictionaryPage.getEncoding());
  final byte[] dictionaryBytes = dictionaryPage.getSlice().getBytes();
  content = new Binary[dictionaryPage.getDictionarySize()];
  int position = 0;
  if (length != null) {
    // Fixed-width entries: consecutive chunks of exactly `length` bytes.
    checkArgument(length > 0, "Invalid byte array length: %s", length);
    for (int index = 0; index < content.length; index++) {
      content[index] = Binary.fromByteArray(dictionaryBytes, position, length);
      position += length;
    }
  } else {
    // Variable-width entries: a 4-byte length prefix, then that many bytes of payload.
    int index = 0;
    while (index < content.length) {
      final int entryLength = readIntLittleEndian(dictionaryBytes, position);
      position += 4;
      content[index] = Binary.fromByteArray(dictionaryBytes, position, entryLength);
      position += entryLength;
      index++;
    }
  }
}
/**
 * Positions this reader on {@code page} starting at {@code offset} and creates the
 * run-length / bit-packing hybrid decoder for it.
 *
 * The bit width is read from the stream with
 * {@code BytesUtils.readIntLittleEndianOnOneByte}; the decoder then consumes the
 * remainder of the same stream.
 *
 * @param valueCount number of values in the page (not used by this implementation)
 * @param page       array containing the page data
 * @param offset     index in {@code page} at which reading starts; must be less than
 *                   {@code page.length}
 * @throws IOException if reading the bit width fails
 */
@Override
public void initFromPage(int valueCount, byte[] page, int offset) throws IOException {
  checkArgument(page.length > offset, "Attempt to read offset not in the page");
  final int remaining = page.length - offset;
  final ByteArrayInputStream pageStream = new ByteArrayInputStream(page, offset, remaining);
  final int width = BytesUtils.readIntLittleEndianOnOneByte(pageStream);
  decoder = new RunLengthBitPackingHybridDecoder(width, pageStream);
}
/**
 * Reads a little endian int from {@code in}, consuming the number of bytes given by
 * {@code paddedByteCountFromBits(bitWidth)}.
 *
 * @param in       stream to read from
 * @param bitWidth bit width that determines how many bytes are read
 * @return the value read, or 0 (without reading anything) when the byte count is 0
 * @throws IOException if the byte count is not in the range 0..4, or if the
 *                     underlying read fails
 */
public static int readIntLittleEndianPaddedOnBitWidth(InputStream in, int bitWidth)
    throws IOException {
  final int byteCount = paddedByteCountFromBits(bitWidth);
  if (byteCount == 0) {
    return 0;
  }
  if (byteCount == 1) {
    return BytesUtils.readIntLittleEndianOnOneByte(in);
  }
  if (byteCount == 2) {
    return BytesUtils.readIntLittleEndianOnTwoBytes(in);
  }
  if (byteCount == 3) {
    return BytesUtils.readIntLittleEndianOnThreeBytes(in);
  }
  if (byteCount == 4) {
    return BytesUtils.readIntLittleEndian(in);
  }
  throw new IOException(
      String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
}
/**
 * Write a little endian int to out, using the number of bytes required by
 * bit width.
 *
 * @param out      stream to write to
 * @param v        value to write
 * @param bitWidth bit width that determines how many bytes are written; a byte
 *                 count of 0 writes nothing
 * @throws IOException if {@code paddedByteCountFromBits(bitWidth)} is not in the
 *                     range 0..4, or if the underlying write fails
 */
public static void writeIntLittleEndianPaddedOnBitWidth(OutputStream out, int v, int bitWidth)
    throws IOException {
  int bytesWidth = paddedByteCountFromBits(bitWidth);
  switch (bytesWidth) {
    case 0:
      break;
    case 1:
      writeIntLittleEndianOnOneByte(out, v);
      break;
    case 2:
      writeIntLittleEndianOnTwoBytes(out, v);
      break;
    case 3:
      writeIntLittleEndianOnThreeBytes(out, v);
      break;
    case 4:
      writeIntLittleEndian(out, v);
      break;
    default:
      // The failure is caused by the bit width, not by the value being written,
      // so the message reports bitWidth.
      throw new IOException(
          String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
  }
}
/**
 * Reports how many bytes the data takes once written: the total bit count
 * ({@code totalValues * bitWidth}) converted by
 * {@code BytesUtils.paddedByteCountFromBits}.
 *
 * NOTE(review): the product is computed in the operands' own type before the
 * conversion; if both are ints it could overflow for very large inputs - confirm.
 *
 * @return size of the data as it would be written
 */
public long getBufferSize() {
  final long paddedBytes = BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
  return paddedBytes;
}
/**
 * Writes {@code intValue} to {@code out} via {@code BytesUtils.writeIntLittleEndian}.
 *
 * @param out destination stream
 * @throws IOException if the write fails
 */
@Override
public void writeAllTo(final OutputStream out) throws IOException {
  final int value = intValue;
  BytesUtils.writeIntLittleEndian(out, value);
}
/**
 * Writes {@code intValue} to {@code out} via {@code BytesUtils.writeUnsignedVarInt}.
 *
 * @param out destination stream
 * @throws IOException if the write fails
 */
@Override
public void writeAllTo(final OutputStream out) throws IOException {
  final int value = intValue;
  BytesUtils.writeUnsignedVarInt(value, out);
}
private void writeRleRun() throws IOException { // we may have been working on a bit-packed-run // so close that run if it exists before writing this // rle-run endPreviousBitPackedRun(); // write the rle-header (lsb of 0 signifies a rle run) BytesUtils.writeUnsignedVarInt(repeatCount << 1, baos); // write the repeated-value BytesUtils.writeIntLittleEndianPaddedOnBitWidth(baos, previousValue, bitWidth); // reset the repeat count repeatCount = 0; // throw away all the buffered values, they were just repeats and they've been written numBufferedValues = 0; }
/** * eagerly load all the data into memory * * @param valueCount count of values in this page * @param page the array to read from containing the page data (repetition levels, definition levels, data) * @param offset where to start reading from in the page * @throws IOException */ @Override public void initFromPage(int valueCount, byte[] page, int offset) throws IOException { in = new ByteArrayInputStream(page, offset, page.length - offset); this.config = DeltaBinaryPackingConfig.readConfig(in); this.page = page; this.totalValueCount = BytesUtils.readUnsignedVarInt(in); allocateValuesBuffer(); bitWidths = new int[config.miniBlockNumInABlock]; //read first value from header valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarInt(in); while (valuesBuffered < totalValueCount) { //values Buffered could be more than totalValueCount, since we flush on a mini block basis loadNewBlockToBuffer(); } this.nextOffset = page.length - in.available(); }
private void readNext() throws IOException { Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream."); final int header = BytesUtils.readUnsignedVarInt(in); mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; switch (mode) { case RLE: currentCount = header >>> 1; if (DEBUG) LOG.debug("reading " + currentCount + " values RLE"); currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth); break; case PACKED: int numGroups = header >>> 1; currentCount = numGroups * 8; if (DEBUG) LOG.debug("reading " + currentCount + " values BIT PACKED"); currentBuffer = new int[currentCount]; // TODO: reuse a buffer byte[] bytes = new byte[numGroups * bitWidth]; // At the end of the file RLE data though, there might not be that many bytes left. int bytesToRead = (int)Math.ceil(currentCount * bitWidth / 8.0); bytesToRead = Math.min(bytesToRead, in.available()); new DataInputStream(in).readFully(bytes, 0, bytesToRead); for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) { packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex); } break; default: throw new ParquetDecodingException("not a valid mode " + mode); } } }
/**
 * Reads an unsigned varint and undoes its zigzag encoding, as described in
 * https://developers.google.com/protocol-buffers/docs/encoding
 *
 * @param in stream to read the varint from
 * @return the decoded signed value
 * @throws IOException if reading the varint fails
 */
public static int readZigZagVarInt(InputStream in) throws IOException {
  final int encoded = readUnsignedVarInt(in);
  // The upper 31 bits carry the magnitude ...
  final int magnitude = encoded >>> 1;
  // ... and the lowest bit carries the sign: 0 -> mask 0, 1 -> mask of all ones.
  final int signMask = -(encoded & 1);
  return magnitude ^ signMask;
}
/**
 * Writes the bit width recorded for mini block {@code i} to the output buffer
 * using {@code BytesUtils.writeIntLittleEndianOnOneByte}.
 *
 * @param i index of the mini block in {@code bitWidths}
 * @throws ParquetEncodingException if the write fails with an {@link IOException}
 */
private void writeBitWidthForMiniBlock(int i) {
  final int width = bitWidths[i];
  try {
    BytesUtils.writeIntLittleEndianOnOneByte(baos, width);
  } catch (IOException cause) {
    throw new ParquetEncodingException("can not write bitwith for miniblock", cause);
  }
}
/**
 * Reads a little endian int from {@code in}, consuming the number of bytes given by
 * {@code paddedByteCountFromBits(bitWidth)}.
 *
 * @param in       stream to read from
 * @param bitWidth bit width that determines how many bytes are read
 * @return the value read, or 0 (without reading anything) when the byte count is 0
 * @throws IOException if the byte count is not in the range 0..4, or if the
 *                     underlying read fails
 */
public static int readIntLittleEndianPaddedOnBitWidth(InputStream in, int bitWidth)
    throws IOException {
  final int byteCount = paddedByteCountFromBits(bitWidth);
  if (byteCount == 0) {
    return 0;
  }
  if (byteCount == 1) {
    return BytesUtils.readIntLittleEndianOnOneByte(in);
  }
  if (byteCount == 2) {
    return BytesUtils.readIntLittleEndianOnTwoBytes(in);
  }
  if (byteCount == 3) {
    return BytesUtils.readIntLittleEndianOnThreeBytes(in);
  }
  if (byteCount == 4) {
    return BytesUtils.readIntLittleEndian(in);
  }
  throw new IOException(
      String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
}
/**
 * Write a little endian int to out, using the number of bytes required by
 * bit width.
 *
 * @param out      stream to write to
 * @param v        value to write
 * @param bitWidth bit width that determines how many bytes are written; a byte
 *                 count of 0 writes nothing
 * @throws IOException if {@code paddedByteCountFromBits(bitWidth)} is not in the
 *                     range 0..4, or if the underlying write fails
 */
public static void writeIntLittleEndianPaddedOnBitWidth(OutputStream out, int v, int bitWidth)
    throws IOException {
  int bytesWidth = paddedByteCountFromBits(bitWidth);
  switch (bytesWidth) {
    case 0:
      break;
    case 1:
      writeIntLittleEndianOnOneByte(out, v);
      break;
    case 2:
      writeIntLittleEndianOnTwoBytes(out, v);
      break;
    case 3:
      writeIntLittleEndianOnThreeBytes(out, v);
      break;
    case 4:
      writeIntLittleEndian(out, v);
      break;
    default:
      // The failure is caused by the bit width, not by the value being written,
      // so the message reports bitWidth.
      throw new IOException(
          String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
  }
}
/**
 * Reports how many bytes the data takes once written: the total bit count
 * ({@code totalValues * bitWidth}) converted by
 * {@code BytesUtils.paddedByteCountFromBits}.
 *
 * NOTE(review): the product is computed in the operands' own type before the
 * conversion; if both are ints it could overflow for very large inputs - confirm.
 *
 * @return size of the data as it would be written
 */
public long getBufferSize() {
  final long paddedBytes = BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
  return paddedBytes;
}
/**
 * Writes {@code intValue} to {@code out} via {@code BytesUtils.writeIntLittleEndian}.
 *
 * @param out destination stream
 * @throws IOException if the write fails
 */
@Override
public void writeAllTo(final OutputStream out) throws IOException {
  final int value = intValue;
  BytesUtils.writeIntLittleEndian(out, value);
}
/**
 * Writes {@code intValue} to {@code out} via {@code BytesUtils.writeUnsignedVarInt}.
 *
 * @param out destination stream
 * @throws IOException if the write fails
 */
@Override
public void writeAllTo(final OutputStream out) throws IOException {
  final int value = intValue;
  BytesUtils.writeUnsignedVarInt(value, out);
}
/**
 * Reads an unsigned varint and undoes its zigzag encoding, as described in
 * https://developers.google.com/protocol-buffers/docs/encoding
 *
 * @param in stream to read the varint from
 * @return the decoded signed value
 * @throws IOException if reading the varint fails
 */
public static int readZigZagVarInt(InputStream in) throws IOException {
  final int encoded = readUnsignedVarInt(in);
  // The upper 31 bits carry the magnitude ...
  final int magnitude = encoded >>> 1;
  // ... and the lowest bit carries the sign: 0 -> mask 0, 1 -> mask of all ones.
  final int signMask = -(encoded & 1);
  return magnitude ^ signMask;
}
/**
 * Picks the values reader for the levels of {@code descriptor}.
 *
 * The bit width is derived from the maximum level; a width of 0 yields a
 * {@link ZeroIntegerValuesReader}, any other width a
 * {@link RunLengthBitPackingHybridValuesReader} configured with that width.
 *
 * @param descriptor column whose levels are read
 * @param valuesType kind of values the reader is for
 * @return the reader matching the computed bit width
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  final int bitWidth = BytesUtils.getWidthFromMaxInt(getMaxLevel(descriptor, valuesType));
  if (bitWidth != 0) {
    return new RunLengthBitPackingHybridValuesReader(bitWidth);
  }
  return new ZeroIntegerValuesReader();
}
},