/**
 * Builds the level iterator for a page from its encoded level bytes.
 *
 * @param maxLevel the maximum level value; {@code 0} short-circuits to a
 *        {@code NullIntIterator}
 * @param bytes the encoded levels, materialized with {@code toByteArray()}
 * @return a {@code NullIntIterator} when {@code maxLevel} is 0, otherwise an
 *         {@code RLEIntIterator} backed by a {@code RunLengthBitPackingHybridDecoder}
 * @throws ParquetDecodingException wrapping any {@code IOException} raised while
 *         building the iterator
 */
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    // The decoder's bit width is derived from the largest level it has to represent.
    int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    return new RLEIntIterator(new RunLengthBitPackingHybridDecoder(bitWidth, levelStream));
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Creates the iterator used to read definition or repetition levels.
 *
 * <p>When {@code maxLevel} is 0 a {@code NullIntIterator} is returned instead of decoding
 * anything; otherwise the bytes are decoded through a
 * {@code RunLengthBitPackingHybridDecoder}.
 *
 * @param maxLevel the maximum level value of the column
 * @param bytes the encoded levels, materialized with {@code toByteArray()}
 * @param descriptor the column, used only to build the error message
 * @return the level iterator
 * @throws IOException if the iterator cannot be built; the original exception is kept as
 *         the cause and the message names the column
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    return new RLEIntIterator(new RunLengthBitPackingHybridDecoder(bitWidth, levelStream));
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Prepares the level iterators and the data reader for a V2 data page.
 *
 * <p>Records the page's value count, creates the repetition-level iterator through
 * {@code createRLEIterator}, builds a {@code VectorizedRleValuesReader} for the definition
 * levels and initializes it from the page's definition-level bytes, then initializes the
 * data reader from the page data with 0 as the offset argument.
 *
 * @param page the V2 data page to read
 * @throws IOException if the levels or the page data cannot be read; failures while
 *         materializing the definition levels or the data are reported with the page and
 *         column in the message and the original exception as the cause
 */
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = createRLEIterator(
      descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);

  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  try {
    // Materializing the definition levels can fail just like materializing the data, so it
    // lives inside the try and gets the same page/column context on failure.
    this.defColumn.initFromBuffer(
        this.pageValueCount, page.getDefinitionLevels().toByteArray());
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
}
/**
 * Materializes this input as a {@link ByteBuffer}.
 *
 * @return a ByteBuffer wrapping the array returned by {@code toByteArray()}
 * @throws IOException if there is an exception reading
 */
public ByteBuffer toByteBuffer() throws IOException {
  byte[] contents = toByteArray();
  return ByteBuffer.wrap(contents);
}
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader; // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); dlReader = this.defColumn; this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); try { byte[] bytes = page.getBytes().toByteArray(); rlReader.initFromPage(pageValueCount, bytes, 0); int next = rlReader.getNextOffset(); dlReader.initFromPage(pageValueCount, bytes, next); next = dlReader.getNextOffset(); initDataReader(page.getValueEncoding(), bytes, next); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }
/**
 * Materializes this input as a {@link ByteBuffer}.
 *
 * @return a ByteBuffer wrapping the array returned by {@code toByteArray()}
 * @throws IOException if there is an exception reading
 */
public ByteBuffer toByteBuffer() throws IOException {
  byte[] contents = toByteArray();
  return ByteBuffer.wrap(contents);
}
/**
 * Materializes the given input and appends the resulting array to the collected slabs,
 * growing the tracked size by the array's length.
 *
 * @param bytesInput the input whose bytes are read with {@code toByteArray()}
 * @throws IOException if reading the bytes fails
 */
public void collect(BytesInput bytesInput) throws IOException {
  final byte[] slab = bytesInput.toByteArray();
  // Add first, then account for the size, so a failed add does not inflate the total.
  slabs.add(slab);
  size += slab.length;
}
/**
 * Materializes the given input and appends the resulting array to the collected slabs,
 * growing the tracked size by the array's length.
 *
 * @param bytesInput the input whose bytes are read with {@code toByteArray()}
 * @throws IOException if reading the bytes fails
 */
public void collect(BytesInput bytesInput) throws IOException {
  final byte[] slab = bytesInput.toByteArray();
  // Add first, then account for the size, so a failed add does not inflate the total.
  slabs.add(slab);
  size += slab.length;
}
/**
 * Copies the input by materializing it as a byte array and wrapping that array in a
 * BytesInput.
 *
 * @param bytesInput a BytesInput
 * @return a BytesInput built from the bytes returned by {@code bytesInput.toByteArray()}
 * @throws IOException if there is an exception when reading bytes from the BytesInput
 */
public static BytesInput copy(BytesInput bytesInput) throws IOException {
  byte[] materialized = bytesInput.toByteArray();
  return from(materialized);
}
/**
 * Copies the input by materializing it as a byte array and wrapping that array in a
 * BytesInput.
 *
 * @param bytesInput a BytesInput
 * @return a BytesInput built from the bytes returned by {@code bytesInput.toByteArray()}
 * @throws IOException if there is an exception when reading bytes from the BytesInput
 */
public static BytesInput copy(BytesInput bytesInput) throws IOException {
  byte[] materialized = bytesInput.toByteArray();
  return from(materialized);
}
/**
 * Creates the iterator used to read definition or repetition levels.
 *
 * <p>When {@code maxLevel} is 0 a {@code NullIntIterator} is returned instead of decoding
 * anything; otherwise the bytes are decoded through a
 * {@code RunLengthBitPackingHybridDecoder}.
 *
 * @param maxLevel the maximum level value of the column
 * @param bytes the encoded levels, materialized with {@code toByteArray()}
 * @param descriptor the column, used only to build the error message
 * @return the level iterator
 * @throws IOException if the iterator cannot be built; the original exception is kept as
 *         the cause and the message names the column
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    return new RLEIntIterator(new RunLengthBitPackingHybridDecoder(bitWidth, levelStream));
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Decompresses {@code bytes} into a newly allocated array of {@code uncompressedSize}
 * bytes and returns it wrapped in a BytesInput.
 *
 * <p>The decompressor is reset before the compressed bytes are handed to it.
 *
 * <p>NOTE(review): whatever {@code decompressor.decompress(...)} returns is discarded. If
 * it reports the number of bytes actually produced, a short result would go unnoticed.
 * Confirm against the decompressor's contract.
 *
 * @param bytes the compressed input
 * @param uncompressedSize the size of the output array to allocate and fill
 * @return a BytesInput over the output array
 * @throws IOException if reading the input or decompressing fails
 */
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  decompressor.reset();
  final byte[] compressed = bytes.toByteArray();
  decompressor.setInput(compressed, 0, compressed.length);
  final byte[] uncompressed = new byte[uncompressedSize];
  decompressor.decompress(uncompressed, 0, uncompressedSize);
  return BytesInput.from(uncompressed);
}
/**
 * Decompresses {@code bytes} into a newly allocated array of {@code uncompressedSize}
 * bytes and returns it wrapped in a BytesInput.
 *
 * <p>The decompressor is reset before the compressed bytes are handed to it.
 *
 * <p>NOTE(review): whatever {@code decompressor.decompress(...)} returns is discarded. If
 * it reports the number of bytes actually produced, a short result would go unnoticed.
 * Confirm against the decompressor's contract.
 *
 * @param bytes the compressed input
 * @param uncompressedSize the size of the output array to allocate and fill
 * @return a BytesInput over the output array
 * @throws IOException if reading the input or decompressing fails
 */
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  decompressor.reset();
  final byte[] compressed = bytes.toByteArray();
  decompressor.setInput(compressed, 0, compressed.length);
  final byte[] uncompressed = new byte[uncompressedSize];
  decompressor.decompress(uncompressed, 0, uncompressedSize);
  return BytesInput.from(uncompressed);
}
/**
 * Prepares the definition-level reader and the data reader for a V2 data page.
 *
 * <p>Records the page's value count, builds a {@code VectorizedDefValuesReader} sized from
 * the column's maximum definition level, initializes it from the page's definition-level
 * bytes, then initializes the data reader from the page data with 0 as the offset argument.
 *
 * @param page the V2 data page to read
 * @throws ParquetDecodingException wrapping any {@code IOException} raised while reading
 *         the page; the message names the page and column
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();

  final int definitionLevelBitWidth =
      BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  defColumn = new VectorizedDefValuesReader(definitionLevelBitWidth);

  try {
    byte[] definitionLevels = page.getDefinitionLevels().toByteArray();
    defColumn.initFromBuffer(this.pageValueCount, definitionLevels);
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Builds a new DictionaryPage whose bytes are materialized from the given page.
 *
 * <p>The dictionary size and encoding are carried over unchanged.
 *
 * @param dict the dictionary page to copy, may be {@code null}
 * @return {@code null} when {@code dict} is {@code null}, otherwise the new page
 * @throws ParquetDecodingException wrapping any {@code IOException} raised while reading
 *         the dictionary bytes
 */
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict == null) {
    return null;
  }
  try {
    BytesInput materialized = BytesInput.from(dict.getBytes().toByteArray());
    return new DictionaryPage(materialized, dict.getDictionarySize(), dict.getEncoding());
  } catch (IOException e) {
    throw new ParquetDecodingException("Cannot read dictionary", e);
  }
}
/**
 * Builds a new DictionaryPage whose bytes are materialized from the given page.
 *
 * <p>The dictionary size and encoding are carried over unchanged.
 *
 * @param dict the dictionary page to copy, may be {@code null}
 * @return {@code null} when {@code dict} is {@code null}, otherwise the new page
 * @throws ParquetDecodingException wrapping any {@code IOException} raised while reading
 *         the dictionary bytes
 */
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict == null) {
    return null;
  }
  try {
    BytesInput materialized = BytesInput.from(dict.getBytes().toByteArray());
    return new DictionaryPage(materialized, dict.getDictionarySize(), dict.getEncoding());
  } catch (IOException e) {
    throw new ParquetDecodingException("Cannot read dictionary", e);
  }
}
/**
 * Prepares the level iterators and the data reader for a V2 data page.
 *
 * <p>Records the page's value count, creates the repetition-level iterator through
 * {@code createRLEIterator}, builds a {@code VectorizedRleValuesReader} for the definition
 * levels and initializes it from the page's definition-level bytes, then initializes the
 * data reader from the page data with 0 as the offset argument.
 *
 * @param page the V2 data page to read
 * @throws IOException if the levels or the page data cannot be read; failures while
 *         materializing the definition levels or the data are reported with the page and
 *         column in the message and the original exception as the cause
 */
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = createRLEIterator(
      descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);

  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  try {
    // Materializing the definition levels can fail just like materializing the data, so it
    // lives inside the try and gets the same page/column context on failure.
    this.defColumn.initFromBuffer(
        this.pageValueCount, page.getDefinitionLevels().toByteArray());
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
}
/**
 * Asserts that the next page of the column at {@code path} is a V1 data page holding the
 * expected number of values and exactly the expected bytes.
 *
 * @param schema the schema used to resolve {@code path} to a column description
 * @param pages the store the page reader is obtained from
 * @param path the column path
 * @param values the expected value count of the page
 * @param bytes the expected page bytes
 * @throws IOException if either byte source cannot be materialized
 */
private void validateContains(
    MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes)
    throws IOException {
  PageReader pageReader = pages.getPageReader(schema.getColumnDescription(path));
  DataPageV1 page = (DataPageV1) pageReader.readPage();

  assertEquals(values, page.getValueCount());

  byte[] expectedBytes = bytes.toByteArray();
  byte[] actualBytes = page.getBytes().toByteArray();
  assertArrayEquals(expectedBytes, actualBytes);
}
private void readPageV1(DataPageV1 page) { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedDefValuesReader(bitWidth); ValuesReader dlReader = this.defColumn; try { byte[] bytes = page.getBytes().toByteArray(); LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records"); LOG.debug("reading repetition levels at 0"); rlReader.initFromPage(pageValueCount, bytes, 0); int next = rlReader.getNextOffset(); LOG.debug("reading definition levels at " + next); dlReader.initFromPage(pageValueCount, bytes, next); next = dlReader.getNextOffset(); LOG.debug("reading data at " + next); initDataReader(page.getValueEncoding(), bytes, next); } catch (IOException e) { throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e); } }
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader; // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); dlReader = this.defColumn; this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); try { byte[] bytes = page.getBytes().toByteArray(); rlReader.initFromPage(pageValueCount, bytes, 0); int next = rlReader.getNextOffset(); dlReader.initFromPage(pageValueCount, bytes, next); next = dlReader.getNextOffset(); initDataReader(page.getValueEncoding(), bytes, next); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }