/**
 * Reads a v1 data page. In the v1 layout, repetition levels, definition levels and
 * values are stored back-to-back in a single byte stream, so the three readers are
 * initialized in that order from the same input stream.
 *
 * @param page the v1 page to read
 * @throws ParquetDecodingException if the page bytes cannot be read
 */
private void readPageV1(DataPageV1 page) {
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    // Parameterized logging (consistent with the other readers in this codebase):
    // the message is only built when debug is enabled, unlike string concatenation.
    LOG.debug("page size {} bytes and {} records", bytes.size(), pageValueCount);
    ByteBufferInputStream in = bytes.toInputStream();
    LOG.debug("reading repetition levels at {}", in.position());
    rlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    dlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading data at {}", in.position());
    initDataReader(page.getValueEncoding(), in, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Reads a v2 data page. In the v2 layout, repetition and definition levels are
 * RLE-encoded and stored separately from the values, so only the data reader
 * consumes the page's data stream.
 *
 * @param page the v2 page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn = newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
  try {
    // Parameterized logging (consistent with the other readers in this codebase):
    // the message is only built when debug is enabled, unlike string concatenation.
    LOG.debug("page data size {} bytes and {} records", page.getData().size(), pageValueCount);
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Wraps an ordered list of inputs to be read as one concatenated input.
 * The total size is computed eagerly so {@code size()} is O(1).
 *
 * @param inputs the inputs to concatenate, in order
 */
private SequenceBytesIn(List<BytesInput> inputs) {
  this.inputs = inputs;
  long sum = 0L;
  for (BytesInput part : inputs) {
    sum += part.size();
  }
  this.size = sum;
}
/**
 * creates an uncompressed page; the uncompressed size is taken from the content itself
 * @param bytes the content of the page
 * @param dictionarySize the value count in the dictionary
 * @param encoding the encoding used
 */
public DictionaryPage(BytesInput bytes, int dictionarySize, Encoding encoding) {
  // Use a checked cast instead of a raw (int) cast so a page over Integer.MAX_VALUE
  // bytes fails fast rather than silently truncating; this matches the checked cast
  // the main constructor already applies to bytes.size().
  this(bytes, Ints.checkedCast(bytes.size()), dictionarySize, encoding);
}
/**
 * creates an uncompressed page; the uncompressed size is taken from the content itself
 * @param bytes the content of the page
 * @param dictionarySize the value count in the dictionary
 * @param encoding the encoding used
 */
public DictionaryPage(BytesInput bytes, int dictionarySize, Encoding encoding) {
  // Use a checked cast instead of a raw (int) cast so a page over Integer.MAX_VALUE
  // bytes fails fast rather than silently truncating; this matches the checked cast
  // the main constructor already applies to bytes.size().
  this(bytes, Ints.checkedCast(bytes.size()), dictionarySize, encoding);
}
/**
 * Builds a concatenated view over the given inputs; the combined byte count
 * is accumulated once here so it never has to be recomputed.
 *
 * @param inputs the inputs to concatenate, in order
 */
private SequenceBytesIn(List<BytesInput> inputs) {
  this.inputs = inputs;
  long combined = 0L;
  for (BytesInput chunk : inputs) {
    combined += chunk.size();
  }
  this.size = combined;
}
/**
 * Writes every underlying input to the given stream, in order.
 * Debug logging brackets nested sequences with "{" / "}" markers so the
 * write structure is visible in the log.
 *
 * @param out the stream to write to
 * @throws IOException if any underlying input fails to write
 */
@SuppressWarnings("unused")
@Override
public void writeAllTo(OutputStream out) throws IOException {
  for (BytesInput part : inputs) {
    LOG.debug("write {} bytes to out", part.size());
    boolean nested = part instanceof SequenceBytesIn;
    if (nested) {
      LOG.debug("{");
    }
    part.writeAllTo(out);
    if (nested) {
      LOG.debug("}");
    }
  }
}
/**
 * Streams each contained input to {@code out} sequentially, logging the size
 * of each piece and bracketing nested sequences with "{" / "}" debug markers.
 *
 * @param out the stream to write to
 * @throws IOException if any underlying input fails to write
 */
@SuppressWarnings("unused")
@Override
public void writeAllTo(OutputStream out) throws IOException {
  for (BytesInput element : inputs) {
    LOG.debug("write {} bytes to out", element.size());
    boolean isSequence = element instanceof SequenceBytesIn;
    if (isSequence) {
      LOG.debug("{");
    }
    element.writeAllTo(out);
    if (isSequence) {
      LOG.debug("}");
    }
  }
}
/**
 * Human-readable summary of this page: byte size, dictionary entry count,
 * uncompressed size and encoding.
 */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append("Page [bytes.size=").append(bytes.size());
  sb.append(", entryCount=").append(dictionarySize);
  sb.append(", uncompressedSize=").append(getUncompressedSize());
  sb.append(", encoding=").append(encoding);
  sb.append("]");
  return sb.toString();
}
/**
 * Materializes the contents of this input into a new byte array.
 *
 * @return a new byte array materializing the contents of this input
 * @throws IOException if there is an exception reading
 */
public byte[] toByteArray() throws IOException {
  // Math.toIntExact fails fast on inputs over Integer.MAX_VALUE — which cannot fit
  // in a byte[] anyway — instead of silently truncating with a raw (int) cast.
  BAOS baos = new BAOS(Math.toIntExact(size()));
  this.writeAllTo(baos);
  LOG.debug("converted {} to byteArray of {} bytes", size(), baos.size());
  return baos.getBuf();
}
/**
 * Materializes the contents of this input into a new byte array.
 *
 * @return a new byte array materializing the contents of this input
 * @throws IOException if there is an exception reading
 */
public byte[] toByteArray() throws IOException {
  // Math.toIntExact fails fast on inputs over Integer.MAX_VALUE — which cannot fit
  // in a byte[] anyway — instead of silently truncating with a raw (int) cast.
  BAOS baos = new BAOS(Math.toIntExact(size()));
  this.writeAllTo(baos);
  LOG.debug("converted {} to byteArray of {} bytes", size(), baos.size());
  return baos.getBuf();
}
/**
 * creates a dictionary page
 *
 * Note: the checked cast on {@code bytes.size()} runs before the null check, so a
 * null {@code bytes} surfaces as an NPE from {@code bytes.size()} rather than from
 * {@code checkNotNull}.
 *
 * @param bytes the (possibly compressed) content of the page
 * @param uncompressedSize the size uncompressed
 * @param dictionarySize the value count in the dictionary
 * @param encoding the encoding used
 */ public DictionaryPage(BytesInput bytes, int uncompressedSize, int dictionarySize, Encoding encoding) { super(Ints.checkedCast(bytes.size()), uncompressedSize); this.bytes = checkNotNull(bytes, "bytes"); this.dictionarySize = dictionarySize; this.encoding = checkNotNull(encoding, "encoding"); }
@Override public BytesInput getBytes() { try { // prepend the length of the column BytesInput rle = encoder.toBytes(); return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle); } catch (IOException e) { throw new ParquetEncodingException(e); } }
@Override public BytesInput getBytes() { try { // prepend the length of the column BytesInput rle = encoder.toBytes(); return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle); } catch (IOException e) { throw new ParquetEncodingException(e); } }
/**
 * Compresses and stores the dictionary page for this column chunk.
 * Only one dictionary page is allowed per column chunk.
 *
 * @param dictionaryPage the dictionary page to write
 * @throws IOException if compression fails
 * @throws ParquetEncodingException if a dictionary page was already written
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  // Fail fast on pages over Integer.MAX_VALUE bytes instead of silently
  // truncating with a raw (int) cast.
  int uncompressedSize = Math.toIntExact(dictionaryBytes.size());
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  // NOTE(review): the compressed bytes are snapshotted via BytesInput.copy —
  // presumably because the compressor's output buffer is transient; confirm.
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
/**
 * Compresses and stores the dictionary page for this column chunk.
 * Only one dictionary page is allowed per column chunk.
 *
 * @param dictionaryPage the dictionary page to write
 * @throws IOException if compression fails
 * @throws ParquetEncodingException if a dictionary page was already written
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  // Fail fast on pages over Integer.MAX_VALUE bytes instead of silently
  // truncating with a raw (int) cast.
  int uncompressedSize = Math.toIntExact(dictionaryBytes.size());
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  // NOTE(review): the compressed bytes are snapshotted via BytesInput.copy —
  // presumably because the compressor's output buffer is transient; confirm.
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
/**
 * Compresses and stores the dictionary page for this column chunk.
 * Only one dictionary page is allowed per column chunk.
 *
 * @param dictionaryPage the dictionary page to write
 * @throws IOException if compression fails
 * @throws ParquetEncodingException if a dictionary page was already written
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  // Fail fast on pages over Integer.MAX_VALUE bytes instead of silently
  // truncating with a raw (int) cast.
  int uncompressedSize = Math.toIntExact(dictionaryBytes.size());
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  // NOTE(review): the compressed bytes are snapshotted via BytesInput.copy —
  // presumably because the compressor's output buffer is transient; confirm.
  this.dictionaryPage = new DictionaryPage(BytesInput.copy(compressedBytes), uncompressedSize,
      dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding());
}
/**
 * Initializes this reader from a v2 data page: repetition and definition levels are
 * RLE-encoded separately from the values, so only the values reader consumes the
 * page's data stream.
 *
 * @param page the v2 page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void initFromPage(DataPageV2 page) {
  int count = page.getValueCount();
  this.triplesCount = count;
  this.repetitionLevels = newRLEIterator(desc.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevels = newRLEIterator(desc.getMaxDefinitionLevel(), page.getDefinitionLevels());
  LOG.debug("page data size {} bytes and {} records", page.getData().size(), count);
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), count);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + desc, e);
  }
}
/**
 * Reads a v2 data page: level iterators are built from the separately stored
 * RLE-encoded repetition/definition levels, then the data reader is initialized
 * from the page's value stream.
 *
 * @param page the v2 page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  this.repetitionLevelColumn = newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn = newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
  LOG.debug("page data size {} bytes and {} records", page.getData().size(), pageValueCount);
  try {
    Encoding dataEncoding = page.getDataEncoding();
    initDataReader(dataEncoding, page.getData().toInputStream(), page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
}
/**
 * Reads a v2 data page and notifies listeners once the page is initialized.
 * Level iterators come from the page's separately stored RLE-encoded levels;
 * the data reader consumes the page's value stream.
 *
 * @param page the v2 page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  this.repetitionLevelColumn = newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn = newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
  final int valueCount = page.getValueCount();
  LOG.debug("page data size {} bytes and {} values", page.getData().size(), valueCount);
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), valueCount);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
  newPageInitialized(page);
}