/**
 * Produces the next value by reading a single int from {@code delegate}.
 *
 * @return the value returned by {@code delegate.readInt()}
 * @throws ParquetDecodingException wrapping any {@link IOException} raised by the read
 */
@Override
int nextInt() {
  final int next;
  try {
    next = delegate.readInt();
  } catch (IOException ioe) {
    // Surface the checked I/O failure as the unchecked decoding exception.
    throw new ParquetDecodingException(ioe);
  }
  return next;
}
} // closes the enclosing scope, which is opened outside this excerpt
/**
 * Creates the iterator used to walk the levels stored in {@code bytes}.
 *
 * <p>When {@code maxLevel} is 0 a {@code NullIntIterator} is returned and {@code bytes} is not
 * touched. Otherwise the bytes are copied into an array and wrapped in a
 * {@code RunLengthBitPackingHybridDecoder} whose bit width comes from
 * {@code BytesUtils.getWidthFromMaxInt(maxLevel)}.
 *
 * @param maxLevel the maximum level value; 0 short-circuits to a {@code NullIntIterator}
 * @param bytes the encoded levels
 * @return an iterator over the levels
 * @throws ParquetDecodingException if an {@link IOException} occurs while setting up the decoder
 */
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  if (maxLevel == 0) {
    return new NullIntIterator();
  }
  try {
    final int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final ByteArrayInputStream levelBytes = new ByteArrayInputStream(bytes.toByteArray());
    return new RLEIntIterator(new RunLengthBitPackingHybridDecoder(bitWidth, levelBytes));
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Sets up the repetition-level, definition-level and data readers for a V1 data page.
 *
 * <p>The level readers are obtained from the page's level encodings, wrapped in
 * {@code ValuesReaderIntIterator}s, and then initialized one after the other from the same
 * input stream; whatever remains in the stream is handed to {@code initDataReader}.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while reading the page
 */
private void readPageV1(DataPageV1 page) {
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    // Parameterized logging: no message string is concatenated when debug is disabled.
    LOG.debug("page size {} bytes and {} records", bytes.size(), pageValueCount);
    ByteBufferInputStream in = bytes.toInputStream();
    LOG.debug("reading repetition levels at {}", in.position());
    // NOTE(review): the level readers use the pageValueCount field while the data reader uses
    // page.getValueCount(); this method does not assign pageValueCount - confirm the caller does.
    rlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    dlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading data at {}", in.position());
    initDataReader(page.getValueEncoding(), in, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Sets up the repetition-level, definition-level and data readers for a V2 data page.
 *
 * <p>Records the page's value count in {@code pageValueCount}, builds both level iterators
 * through {@code newRLEIterator}, and passes the page data to {@code initDataReader}.
 *
 * @param page the V2 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while reading the page data
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn =
      newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn =
      newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
  try {
    // Parameterized logging: no message string is concatenated when debug is disabled.
    LOG.debug("page data size {} bytes and {} records", page.getData().size(), pageValueCount);
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Slices {@code length} bytes off {@code in} and returns them as a little-endian buffer.
 *
 * @param length the number of bytes to slice
 * @return the sliced buffer with its byte order set to {@link ByteOrder#LITTLE_ENDIAN}
 * @throws ParquetDecodingException if slicing raises an {@link IOException}
 */
private ByteBuffer getBuffer(int length) {
  final ByteBuffer littleEndian;
  try {
    littleEndian = in.slice(length).order(ByteOrder.LITTLE_ENDIAN);
  } catch (IOException ioe) {
    throw new ParquetDecodingException("Failed to read " + length + " bytes", ioe);
  }
  return littleEndian;
}
/**
 * Slices {@code length} bytes off {@code in} and returns them as a little-endian buffer.
 *
 * @param length the number of bytes to slice
 * @return the sliced buffer with its byte order set to {@link ByteOrder#LITTLE_ENDIAN}
 * @throws ParquetDecodingException if slicing raises an {@link IOException}
 */
private ByteBuffer getBuffer(int length) {
  final ByteBuffer littleEndian;
  try {
    littleEndian = in.slice(length).order(ByteOrder.LITTLE_ENDIAN);
  } catch (IOException ioe) {
    throw new ParquetDecodingException("Failed to read " + length + " bytes", ioe);
  }
  return littleEndian;
}
@Override public final boolean readBoolean() { // TODO: vectorize decoding and keep boolean[] instead of currentByte if (bitOffset == 0) { try { currentByte = (byte) in.read(); } catch (IOException e) { throw new ParquetDecodingException("Failed to read a byte", e); } } boolean v = (currentByte & (1 << bitOffset)) != 0; bitOffset += 1; if (bitOffset == 8) { bitOffset = 0; } return v; }
@Override public final boolean readBoolean() { // TODO: vectorize decoding and keep boolean[] instead of currentByte if (bitOffset == 0) { try { currentByte = (byte) in.read(); } catch (IOException e) { throw new ParquetDecodingException("Failed to read a byte", e); } } boolean v = (currentByte & (1 << bitOffset)) != 0; bitOffset += 1; if (bitOffset == 8) { bitOffset = 0; } return v; }
throw new ParquetDecodingException("not a valid mode " + this.mode); throw new ParquetDecodingException("Failed to read from input stream", e);
throw new ParquetDecodingException("not a valid mode " + this.mode); throw new ParquetDecodingException("Failed to read from input stream", e);
return; default: throw new ParquetDecodingException("not a valid mode " + this.mode);
/**
 * Guards against reading past the end of the values.
 *
 * @throws ParquetDecodingException if {@code valuesRead} has reached or exceeded
 *     {@code totalValueCount}
 */
private void checkRead() {
  if (valuesRead < totalValueCount) {
    // Still within bounds: nothing to report.
    return;
  }
  throw new ParquetDecodingException("no more value to read, total value count is " + totalValueCount);
}
/**
 * {@inheritDoc}
 *
 * <p>Returns the next value produced by {@code bitPackingReader.read()}.
 *
 * @throws ParquetDecodingException wrapping any {@link IOException} raised by the read
 * @see org.apache.parquet.column.values.ValuesReader#readInteger()
 */
@Override
public int readInteger() {
  final int value;
  try {
    value = bitPackingReader.read();
  } catch (IOException ioe) {
    throw new ParquetDecodingException(ioe);
  }
  return value;
}
/**
 * Reads the next double from {@code in}.
 *
 * @return the value returned by {@code in.readDouble()}
 * @throws ParquetDecodingException if the read raises an {@link IOException}
 */
@Override
public double readDouble() {
  final double value;
  try {
    value = in.readDouble();
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read double", ioe);
  }
  return value;
}
} // closes the enclosing scope, which is opened outside this excerpt
/**
 * Reads the next int from {@code decoder} and resolves it to a double through
 * {@code dictionary.decodeToDouble}.
 *
 * @return the double the dictionary returns for the int that was read
 * @throws ParquetDecodingException wrapping any {@link IOException} raised while reading
 */
@Override
public double readDouble() {
  try {
    final int encoded = decoder.readInt();
    return dictionary.decodeToDouble(encoded);
  } catch (IOException ioe) {
    throw new ParquetDecodingException(ioe);
  }
}
/**
 * Supplies the values reader for this encoding.
 *
 * @param descriptor the column being read; its type must be {@code BINARY}
 * @param valuesType the kind of values requested (not consulted by this implementation)
 * @return a new {@code DeltaLengthByteArrayValuesReader}
 * @throws ParquetDecodingException if the column type is anything other than {@code BINARY}
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  if (descriptor.getType() == BINARY) {
    return new DeltaLengthByteArrayValuesReader();
  }
  throw new ParquetDecodingException("Encoding DELTA_LENGTH_BYTE_ARRAY is only supported for type BINARY");
}
}, // closes the enclosing constant body, which is opened outside this excerpt
/**
 * Reads the next int from {@code decoder} and resolves it to a double through
 * {@code dictionary.decodeToDouble}.
 *
 * @return the double the dictionary returns for the int that was read
 * @throws ParquetDecodingException wrapping any {@link IOException} raised while reading
 */
@Override
public double readDouble() {
  try {
    final int encoded = decoder.readInt();
    return dictionary.decodeToDouble(encoded);
  } catch (IOException ioe) {
    throw new ParquetDecodingException(ioe);
  }
}
/**
 * Reads the next int from {@code decoder} and resolves it to a long through
 * {@code dictionary.decodeToLong}.
 *
 * @return the long the dictionary returns for the int that was read
 * @throws ParquetDecodingException wrapping any {@link IOException} raised while reading
 */
@Override
public long readLong() {
  try {
    final int encoded = decoder.readInt();
    return dictionary.decodeToLong(encoded);
  } catch (IOException ioe) {
    throw new ParquetDecodingException(ioe);
  }
}
/**
 * Sets up the repetition-level, definition-level and data readers for a V2 data page.
 *
 * <p>Both level iterators are built through {@code newRLEIterator} using the maximum levels of
 * {@code path}; the page data is then handed to {@code initDataReader}. The debug line reports
 * the {@code pageValueCount} field, which this method does not assign.
 *
 * @param page the V2 data page to read
 * @throws ParquetDecodingException if initializing the data reader raises an {@link IOException}
 */
private void readPageV2(DataPageV2 page) {
  final int maxRepetition = path.getMaxRepetitionLevel();
  this.repetitionLevelColumn = newRLEIterator(maxRepetition, page.getRepetitionLevels());

  final int maxDefinition = path.getMaxDefinitionLevel();
  this.definitionLevelColumn = newRLEIterator(maxDefinition, page.getDefinitionLevels());

  LOG.debug("page data size {} bytes and {} records", page.getData().size(), pageValueCount);
  try {
    initDataReader(
        page.getDataEncoding(),
        page.getData().toInputStream(),
        page.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioe);
  }
}
/**
 * Sets up the repetition-level, definition-level and data readers for a V2 data page, then
 * calls {@code newPageInitialized(page)}.
 *
 * <p>Both level iterators are built through {@code newRLEIterator} using the maximum levels of
 * {@code path}. The page's value count is read once and reused for the debug line and for
 * {@code initDataReader}.
 *
 * @param page the V2 data page to read
 * @throws ParquetDecodingException if initializing the data reader raises an {@link IOException}
 */
private void readPageV2(DataPageV2 page) {
  final int maxRepetition = path.getMaxRepetitionLevel();
  this.repetitionLevelColumn = newRLEIterator(maxRepetition, page.getRepetitionLevels());

  final int maxDefinition = path.getMaxDefinitionLevel();
  this.definitionLevelColumn = newRLEIterator(maxDefinition, page.getDefinitionLevels());

  final int valueCount = page.getValueCount();
  LOG.debug("page data size {} bytes and {} values", page.getData().size(), valueCount);
  try {
    initDataReader(
        page.getDataEncoding(),
        page.getData().toInputStream(),
        valueCount);
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioe);
  }
  // Runs only when the data reader was set up without an exception.
  newPageInitialized(page);
}