/**
 * Reads the next page header, handing this object to
 * {@code Util.readPageHeader} as the source to read from.
 *
 * @return the page header returned by {@code Util.readPageHeader(this)}
 * @throws IOException if reading the header fails
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(this);
  return header;
}
/**
 * Reads the next page header from {@code input}.
 *
 * @return the page header returned by {@code Util.readPageHeader(input)}
 * @throws IOException if reading the header fails
 */
public PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(input);
  return header;
}
/**
 * Reads the next page header from {@code stream}.
 *
 * @return the page header returned by {@code Util.readPageHeader(stream)}
 * @throws IOException if reading the header fails
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(stream);
  return header;
}
/**
 * Reads the next page header, passing this object to
 * {@code Util.readPageHeader} as the source.
 *
 * @return the header produced by {@code Util.readPageHeader(this)}
 * @throws IOException if the header cannot be read
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader nextHeader = Util.readPageHeader(this);
  return nextHeader;
}
/**
 * Reads the next page header from the {@code input} source.
 *
 * @return the header produced by {@code Util.readPageHeader(input)}
 * @throws IOException if the header cannot be read
 */
public PageHeader readPageHeader() throws IOException {
  final PageHeader nextHeader = Util.readPageHeader(input);
  return nextHeader;
}
/**
 * Reads the next page header from the {@code stream} source.
 *
 * @return the header produced by {@code Util.readPageHeader(stream)}
 * @throws IOException if the header cannot be read
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader nextHeader = Util.readPageHeader(stream);
  return nextHeader;
}
protected PageHeader readPageHeader() throws IOException { PageHeader pageHeader; stream.mark(8192); // headers should not be larger than 8k try { pageHeader = Util.readPageHeader(stream); } catch (IOException e) { // this is to workaround a bug where the compressedLength // of the chunk is missing the size of the header of the dictionary // to allow reading older files (using dictionary) we need this. // usually 13 to 19 bytes are missing // if the last page is smaller than this, the page header itself is truncated in the buffer. stream.reset(); // resetting the buffer to the position before we got the error LOG.info("completing the column chunk to read the page header"); pageHeader = Util.readPageHeader(new SequenceInputStream(stream, f)); // trying again from the buffer + remainder of the stream. } return pageHeader; }
protected PageHeader readPageHeader() throws IOException { PageHeader pageHeader; stream.mark(8192); // headers should not be larger than 8k try { pageHeader = Util.readPageHeader(stream); } catch (IOException e) { // this is to workaround a bug where the compressedLength // of the chunk is missing the size of the header of the dictionary // to allow reading older files (using dictionary) we need this. // usually 13 to 19 bytes are missing // if the last page is smaller than this, the page header itself is truncated in the buffer. stream.reset(); // resetting the buffer to the position before we got the error LOG.info("completing the column chunk to read the page header"); pageHeader = Util.readPageHeader(new SequenceInputStream(stream, f)); // trying again from the buffer + remainder of the stream. } return pageHeader; }
/**
 * Returns the dictionary page, reading it from {@code in} the first time it is
 * requested and reusing the stored {@code dictionaryPage} afterwards.
 *
 * <p>If the header at the current position carries no dictionary page header,
 * the input is moved back to where it was and {@code null} is returned.
 *
 * @return the dictionary page, or {@code null} if the next page is not a dictionary page
 * @throws RuntimeException wrapping any exception raised while reading, with
 *     the reader state included in the message
 */
@Override
public DictionaryPage readDictionaryPage() {
  // Already loaded: hand back the stored page.
  if (dictionaryPage != null) {
    return dictionaryPage;
  }

  // Declared outside the try so the failure message can report them.
  PageHeader header = new PageHeader();
  long startPos = 0;
  try {
    startPos = in.getPos();
    header = Util.readPageHeader(in);
    if (header.getDictionary_page_header() != null) {
      dictionaryPage = readDictionaryPageHelper(header);
      return dictionaryPage;
    }
    // Not a dictionary page: rewind so the header can be read again by the caller.
    in.seek(startPos);
    return null;
  } catch (Exception e) {
    final String message = "Error reading dictionary page."
        + "\nFile path: " + path.toUri().getPath()
        + "\nRow count: " + rowCount
        + "\nColumn Chunk Metadata: " + metaData
        + "\nPage Header: " + header
        + "\nFile offset: " + fileOffset
        + "\nSize: " + size
        + "\nValue read so far: " + valueReadSoFar
        + "\nPosition: " + startPos;
    throw new RuntimeException(message, e);
  }
}
/**
 * Loads the dictionary page of a column chunk when the chunk metadata reports
 * a positive dictionary page offset; otherwise does nothing.
 *
 * <p>The page header is read from {@code f} at that offset, the read is timed
 * and reported through {@code updateStats}, and the header is then passed to
 * {@code readDictionaryPage}.
 *
 * @param parentStatus the column reader handed on to {@code readDictionaryPage}
 * @param columnChunkMetaData metadata supplying the dictionary page offset and encodings
 * @param f the input stream to seek in and read the header from
 * @throws IOException if seeking or reading fails
 */
private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
    final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
  Stopwatch timer = Stopwatch.createUnstarted();
  if (columnChunkMetaData.getDictionaryPageOffset() <= 0) {
    return;
  }

  f.seek(columnChunkMetaData.getDictionaryPageOffset());
  long headerStart = f.getPos();
  timer.start();
  final PageHeader pageHeader = Util.readPageHeader(f);
  long headerReadMicros = timer.elapsed(TimeUnit.MICROSECONDS);
  long headerLength = f.getPos() - headerStart;
  this.updateStats(pageHeader, "Page Header", headerStart, headerReadMicros, headerLength, headerLength);

  assert pageHeader.type == PageType.DICTIONARY_PAGE;
  assert isDictionaryEncoded(columnChunkMetaData.getEncodings())
      : format("Missing dictionary encoding for dictionary page %s, in column chunk %s", pageHeader, columnChunkMetaData);

  readDictionaryPage(pageHeader, parentStatus);
}
/**
 * Tries to decode a dictionary page from the start of {@code data}.
 *
 * <p>A page header is read from the beginning of the array. When its type is
 * {@code DICTIONARY_PAGE}, the bytes following the header (as many as the
 * header's compressed page size) are decompressed with {@code codecName} and
 * wrapped in a {@link DictionaryPage}.
 *
 * @param data bytes beginning with a page header
 * @param codecName codec passed to {@code decompress}
 * @return the decoded dictionary page, or empty if the first page is not a
 *     dictionary page or an {@link IOException} occurs
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
  try {
    ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
    PageHeader header = Util.readPageHeader(headerStream);
    if (header.type != PageType.DICTIONARY_PAGE) {
      return Optional.empty();
    }

    // Whatever the header reader consumed is the offset of the page payload.
    int payloadOffset = data.length - headerStream.available();
    Slice compressedPayload = wrappedBuffer(data, payloadOffset, header.getCompressed_page_size());

    DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
    ParquetEncoding dictionaryEncoding =
        getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
    int valueCount = dictionaryHeader.getNum_values();

    DictionaryPage page = new DictionaryPage(
        decompress(codecName, compressedPayload, header.getUncompressed_page_size()),
        valueCount,
        dictionaryEncoding);
    return Optional.of(page);
  } catch (IOException ignored) {
    return Optional.empty();
  }
}
/**
 * Attempts to read a dictionary page that starts at the beginning of {@code data}.
 *
 * <p>The page header is parsed first. If it is not of type
 * {@code DICTIONARY_PAGE} the result is empty; otherwise the payload that
 * follows the header is decompressed with {@code codecName} and returned as a
 * {@link DictionaryPage}.
 *
 * @param data bytes beginning with a page header
 * @param codecName codec passed to {@code decompress}
 * @return the dictionary page, or empty when the first page is not a
 *     dictionary page or reading raises an {@link IOException}
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
  try {
    ByteArrayInputStream source = new ByteArrayInputStream(data);
    PageHeader parsedHeader = Util.readPageHeader(source);
    if (parsedHeader.type != PageType.DICTIONARY_PAGE) {
      return Optional.empty();
    }

    // Bytes consumed by the header parser mark where the page body begins.
    int bodyStart = data.length - source.available();
    Slice body = wrappedBuffer(data, bodyStart, parsedHeader.getCompressed_page_size());

    DictionaryPageHeader dictHeader = parsedHeader.getDictionary_page_header();
    ParquetEncoding parquetEncoding =
        getParquetEncoding(Encoding.valueOf(dictHeader.getEncoding().name()));
    int entryCount = dictHeader.getNum_values();

    return Optional.of(
        new DictionaryPage(
            decompress(codecName, body, parsedHeader.getUncompressed_page_size()),
            entryCount,
            parquetEncoding));
  } catch (IOException ignored) {
    return Optional.empty();
  }
}
try { pos = in.getPos(); pageHeader = Util.readPageHeader(in); if (pageHeader.getDictionary_page_header() == null) { in.seek(pos);
timer.reset(); try { PageHeader pageHeader = Util.readPageHeader(parent.dataReader); int compressedSize = pageHeader.getCompressed_page_size(); if ( parent.parentColumnReader.isShuttingDown ) { return null; } //Opportunity to skip expensive Parquet processing
/** * Reads and decompresses a dictionary page for the given column chunk. * * Returns null if the given column chunk has no dictionary page. * * @param meta a column's ColumnChunkMetaData to read the dictionary from * @return an uncompressed DictionaryPage or null * @throws IOException if there is an error while reading the dictionary */ DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException { if (!meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && !meta.getEncodings().contains(Encoding.RLE_DICTIONARY)) { return null; } // TODO: this should use getDictionaryPageOffset() but it isn't reliable. if (f.getPos() != meta.getStartingPos()) { f.seek(meta.getStartingPos()); } PageHeader pageHeader = Util.readPageHeader(f); if (!pageHeader.isSetDictionary_page_header()) { return null; // TODO: should this complain? } DictionaryPage compressedPage = readCompressedDictionary(pageHeader, f); BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec()); return new DictionaryPage( decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()), compressedPage.getDictionarySize(), compressedPage.getEncoding()); }
/** * Reads and decompresses a dictionary page for the given column chunk. * * Returns null if the given column chunk has no dictionary page. * * @param meta a column's ColumnChunkMetaData to read the dictionary from * @return an uncompressed DictionaryPage or null * @throws IOException if there is an error while reading the dictionary */ DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException { if (!meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && !meta.getEncodings().contains(Encoding.RLE_DICTIONARY)) { return null; } // TODO: this should use getDictionaryPageOffset() but it isn't reliable. if (f.getPos() != meta.getStartingPos()) { f.seek(meta.getStartingPos()); } PageHeader pageHeader = Util.readPageHeader(f); if (!pageHeader.isSetDictionary_page_header()) { return null; // TODO: should this complain? } DictionaryPage compressedPage = readCompressedDictionary(pageHeader, f); BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec()); return new DictionaryPage( decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()), compressedPage.getDictionarySize(), compressedPage.getEncoding()); }
// Reads a page header from f, then captures the stopwatch's elapsed time in
// nanoseconds and the number of bytes dataReader advanced since `start`.
// NOTE(review): the header is read from `f` but the byte count is taken from
// `dataReader` - confirm both refer to the same underlying stream.
final PageHeader pageHeader = Util.readPageHeader(f); long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS); long pageHeaderBytes=dataReader.getPos()-start;
/** * Get the page header and the pageData (uncompressed) for the next page */ protected void nextInternal() throws IOException{ Stopwatch timer = Stopwatch.createUnstarted(); // next, we need to decompress the bytes // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary do { long start=dataReader.getPos(); timer.start(); pageHeader = Util.readPageHeader(dataReader); long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS); long pageHeaderBytes=dataReader.getPos()-start; this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes); logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}","Page Header Read","", this.parentColumnReader.parentReader.hadoopPath, this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead); timer.reset(); if (pageHeader.getType() == PageType.DICTIONARY_PAGE) { readDictionaryPage(pageHeader, parentColumnReader); } } while (pageHeader.getType() == PageType.DICTIONARY_PAGE); int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); pageData = readPage(pageHeader, compressedSize, uncompressedSize); }
// Reads the next page header from `in` and pulls out the uncompressed and
// compressed page sizes it reports.
pageHeader = Util.readPageHeader(in); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size();
releasePrevDataPageBuffers(); while(valueReadSoFar < metaData.getValueCount()) { pageHeader = Util.readPageHeader(in); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size();