org.apache.parquet.format.Util.readPageHeader java code examples

/**
 * Reads a page header by handing this object to {@code Util.readPageHeader}.
 *
 * @return the page header returned by {@code Util.readPageHeader}
 * @throws IOException propagated from {@code Util.readPageHeader}
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(this);
  return header;
}

/**
 * Reads a page header from {@code input} via {@code Util.readPageHeader}.
 *
 * @return the page header returned by {@code Util.readPageHeader}
 * @throws IOException propagated from {@code Util.readPageHeader}
 */
public PageHeader readPageHeader() throws IOException {
 final PageHeader header = Util.readPageHeader(input);
 return header;
}

/**
 * Reads a page header from {@code stream} via {@code Util.readPageHeader}.
 *
 * @return the page header returned by {@code Util.readPageHeader}
 * @throws IOException propagated from {@code Util.readPageHeader}
 */
protected PageHeader readPageHeader() throws IOException {
 final PageHeader header = Util.readPageHeader(stream);
 return header;
}

/**
 * Delegates to {@code Util.readPageHeader}, passing this object as the source.
 *
 * @return the parsed page header
 * @throws IOException propagated from {@code Util.readPageHeader}
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader parsed = Util.readPageHeader(this);
  return parsed;
}

/**
 * Delegates to {@code Util.readPageHeader}, reading from {@code input}.
 *
 * @return the parsed page header
 * @throws IOException propagated from {@code Util.readPageHeader}
 */
public PageHeader readPageHeader() throws IOException {
 final PageHeader parsed = Util.readPageHeader(input);
 return parsed;
}

/**
 * Delegates to {@code Util.readPageHeader}, reading from {@code stream}.
 *
 * @return the parsed page header
 * @throws IOException propagated from {@code Util.readPageHeader}
 */
protected PageHeader readPageHeader() throws IOException {
 final PageHeader parsed = Util.readPageHeader(stream);
 return parsed;
}

/**
 * Reads a page header from {@code stream}; if that attempt fails with an
 * {@link IOException}, rewinds the stream and retries against the buffered
 * stream followed by {@code f}.
 *
 * @return the page header from the first attempt, or from the retry if the first failed
 * @throws IOException if the retry also fails
 */
protected PageHeader readPageHeader() throws IOException {
 // Remember the current position so a failed parse can be replayed.
 // Headers should not be larger than 8k.
 stream.mark(8192);
 try {
  return Util.readPageHeader(stream);
 } catch (IOException e) {
  // Workaround for a bug where the compressedLength of the chunk is missing
  // the size of the dictionary header. Needed to read older files that use a
  // dictionary; usually 13 to 19 bytes are missing. If the last page is
  // smaller than that, the page header itself is truncated in the buffer.
  stream.reset(); // back to the position recorded before the failed read
  LOG.info("completing the column chunk to read the page header");
  // Second attempt: the buffered bytes followed by the remainder of the stream.
  return Util.readPageHeader(new SequenceInputStream(stream, f));
 }
}

/**
 * Parses the next page header from {@code stream}. When the first parse throws
 * an {@link IOException}, the stream is reset to the marked position and the
 * parse is repeated over {@code stream} concatenated with {@code f}.
 *
 * @return the parsed page header
 * @throws IOException if the second parse fails as well
 */
protected PageHeader readPageHeader() throws IOException {
 stream.mark(8192); // headers should not be larger than 8k
 try {
  return Util.readPageHeader(stream);
 } catch (IOException e) {
  // Older files (using a dictionary) were written with a bug: the chunk's
  // compressedLength omits the size of the dictionary header, so usually
  // 13 to 19 bytes are missing. If the last page is smaller than that, the
  // page header itself is truncated in the buffer, hence this fallback.
  stream.reset(); // rewind to where we were before the error
  LOG.info("completing the column chunk to read the page header");
  final SequenceInputStream bufferThenFile = new SequenceInputStream(stream, f);
  return Util.readPageHeader(bufferThenFile); // buffer + remainder of the stream
 }
}

/**
 * Returns the dictionary page, reading it from {@code in} on first use.
 *
 * If the page header read at the current position carries no dictionary page
 * header, the input is moved back to where it was and {@code null} is returned.
 *
 * @return the cached or newly read dictionary page, or {@code null} when the
 *         header at the current position has no dictionary page header
 * @throws RuntimeException wrapping any exception raised while reading, with
 *         diagnostic details in the message
 */
@Override
public DictionaryPage readDictionaryPage() {
 // Already loaded: nothing to read.
 if (dictionaryPage != null) {
  return dictionaryPage;
 }

 // Declared outside the try so the error message can report them.
 PageHeader pageHeader = new PageHeader();
 long pos = 0;
 try {
  pos = in.getPos();
  pageHeader = Util.readPageHeader(in);
  if (pageHeader.getDictionary_page_header() == null) {
   // Not a dictionary page: undo the header read.
   in.seek(pos);
   return null;
  }
  dictionaryPage = readDictionaryPageHelper(pageHeader);
 } catch (Exception e) {
  final String details = "Error reading dictionary page." +
   "\nFile path: " + path.toUri().getPath() +
   "\nRow count: " + rowCount +
   "\nColumn Chunk Metadata: " + metaData +
   "\nPage Header: " + pageHeader +
   "\nFile offset: " + fileOffset +
   "\nSize: " + size +
   "\nValue read so far: " + valueReadSoFar +
   "\nPosition: " + pos;
  throw new RuntimeException(details, e);
 }
 return dictionaryPage;
}

/**
 * Reads the dictionary page of a column chunk when its metadata reports a
 * positive dictionary page offset; otherwise does nothing.
 *
 * @param parentStatus column reader passed on to {@code readDictionaryPage}
 * @param columnChunkMetaData metadata supplying the dictionary page offset and encodings
 * @param f input stream that is positioned at the dictionary page and read from
 * @throws IOException if seeking or reading the page header fails
 */
private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
  final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
 Stopwatch timer = Stopwatch.createUnstarted();
 if (!(columnChunkMetaData.getDictionaryPageOffset() > 0)) {
  return;
 }

 f.seek(columnChunkMetaData.getDictionaryPageOffset());
 final long headerStart = f.getPos();

 // Time only the header read itself.
 timer.start();
 final PageHeader pageHeader = Util.readPageHeader(f);
 final long elapsedMicros = timer.elapsed(TimeUnit.MICROSECONDS);

 // Bytes consumed by the header = stream advance since headerStart.
 final long headerLength = f.getPos() - headerStart;
 this.updateStats(pageHeader, "Page Header", headerStart, elapsedMicros, headerLength, headerLength);

 assert pageHeader.type == PageType.DICTIONARY_PAGE;
 assert isDictionaryEncoded(columnChunkMetaData.getEncodings()) :
  format("Missing dictionary encoding for dictionary page %s, in column chunk %s", pageHeader, columnChunkMetaData);
 readDictionaryPage(pageHeader, parentStatus);
}

/**
 * Parses a dictionary page from the start of {@code data}.
 *
 * @param data bytes beginning with a page header, followed by the page payload
 * @param codecName codec passed to {@code decompress} for the payload
 * @return the decompressed dictionary page, or {@code Optional.empty()} when the
 *         header's type is not {@code DICTIONARY_PAGE} or an {@link IOException} occurs
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName)
{
  try {
    ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
    PageHeader header = Util.readPageHeader(headerStream);
    if (header.type != PageType.DICTIONARY_PAGE) {
      return Optional.empty();
    }

    // Whatever the header parse consumed is the payload's starting offset.
    int payloadOffset = data.length - headerStream.available();
    int payloadLength = header.getCompressed_page_size();
    Slice payload = wrappedBuffer(data, payloadOffset, payloadLength);

    DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
    ParquetEncoding pageEncoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
    int valueCount = dictionaryHeader.getNum_values();

    DictionaryPage page = new DictionaryPage(
        decompress(codecName, payload, header.getUncompressed_page_size()),
        valueCount,
        pageEncoding);
    return Optional.of(page);
  }
  catch (IOException ignored) {
    // An unreadable page is reported as "no dictionary".
    return Optional.empty();
  }
}

/**
 * Attempts to decode a dictionary page whose header starts at offset 0 of {@code data}.
 *
 * @param data page header bytes followed by the page payload
 * @param codecName codec handed to {@code decompress}
 * @return the decoded page; empty when the header type is not
 *         {@code DICTIONARY_PAGE} or when reading raises an {@link IOException}
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName)
{
  try {
    ByteArrayInputStream source = new ByteArrayInputStream(data);
    PageHeader parsedHeader = Util.readPageHeader(source);
    if (parsedHeader.type != PageType.DICTIONARY_PAGE) {
      return Optional.empty();
    }

    // The payload begins right after the bytes consumed by the header parse.
    int headerBytes = data.length - source.available();
    Slice compressed = wrappedBuffer(data, headerBytes, parsedHeader.getCompressed_page_size());

    DictionaryPageHeader dictHeader = parsedHeader.getDictionary_page_header();
    String encodingName = dictHeader.getEncoding().name();
    ParquetEncoding parquetEncoding = getParquetEncoding(Encoding.valueOf(encodingName));
    int entries = dictHeader.getNum_values();

    return Optional.of(new DictionaryPage(
        decompress(codecName, compressed, parsedHeader.getUncompressed_page_size()),
        entries,
        parquetEncoding));
  }
  catch (IOException ignored) {
    // Read failures are mapped to an absent dictionary.
    return Optional.empty();
  }
}

try {
 pos = in.getPos();
 pageHeader = Util.readPageHeader(in);
 if (pageHeader.getDictionary_page_header() == null) {
  in.seek(pos);

timer.reset();
try {
 PageHeader pageHeader = Util.readPageHeader(parent.dataReader);
 int compressedSize = pageHeader.getCompressed_page_size();
 if ( parent.parentColumnReader.isShuttingDown ) { return null; } //Opportunity to skip expensive Parquet processing

/**
 * Loads the dictionary page of a column chunk and returns it decompressed.
 *
 * Yields null when the chunk's encodings include neither PLAIN_DICTIONARY nor
 * RLE_DICTIONARY, or when the page header read at the chunk's starting
 * position has no dictionary page header set.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
 boolean dictionaryEncoded = meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY)
   || meta.getEncodings().contains(Encoding.RLE_DICTIONARY);
 if (!dictionaryEncoded) {
  return null;
 }

 // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
 if (f.getPos() != meta.getStartingPos()) {
  f.seek(meta.getStartingPos());
 }

 PageHeader header = Util.readPageHeader(f);
 if (!header.isSetDictionary_page_header()) {
  return null; // TODO: should this complain?
 }

 DictionaryPage compressed = readCompressedDictionary(header, f);
 BytesInputDecompressor codec = options.getCodecFactory().getDecompressor(meta.getCodec());
 BytesInput plainBytes = codec.decompress(compressed.getBytes(), compressed.getUncompressedSize());
 return new DictionaryPage(plainBytes, compressed.getDictionarySize(), compressed.getEncoding());
}

/**
 * Reads the dictionary page for the given column chunk and decompresses it.
 *
 * The result is null if the chunk's encodings contain neither PLAIN_DICTIONARY
 * nor RLE_DICTIONARY, or if the header found at the chunk's starting position
 * does not have a dictionary page header set.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
 boolean usesDictionary = meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY)
   || meta.getEncodings().contains(Encoding.RLE_DICTIONARY);
 if (!usesDictionary) {
  return null;
 }

 // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
 boolean alreadyPositioned = f.getPos() == meta.getStartingPos();
 if (!alreadyPositioned) {
  f.seek(meta.getStartingPos());
 }

 PageHeader pageHeader = Util.readPageHeader(f);
 if (!pageHeader.isSetDictionary_page_header()) {
  return null; // TODO: should this complain?
 }

 DictionaryPage rawPage = readCompressedDictionary(pageHeader, f);
 BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec());
 BytesInput decompressed = decompressor.decompress(rawPage.getBytes(), rawPage.getUncompressedSize());
 return new DictionaryPage(decompressed, rawPage.getDictionarySize(), rawPage.getEncoding());
}

final PageHeader pageHeader = Util.readPageHeader(f);
long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
long pageHeaderBytes=dataReader.getPos()-start;

/**
 * Get the page header and the pageData (uncompressed) for the next page.
 *
 * Reads page headers from {@code dataReader} until one is found whose type is
 * not DICTIONARY_PAGE; every dictionary page header encountered on the way is
 * passed to {@code readDictionaryPage}. The final header is left in
 * {@code pageHeader} and the result of {@code readPage} is stored in
 * {@code pageData}.
 *
 * @throws IOException if reading from {@code dataReader} fails
 */
protected void nextInternal() throws IOException{
 Stopwatch timer = Stopwatch.createUnstarted();
 // next, we need to decompress the bytes
 // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one
 // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary
 do {
  // Record the position before the header so its byte length can be derived afterwards.
  long start=dataReader.getPos();
  timer.start();
  pageHeader = Util.readPageHeader(dataReader);
  long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
  // Header size = how far the reader advanced while parsing it.
  long pageHeaderBytes=dataReader.getPos()-start;
  this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
  logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}","Page Header Read","",
    this.parentColumnReader.parentReader.hadoopPath,
    this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead);
  // Reset so the next iteration's timer.start() measures only that iteration's header read.
  timer.reset();
  if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
   readDictionaryPage(pageHeader, parentColumnReader);
  }
 } while (pageHeader.getType() == PageType.DICTIONARY_PAGE);
 // pageHeader now refers to the first non-dictionary page; read its contents.
 int compressedSize = pageHeader.getCompressed_page_size();
 int uncompressedSize = pageHeader.getUncompressed_page_size();
 pageData = readPage(pageHeader, compressedSize, uncompressedSize);
}

pageHeader = Util.readPageHeader(in);
int uncompressedPageSize = pageHeader.getUncompressed_page_size();
int compressedPageSize = pageHeader.getCompressed_page_size();

releasePrevDataPageBuffers();
while(valueReadSoFar < metaData.getValueCount()) {
 pageHeader = Util.readPageHeader(in);
 int uncompressedPageSize = pageHeader.getUncompressed_page_size();
 int compressedPageSize = pageHeader.getCompressed_page_size();

Popular methods of Util

readFileMetaData
reads the meta data from the stream
protocol
read
write
writeFileMetaData
writePageHeader
readColumnIndex
readOffsetIndex
writeColumnIndex
writeOffsetIndex

Popular in Java

Making http post requests using okhttp
scheduleAtFixedRate (ScheduledExecutorService)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
notifyDataSetChanged (ArrayAdapter)
ObjectMapper (com.fasterxml.jackson.databind)
ObjectMapper provides functionality for reading and writing JSON, either to and from basic POJOs (Pl
Format (java.text)
The base class for all formats. This is an abstract base class which specifies the protocol for clas
HashMap (java.util)
HashMap is an implementation of Map. All optional operations are supported. All elements are permitte
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
Notification (javax.management)
Github Copilot alternatives

How to use the readPageHeader method in org.apache.parquet.format.Util

Best Java code snippets using org.apache.parquet.format.Util.readPageHeader (Showing top 20 results out of 315)

How to use
readPageHeader
method
in
org.apache.parquet.format.Util