org.apache.parquet.io.SeekableInputStream.seek java code examples

// Offset of the footer-length field: the file length minus the size of that field
// and minus the length of the magic bytes.
long footerLengthIndex = stat.getLen()
  - ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
stream.seek(footerLengthIndex);
// The footer length is read from the stream as a little-endian int.
int footerLength = BytesUtils.readIntLittleEndian(stream);
// Step back footerLength bytes from the length field before the footer is consumed.
stream.seek(footerLengthIndex - footerLength);
// The message is built by string concatenation, so it is guarded by the level check.
if (LOG.isInfoEnabled()) {
 LOG.info("Caching the footer of length " + footerLength + " for " + cacheKey);

/**
 * Locates the footer of a Parquet file through its trailing length field and decodes it.
 *
 * The last bytes of the file are expected to be a 4-byte little-endian footer length
 * followed by the magic bytes; the footer itself ends where the length field begins.
 *
 * @param file the input file, used for its length and for error messages
 * @param options read options supplying the metadata filter
 * @param f the stream to seek within and read from
 * @param converter decodes the footer bytes into {@link ParquetMetadata}
 * @return the metadata decoded from the footer
 * @throws IOException if seeking or reading the stream fails
 * @throws RuntimeException if the file is too small, the trailing magic does not match,
 *           or the computed footer start lies outside the file
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
 final long length = file.getLength();
 final String path = file.toString();
 LOG.debug("File length {}", length);

 // Smallest possible layout: leading magic, the footer length field, trailing magic.
 final int lengthFieldSize = 4;
 final int minimumSize = MAGIC.length + lengthFieldSize + MAGIC.length;
 if (length < minimumSize) {
  throw new RuntimeException(path + " is not a Parquet file (too small length: " + length + ")");
 }

 final long lengthFieldPos = length - lengthFieldSize - MAGIC.length;
 LOG.debug("reading footer index at {}", lengthFieldPos);
 f.seek(lengthFieldPos);
 final int footerSize = readIntLittleEndian(f);

 // The magic bytes follow the length field directly, so no additional seek is needed.
 final byte[] tail = new byte[MAGIC.length];
 f.readFully(tail);
 final boolean tailMatches = Arrays.equals(MAGIC, tail);
 if (!tailMatches) {
  throw new RuntimeException(path + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(tail));
 }

 final long footerStart = lengthFieldPos - footerSize;
 LOG.debug("read footer length: {}, footer index: {}", footerSize, footerStart);
 // The footer must begin after the leading magic and strictly before the length field.
 final boolean withinFile = footerStart >= MAGIC.length && footerStart < lengthFieldPos;
 if (!withinFile) {
  throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerStart);
 }

 f.seek(footerStart);
 return converter.readParquetMetadata(f, options.getMetadataFilter());
}

/**
 * Reads the Parquet footer by following the length field stored at the end of the file.
 *
 * The file tail is expected to hold a 4-byte little-endian footer length and then the
 * magic bytes; the footer occupies the bytes immediately before the length field.
 *
 * @param file the input file, used for its length and for error messages
 * @param options read options supplying the metadata filter
 * @param f the stream to seek within and read from
 * @param converter decodes the footer bytes into {@link ParquetMetadata}
 * @return the metadata decoded from the footer
 * @throws IOException if seeking or reading the stream fails
 * @throws RuntimeException if the file is too small, the trailing magic does not match,
 *           or the computed footer start lies outside the file
 */
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException {
 final long totalBytes = file.getLength();
 final String name = file.toString();
 LOG.debug("File length {}", totalBytes);

 // A valid file holds at least: leading magic, footer length field, trailing magic.
 final int footerLengthBytes = 4;
 final int smallestFile = MAGIC.length + footerLengthBytes + MAGIC.length;
 if (totalBytes < smallestFile) {
  throw new RuntimeException(name + " is not a Parquet file (too small length: " + totalBytes + ")");
 }

 final long lengthOffset = totalBytes - footerLengthBytes - MAGIC.length;
 LOG.debug("reading footer index at {}", lengthOffset);
 f.seek(lengthOffset);
 final int declaredFooterLength = readIntLittleEndian(f);

 // Reading continues straight into the trailing magic bytes.
 final byte[] trailer = new byte[MAGIC.length];
 f.readFully(trailer);
 if (Arrays.equals(MAGIC, trailer) == false) {
  throw new RuntimeException(name + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(trailer));
 }

 final long footerOffset = lengthOffset - declaredFooterLength;
 LOG.debug("read footer length: {}, footer index: {}", declaredFooterLength, footerOffset);
 // Reject a footer that would overlap the leading magic or not end before the length field.
 final boolean tooEarly = footerOffset < MAGIC.length;
 final boolean tooLate = footerOffset >= lengthOffset;
 if (tooEarly || tooLate) {
  throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerOffset);
 }

 f.seek(footerOffset);
 return converter.readParquetMetadata(f, options.getMetadataFilter());
}

/**
 * Copies {@code length} bytes from an input stream, starting at {@code start}, to an output stream.
 *
 * <p>The input is positioned with {@code from.seek(start)} and the bytes are moved through the
 * scratch buffer returned by {@code COPY_BUFFER.get()} until {@code length} bytes were written.
 * NOTE(review): this method was documented as thread-safe, which presumably relies on
 * {@code COPY_BUFFER} handing each thread its own buffer - confirm against its declaration.
 *
 * @param from a {@link SeekableInputStream}
 * @param to any {@link PositionOutputStream}
 * @param start where in the from stream to start copying
 * @param length the number of bytes to copy
 * @throws IOException if there is an error while reading or writing
 * @throws IllegalArgumentException if the input ends before {@code length} bytes could be read
 */
private static void copy(SeekableInputStream from, PositionOutputStream to,
             long start, long length) throws IOException{
 LOG.debug("Copying {} bytes at {} to {}" ,length , start , to.getPos());
 from.seek(start);
 long bytesCopied = 0;
 byte[] buffer = COPY_BUFFER.get();
 while (bytesCopied < length) {
  long bytesLeft = length - bytesCopied;
  // Never request more than remains; the minimum is at most buffer.length, so the cast is safe.
  int bytesRead = from.read(buffer, 0, (int) Math.min(buffer.length, bytesLeft));
  if (bytesRead < 0) {
   // Parenthesized so the position is summed numerically rather than concatenated digit by digit.
   throw new IllegalArgumentException(
     "Unexpected end of input file at " + (start + bytesCopied));
  }
  to.write(buffer, 0, bytesRead);
  bytesCopied += bytesRead;
 }
}

/**
 * Copies {@code length} bytes from an input stream, starting at {@code start}, to an output stream.
 *
 * <p>The input is positioned with {@code from.seek(start)} and the bytes are moved through the
 * scratch buffer returned by {@code COPY_BUFFER.get()} until {@code length} bytes were written.
 * NOTE(review): this method was documented as thread-safe, which presumably relies on
 * {@code COPY_BUFFER} handing each thread its own buffer - confirm against its declaration.
 *
 * @param from a {@link SeekableInputStream}
 * @param to any {@link PositionOutputStream}
 * @param start where in the from stream to start copying
 * @param length the number of bytes to copy
 * @throws IOException if there is an error while reading or writing
 * @throws IllegalArgumentException if the input ends before {@code length} bytes could be read
 */
private static void copy(SeekableInputStream from, PositionOutputStream to,
             long start, long length) throws IOException{
 LOG.debug("Copying {} bytes at {} to {}" ,length , start , to.getPos());
 from.seek(start);
 long bytesCopied = 0;
 byte[] buffer = COPY_BUFFER.get();
 while (bytesCopied < length) {
  long bytesLeft = length - bytesCopied;
  // Never request more than remains; the minimum is at most buffer.length, so the cast is safe.
  int bytesRead = from.read(buffer, 0, (int) Math.min(buffer.length, bytesLeft));
  if (bytesRead < 0) {
   // Parenthesized so the position is summed numerically rather than concatenated digit by digit.
   throw new IllegalArgumentException(
     "Unexpected end of input file at " + (start + bytesCopied));
  }
  to.write(buffer, 0, bytesRead);
  bytesCopied += bytesRead;
 }
}

/**
 * Reads {@code length} bytes starting at {@code offset} into a list of buffers, none of which
 * is larger than {@code options.getMaxAllocationSize()}.
 *
 * All buffers are allocated first and only then filled in order; each one is flipped after
 * it has been filled.
 *
 * @param f file to read the blocks from
 * @param offset the position in {@code f} to seek to before reading
 * @param length the total number of bytes to read
 * @return the ByteBuffer blocks
 * @throws IOException if there is an error while reading from the stream
 */
List<ByteBuffer> readBlocks(SeekableInputStream f, long offset, int length) throws IOException {
 f.seek(offset);

 final int maxChunk = options.getMaxAllocationSize();
 final int wholeChunks = length / maxChunk;
 final int remainder = length % maxChunk;
 final boolean hasRemainder = remainder > 0;

 final List<ByteBuffer> blocks = new ArrayList<>(hasRemainder ? wholeChunks + 1 : wholeChunks);
 for (int pending = wholeChunks; pending > 0; pending--) {
  blocks.add(options.getAllocator().allocate(maxChunk));
 }
 if (hasRemainder) {
  // A final, smaller buffer takes the bytes that do not fill a whole chunk.
  blocks.add(options.getAllocator().allocate(remainder));
 }

 for (ByteBuffer block : blocks) {
  f.readFully(block);
  block.flip();
 }
 return blocks;
}

// Seek the stream to the position given by offset.
f.seek(offset);

/**
 * Reads the offset index of a column chunk from the position recorded in its index reference.
 *
 * @param column
 *          the column chunk which the offset index is to be returned for
 * @return the offset index for the specified column chunk or {@code null} if the chunk has no
 *         offset index reference
 * @throws IOException
 *           if any I/O error occurs during reading the file
 */
@Private
public OffsetIndex readOffsetIndex(ColumnChunkMetaData column) throws IOException {
 final IndexReference offsetIndexRef = column.getOffsetIndexReference();
 if (offsetIndexRef != null) {
  // Position the stream on the serialized index before decoding it.
  f.seek(offsetIndexRef.getOffset());
  return ParquetMetadataConverter.fromParquetOffsetIndex(Util.readOffsetIndex(f));
 }
 return null;
}

/**
 * Reads the column index of a column chunk from the position recorded in its index reference.
 *
 * @param column
 *          the column chunk which the column index is to be returned for
 * @return the column index for the specified column chunk or {@code null} if the chunk has no
 *         column index reference
 * @throws IOException
 *           if any I/O error occurs during reading the file
 */
@Private
public ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException {
 final IndexReference columnIndexRef = column.getColumnIndexReference();
 if (columnIndexRef != null) {
  // Position the stream on the serialized index before decoding it.
  f.seek(columnIndexRef.getOffset());
  return ParquetMetadataConverter.fromParquetColumnIndex(column.getPrimitiveType(), Util.readColumnIndex(f));
 }
 return null;
}

/**
 * Loads the dictionary page of a column chunk and returns it decompressed.
 *
 * Yields null when the chunk's encodings include neither PLAIN_DICTIONARY nor RLE_DICTIONARY,
 * or when the first page header of the chunk carries no dictionary page header.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
 final boolean dictionaryEncoded = meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY)
   || meta.getEncodings().contains(Encoding.RLE_DICTIONARY);
 if (!dictionaryEncoded) {
  return null;
 }

 // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
 final long currentPos = f.getPos();
 final long chunkStart = meta.getStartingPos();
 if (currentPos != chunkStart) {
  f.seek(chunkStart);
 }

 final PageHeader header = Util.readPageHeader(f);
 if (header.isSetDictionary_page_header()) {
  final DictionaryPage compressed = readCompressedDictionary(header, f);
  final BytesInputDecompressor codec = options.getCodecFactory().getDecompressor(meta.getCodec());
  return new DictionaryPage(
    codec.decompress(compressed.getBytes(), compressed.getUncompressedSize()),
    compressed.getDictionarySize(),
    compressed.getEncoding());
 }
 return null; // TODO: should this complain?
}

/**
 * Fetches a column chunk's dictionary page and hands it back in uncompressed form.
 *
 * The result is null if the chunk lists neither PLAIN_DICTIONARY nor RLE_DICTIONARY among its
 * encodings, or if the page header read at the chunk start has no dictionary page header set.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
 final boolean usesDictionary = meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY)
   || meta.getEncodings().contains(Encoding.RLE_DICTIONARY);
 if (usesDictionary == false) {
  return null;
 }

 // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
 final long streamPos = f.getPos();
 final long startingPos = meta.getStartingPos();
 final boolean alreadyPositioned = streamPos == startingPos;
 if (!alreadyPositioned) {
  f.seek(startingPos);
 }

 final PageHeader firstHeader = Util.readPageHeader(f);
 final boolean hasDictionaryPage = firstHeader.isSetDictionary_page_header();
 if (!hasDictionaryPage) {
  return null; // TODO: should this complain?
 }

 final DictionaryPage rawPage = readCompressedDictionary(firstHeader, f);
 final BytesInputDecompressor inflater = options.getCodecFactory().getDecompressor(meta.getCodec());
 return new DictionaryPage(
   inflater.decompress(rawPage.getBytes(), rawPage.getUncompressedSize()),
   rawPage.getDictionarySize(),
   rawPage.getEncoding());
}

Javadoc

Seek to a new position in the InputStream.

Popular methods of SeekableInputStream

read
readFully
Read len bytes of data into an array, at position start. This method will block until len bytes are
close
getPos
Return the current position in the InputStream.

Popular in Java

Parsing JSON documents to java classes using gson
requestLocationUpdates (LocationManager)
getSystemService (Context)
scheduleAtFixedRate (Timer)
PrintStream (java.io)
Fake signature of an existing Java class.
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
PriorityQueue (java.util)
A PriorityQueue holds elements on a priority heap, which orders the elements according to their natu
StringTokenizer (java.util)
Breaks a string into tokens; new code should probably use String#split. // Legacy code: StringTo
ImageIO (javax.imageio)
Top Vim plugins

How to use the seek method in org.apache.parquet.io.SeekableInputStream

Best Java code snippets using org.apache.parquet.io.SeekableInputStream.seek (Showing top 11 results out of 315)

How to use the seek method in org.apache.parquet.io.SeekableInputStream