/**
 * Registers a row group with this split and grows the running
 * compressed-size total accordingly.
 *
 * @param rowGroup metadata of the row group to add
 */
private void addRowGroup(BlockMetaData rowGroup) {
  // the two updates are independent; keep the size counter in sync with the list
  this.compressedByteSize += rowGroup.getCompressedSize();
  this.rowGroups.add(rowGroup);
}
/**
 * Gets the total compressed size of the parquet file by summing the
 * compressed size of every row group (block) in its footer.
 *
 * @return the sum of the compressed sizes of all row groups, in bytes
 */
@Override
public long getTotalSize() {
  return this.getBlocks().stream()
      .mapToLong(BlockMetaData::getCompressedSize)
      .sum();
}
}
/** * @param rowGroupMetadata * @return true if the mid point of row group is in a new hdfs block, and also move the currentHDFSBlock pointer to the correct index that contains the row group; * return false if the mid point of row group is in the same hdfs block */ private boolean checkBelongingToANewHDFSBlock(BlockMetaData rowGroupMetadata) { boolean isNewHdfsBlock = false; long rowGroupMidPoint = rowGroupMetadata.getStartingPos() + (rowGroupMetadata.getCompressedSize() / 2); //if mid point is not in the current HDFS block any more, return true while (rowGroupMidPoint > getHDFSBlockEndingPosition(currentMidPointHDFSBlockIndex)) { isNewHdfsBlock = true; currentMidPointHDFSBlockIndex++; if (currentMidPointHDFSBlockIndex >= hdfsBlocks.length) throw new ParquetDecodingException("the row group is not in hdfs blocks in the file: midpoint of row groups is " + rowGroupMidPoint + ", the end of the hdfs block is " + getHDFSBlockEndingPosition(currentMidPointHDFSBlockIndex - 1)); } while (rowGroupMetadata.getStartingPos() > getHDFSBlockEndingPosition(currentStartHdfsBlockIndex)) { currentStartHdfsBlockIndex++; if (currentStartHdfsBlockIndex >= hdfsBlocks.length) throw new ParquetDecodingException("The row group does not start in this file: row group offset is " + rowGroupMetadata.getStartingPos() + " but the end of hdfs blocks of file is " + getHDFSBlockEndingPosition(currentStartHdfsBlockIndex)); } return isNewHdfsBlock; }