/**
 * Builds the properties from already-resolved settings.
 *
 * <p>Every argument is stored as given. The only derived value is the initial slab size,
 * which is computed from the page size by
 * {@link CapacityByteArrayOutputStream#initialSlabSizeHeuristic} with {@code MIN_SLAB_SIZE}
 * as the minimum slab size and 10 as the target number of slabs.
 *
 * @param writerVersion the writer version to store
 * @param pageSize stored as the page size threshold and used as the capacity hint for the
 *     initial slab size
 * @param dictPageSize stored as the dictionary page size threshold
 * @param enableDict stored as the dictionary-enabled flag
 * @param minRowCountForPageSizeCheck stored as the minimum row count for the page size check
 * @param maxRowCountForPageSizeCheck stored as the maximum row count for the page size check
 * @param estimateNextSizeCheck stored as the estimate-next-size-check flag
 * @param allocator the byte buffer allocator to store
 * @param writerFactory stored as the values writer factory
 */
private ParquetProperties(
    WriterVersion writerVersion,
    int pageSize,
    int dictPageSize,
    boolean enableDict,
    int minRowCountForPageSizeCheck,
    int maxRowCountForPageSizeCheck,
    boolean estimateNextSizeCheck,
    ByteBufferAllocator allocator,
    ValuesWriterFactory writerFactory) {
  // Format and encoding choices.
  this.writerVersion = writerVersion;
  this.enableDictionary = enableDict;
  this.valuesWriterFactory = writerFactory;

  // Size thresholds; the initial slab size is derived from the page size.
  this.pageSizeThreshold = pageSize;
  this.dictionaryPageSizeThreshold = dictPageSize;
  this.initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSize, 10);

  // Page size check tuning.
  this.minRowCountForPageSizeCheck = minRowCountForPageSizeCheck;
  this.maxRowCountForPageSizeCheck = maxRowCountForPageSizeCheck;
  this.estimateNextSizeCheck = estimateNextSizeCheck;

  this.allocator = allocator;
}
/**
 * Builds the properties from already-resolved settings, including the column index
 * truncate length and the page row count limit.
 *
 * <p>Every argument is stored as given. The only derived value is the initial slab size,
 * which is computed from the page size by
 * {@link CapacityByteArrayOutputStream#initialSlabSizeHeuristic} with {@code MIN_SLAB_SIZE}
 * as the minimum slab size and 10 as the target number of slabs.
 *
 * @param writerVersion the writer version to store
 * @param pageSize stored as the page size threshold and used as the capacity hint for the
 *     initial slab size
 * @param dictPageSize stored as the dictionary page size threshold
 * @param enableDict stored as the dictionary-enabled flag
 * @param minRowCountForPageSizeCheck stored as the minimum row count for the page size check
 * @param maxRowCountForPageSizeCheck stored as the maximum row count for the page size check
 * @param estimateNextSizeCheck stored as the estimate-next-size-check flag
 * @param allocator the byte buffer allocator to store
 * @param writerFactory stored as the values writer factory
 * @param columnIndexMinMaxTruncateLength stored as the column index truncate length
 * @param pageRowCountLimit stored as the page row count limit
 */
private ParquetProperties(
    WriterVersion writerVersion,
    int pageSize,
    int dictPageSize,
    boolean enableDict,
    int minRowCountForPageSizeCheck,
    int maxRowCountForPageSizeCheck,
    boolean estimateNextSizeCheck,
    ByteBufferAllocator allocator,
    ValuesWriterFactory writerFactory,
    int columnIndexMinMaxTruncateLength,
    int pageRowCountLimit) {
  // Format and encoding choices.
  this.writerVersion = writerVersion;
  this.enableDictionary = enableDict;
  this.valuesWriterFactory = writerFactory;

  // Size thresholds; the initial slab size is derived from the page size.
  this.pageSizeThreshold = pageSize;
  this.dictionaryPageSizeThreshold = dictPageSize;
  this.initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSize, 10);

  // Page size check tuning and per-page limits.
  this.minRowCountForPageSizeCheck = minRowCountForPageSizeCheck;
  this.maxRowCountForPageSizeCheck = maxRowCountForPageSizeCheck;
  this.estimateNextSizeCheck = estimateNextSizeCheck;
  this.pageRowCountLimit = pageRowCountLimit;

  this.columnIndexTruncateLength = columnIndexMinMaxTruncateLength;
  this.allocator = allocator;
}
/**
 * Creates a CapacityByteArrayOutputStream whose initial slab size is chosen by
 * {@link #initialSlabSizeHeuristic}, using maxCapacityHint as the heuristic's target
 * capacity.
 *
 * @param minSlabSize a minimum slab size
 * @param maxCapacityHint a hint for the maximum required capacity; also passed to the new
 *     stream unchanged
 * @param targetNumSlabs the target number of slabs
 * @param allocator an allocator to use when creating byte buffers for slabs
 * @return a new stream built with the computed initial slab size
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs, ByteBufferAllocator allocator) {
  final int initialSlabSize =
      initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs);
  return new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint, allocator);
}
/**
 * Creates a CapacityByteArrayOutputStream whose initial slab size is chosen by
 * {@link #initialSlabSizeHeuristic}, using maxCapacityHint as the heuristic's target
 * capacity.
 *
 * @param minSlabSize a minimum slab size
 * @param maxCapacityHint a hint for the maximum required capacity; also passed to the new
 *     stream unchanged
 * @param targetNumSlabs the target number of slabs
 * @param allocator an allocator to use when creating byte buffers for slabs
 * @return a new stream built with the computed initial slab size
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs, ByteBufferAllocator allocator) {
  final int initialSlabSize =
      initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs);
  return new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint, allocator);
}
@Override public BytesInput getBytes() { int maxDicId = getDictionarySize() - 1; LOG.debug("max dic id {}", maxDicId); int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10); RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator); encoders.add(encoder); IntIterator iterator = encodedValues.iterator(); try { while (iterator.hasNext()) { encoder.writeInt(iterator.next()); } // encodes the bit width byte[] bytesHeader = new byte[] { (byte) bitWidth }; BytesInput rleEncodedBytes = encoder.toBytes(); LOG.debug("rle encoded bytes {}", rleEncodedBytes.size()); BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes); // remember size of dictionary when we last wrote a page lastUsedDictionarySize = getDictionarySize(); lastUsedDictionaryByteSize = dictionaryByteSize; return bytes; } catch (IOException e) { throw new ParquetEncodingException("could not encode the values", e); } }
@Override public BytesInput getBytes() { int maxDicId = getDictionarySize() - 1; LOG.debug("max dic id {}", maxDicId); int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10); RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator); encoders.add(encoder); IntIterator iterator = encodedValues.iterator(); try { while (iterator.hasNext()) { encoder.writeInt(iterator.next()); } // encodes the bit width byte[] bytesHeader = new byte[] { (byte) bitWidth }; BytesInput rleEncodedBytes = encoder.toBytes(); LOG.debug("rle encoded bytes {}", rleEncodedBytes.size()); BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes); // remember size of dictionary when we last wrote a page lastUsedDictionarySize = getDictionarySize(); lastUsedDictionaryByteSize = dictionaryByteSize; return bytes; } catch (IOException e) { throw new ParquetEncodingException("could not encode the values", e); } }
// Pick the initial slab size via the heuristic: minimum slab size 64, pageSize as the
// capacity hint, and a target of 10 slabs.
int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(64, pageSize, 10);