/**
 * Computes a byte-size estimate of the payload by delegating to the supplied estimator.
 * This is used to estimate the size of the payload (either in memory or when written to disk).
 *
 * @param value              the payload to size
 * @param valueSizeEstimator estimator used to compute the size in bytes
 * @param <R>                payload type
 * @return estimated size of {@code value} in bytes
 * @throws IOException if the estimator fails while sizing the payload
 */
public static <R> long computePayloadSize(R value, SizeEstimator<R> valueSizeEstimator) throws IOException {
  return valueSizeEstimator.sizeEstimate(value);
}
/**
 * Samples records with {@code RECORD_SAMPLING_RATE} frequency and computes the average record
 * size in bytes. It is used for determining how many records, at maximum, to queue. Based on the
 * change in average size it may increase or decrease the available permits.
 *
 * @param payload Payload to size
 * @throws InterruptedException if interrupted while acquiring permits from the rate limiter
 */
private void adjustBufferSizeIfNeeded(final O payload) throws InterruptedException {
  // Only every RECORD_SAMPLING_RATE-th record is actually sized, to keep estimation cheap.
  if (this.samplingRecordCounter.incrementAndGet() % RECORD_SAMPLING_RATE != 0) {
    return;
  }

  final long recordSizeInBytes = payloadSizeEstimator.sizeEstimate(payload);
  // Running average over all samples taken so far; the floor of 1 guarantees the division
  // below never divides by zero.
  final long newAvgRecordSizeInBytes = Math
      .max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
  // New permit target: memory budget divided by average record size, clamped to
  // [1, RECORD_CACHING_LIMIT].
  final int newRateLimit = (int) Math
      .min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));

  // If there is any change in number of records to cache then we will either release (if it
  // increased) or acquire (if it decreased) to adjust rate limiting to newly computed value.
  // NOTE(review): acquire() may block here, which is intentional back-pressure; the state
  // fields below are only committed once the permit adjustment has succeeded.
  if (newRateLimit > currentRateLimit) {
    rateLimiter.release(newRateLimit - currentRateLimit);
  } else if (newRateLimit < currentRateLimit) {
    rateLimiter.acquire(currentRateLimit - newRateLimit);
  }
  currentRateLimit = newRateLimit;
  avgRecordSizeInBytes = newAvgRecordSizeInBytes;
  numSamples++;
}
/**
 * Inserts the key/value pair, keeping it in the in-memory map while the estimated in-memory
 * footprint stays under {@code maxInMemorySizeInBytes}, and spilling to {@code diskBasedMap}
 * otherwise. Updates to an already-in-memory key always stay in memory.
 *
 * @param key   map key
 * @param value payload to store
 * @return the value that was put
 */
@Override
public R put(T key, R value) {
  // Stay in memory if we are under budget, or if the key is already resident (an update
  // must not be split across the two backing maps).
  if (this.currentInMemoryMapSize < maxInMemorySizeInBytes || inMemoryMap.containsKey(key)) {
    if (shouldEstimatePayloadSize && estimatedPayloadSize == 0) {
      // At first, use the sizeEstimate of a record being inserted into the spillable map.
      // Note, the converter may over estimate the size of a record in the JVM
      this.estimatedPayloadSize = keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value);
      log.info("Estimated Payload size => " + estimatedPayloadSize);
    } else if (shouldEstimatePayloadSize
        && inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
      // Re-estimate the size of a record by calculating the size of the entire map containing
      // N entries and then dividing by the number of entries present (N). This helps to get a
      // correct estimation of the size of each record in the JVM.
      // NOTE(review): shouldEstimatePayloadSize is cleared below, so this whole-map
      // measurement happens at most once per map instance — confirm that is intended.
      long totalMapSize = ObjectSizeCalculator.getObjectSize(inMemoryMap);
      this.currentInMemoryMapSize = totalMapSize;
      this.estimatedPayloadSize = totalMapSize / inMemoryMap.size();
      shouldEstimatePayloadSize = false;
      log.info("New Estimated Payload size => " + this.estimatedPayloadSize);
    }
    if (!inMemoryMap.containsKey(key)) {
      // TODO : Add support for adjusting payloadSize for updates to the same key
      currentInMemoryMapSize += this.estimatedPayloadSize;
    }
    inMemoryMap.put(key, value);
  } else {
    // Over the in-memory budget and the key is new: spill to disk.
    diskBasedMap.put(key, value);
  }
  return value;
}
// Size one (record, insertValue) pair and budget memory for 4 of them.
// NOTE(review): fragment of an enclosing method not visible here; presumably the factor of 4
// bounds the queue to roughly four records — confirm against the surrounding test.
final long objSize = sizeEstimator.sizeEstimate(new Tuple2<>(payload.record, payload.insertValue));
final long memoryLimitInBytes = 4 * objSize;
// Build one generated payload from the first record, size it, and derive a memory limit that
// admits exactly recordLimit payloads of that size.
// NOTE(review): fragment — the statement initializing `queue` continues past the visible span.
HoodieInsertValueGenResult<HoodieRecord> payload =
    getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0));
final long objSize = sizeEstimator.sizeEstimate(payload);
final long memoryLimitInBytes = recordLimit * objSize;
final BoundedInMemoryQueue<HoodieRecord, HoodieInsertValueGenResult<HoodieRecord>> queue =