/**
 * Creates a new {@link OrcWriterOptions} that carries the supplied stripe
 * row-count limit; every other constructor argument is forwarded unchanged
 * from the values visible in this scope. This instance is not modified here.
 *
 * @param stripeMaxRowCount the stripe max row count to place in the new options
 * @return a newly constructed options object
 */
public OrcWriterOptions withStripeMaxRowCount(int stripeMaxRowCount)
{
    // The parameter shadows the same-named member, so the caller's value is what gets passed.
    return new OrcWriterOptions(
            stripeMinSize,
            stripeMaxSize,
            stripeMaxRowCount,
            rowGroupMaxRowCount,
            dictionaryMaxMemory,
            maxStringStatisticsLimit,
            maxCompressionBufferSize);
}
/**
 * Returns the dictionary max memory setting held by the wrapped writer options.
 *
 * @return the value of {@code options.getDictionaryMaxMemory()}
 */
public DataSize getDictionaryMaxMemory()
{
    return options.getDictionaryMaxMemory();
}
/**
 * Returns the max compression buffer size held by the wrapped writer options.
 *
 * @return the value of {@code options.getMaxCompressionBufferSize()}
 */
public DataSize getMaxCompressionBufferSize()
{
    return options.getMaxCompressionBufferSize();
}
ORC, NONE, new OrcWriterOptions() .withStripeMinSize(new DataSize(0, MEGABYTE)) .withStripeMaxSize(new DataSize(32, MEGABYTE)) .withStripeMaxRowCount(ORC_STRIPE_SIZE) .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE) .withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), ImmutableMap.of(), HIVE_STORAGE_TIME_ZONE,
OrcWriterStats stats) this.validationBuilder = validate ? new OrcWriteValidation.OrcWriteValidationBuilder(validationMode, types).setStringStatisticsLimitInBytes(toIntExact(options.getMaxStringStatisticsLimit().toBytes())) : null; checkArgument(options.getStripeMaxSize().compareTo(options.getStripeMinSize()) >= 0, "stripeMaxSize must be greater than stripeMinSize"); this.stripeMinBytes = toIntExact(requireNonNull(options.getStripeMinSize(), "stripeMinSize is null").toBytes()); this.stripeMaxBytes = toIntExact(requireNonNull(options.getStripeMaxSize(), "stripeMaxSize is null").toBytes()); this.chunkMaxLogicalBytes = Math.max(1, stripeMaxBytes / 2); this.stripeMaxRowCount = options.getStripeMaxRowCount(); this.rowGroupMaxRowCount = options.getRowGroupMaxRowCount(); recordValidation(validation -> validation.setRowGroupMaxRowCount(rowGroupMaxRowCount)); this.maxCompressionBufferSize = toIntExact(options.getMaxCompressionBufferSize().toBytes()); int fieldColumnIndex = rootType.getFieldTypeIndex(fieldId); Type fieldType = types.get(fieldId); ColumnWriter columnWriter = createColumnWriter(fieldColumnIndex, orcTypes, fieldType, compression, maxCompressionBufferSize, orcEncoding, hiveStorageTimeZone, options.getMaxStringStatisticsLimit()); columnWriters.add(columnWriter); stripeMaxBytes, stripeMaxRowCount, toIntExact(requireNonNull(options.getDictionaryMaxMemory(), "dictionaryMaxMemory is null").toBytes()));
/**
 * Builds an {@link OrcWriter} over {@code sink} for the given column types,
 * using ORC encoding with LZ4 compression and a fixed set of writer options.
 *
 * @param sink destination the writer is constructed with
 * @param types column types; one column name is generated per entry
 * @return the newly constructed writer
 */
private static OrcWriter createOrcFileWriter(OrcDataSink sink, List<Type> types)
{
    // Columns are named after their ordinal position: "0", "1", ...
    List<String> columnNames = IntStream.range(0, types.size())
            .mapToObj(String::valueOf)
            .collect(toImmutableList());

    // Only these three options are overridden; the rest keep whatever
    // the no-arg OrcWriterOptions constructor provides.
    OrcWriterOptions writerOptions = new OrcWriterOptions()
            .withMaxStringStatisticsLimit(new DataSize(0, BYTE))
            .withStripeMinSize(new DataSize(64, MEGABYTE))
            .withDictionaryMaxMemory(new DataSize(1, MEGABYTE));

    return new OrcWriter(
            sink,
            columnNames,
            types,
            ORC,
            LZ4,
            writerOptions,
            ImmutableMap.of(),
            UTC,
            false,
            OrcWriteValidationMode.BOTH,
            new OrcWriterStats());
}
}
compression, orcWriterOptions .withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session)) .withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session)) .withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session)) .withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session)) .withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)), fileInputColumnIndexes, ImmutableMap.<String, String>builder()
/**
 * Returns the row group max row count held by the wrapped writer options.
 *
 * @return the value of {@code options.getRowGroupMaxRowCount()}
 */
public int getRowGroupMaxRowCount()
{
    return options.getRowGroupMaxRowCount();
}
/**
 * Returns the stripe max size held by the wrapped writer options.
 *
 * @return the value of {@code options.getStripeMaxSize()}
 */
public DataSize getStripeMaxSize()
{
    return options.getStripeMaxSize();
}
/**
 * Returns the string statistics limit held by the wrapped writer options.
 * Note the name difference: this getter delegates to
 * {@code getMaxStringStatisticsLimit()} on the options object.
 *
 * @return the value of {@code options.getMaxStringStatisticsLimit()}
 */
public DataSize getStringStatisticsLimit()
{
    return options.getMaxStringStatisticsLimit();
}
/**
 * Returns the stripe min size held by the wrapped writer options.
 *
 * @return the value of {@code options.getStripeMinSize()}
 */
public DataSize getStripeMinSize()
{
    return options.getStripeMinSize();
}
/**
 * Returns the stripe max row count held by the wrapped writer options.
 *
 * @return the value of {@code options.getStripeMaxRowCount()}
 */
public int getStripeMaxRowCount()
{
    return options.getStripeMaxRowCount();
}
/**
 * Sets the row group max row count. The held options object is replaced by
 * the result of {@code withRowGroupMaxRowCount}; the setter then returns this
 * config so calls can be chained.
 *
 * @param rowGroupMaxRowCount the new row group max row count
 * @return this config instance
 */
@Config("hive.orc.writer.row-group-max-rows")
public OrcFileWriterConfig setRowGroupMaxRowCount(int rowGroupMaxRowCount)
{
    options = options.withRowGroupMaxRowCount(rowGroupMaxRowCount);
    return this;
}
/**
 * Sets the dictionary max memory. The held options object is replaced by
 * the result of {@code withDictionaryMaxMemory}; the setter then returns this
 * config so calls can be chained.
 *
 * @param dictionaryMaxMemory the new dictionary max memory
 * @return this config instance
 */
@Config("hive.orc.writer.dictionary-max-memory")
public OrcFileWriterConfig setDictionaryMaxMemory(DataSize dictionaryMaxMemory)
{
    options = options.withDictionaryMaxMemory(dictionaryMaxMemory);
    return this;
}
ORC, NONE, new OrcWriterOptions() .withStripeMinSize(new DataSize(0, MEGABYTE)) .withStripeMaxSize(new DataSize(32, MEGABYTE)) .withStripeMaxRowCount(ORC_STRIPE_SIZE) .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE) .withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), ImmutableMap.of(), HIVE_STORAGE_TIME_ZONE,
.setStringStatisticsLimitInBytes(toIntExact(options.getMaxStringStatisticsLimit().toBytes())) : null; checkArgument(options.getStripeMaxSize().compareTo(options.getStripeMinSize()) >= 0, "stripeMaxSize must be greater than stripeMinSize"); this.stripeMinBytes = toIntExact(requireNonNull(options.getStripeMinSize(), "stripeMinSize is null").toBytes()); this.stripeMaxBytes = toIntExact(requireNonNull(options.getStripeMaxSize(), "stripeMaxSize is null").toBytes()); this.chunkMaxLogicalBytes = Math.max(1, stripeMaxBytes / 2); this.stripeMaxRowCount = options.getStripeMaxRowCount(); this.rowGroupMaxRowCount = options.getRowGroupMaxRowCount(); recordValidation(validation -> validation.setRowGroupMaxRowCount(rowGroupMaxRowCount)); this.maxCompressionBufferSize = toIntExact(options.getMaxCompressionBufferSize().toBytes()); int fieldColumnIndex = rootType.getFieldTypeIndex(fieldId); Type fieldType = types.get(fieldId); ColumnWriter columnWriter = createColumnWriter(fieldColumnIndex, orcTypes, fieldType, compression, maxCompressionBufferSize, orcEncoding, hiveStorageTimeZone, options.getMaxStringStatisticsLimit()); columnWriters.add(columnWriter); stripeMaxBytes, stripeMaxRowCount, toIntExact(requireNonNull(options.getDictionaryMaxMemory(), "dictionaryMaxMemory is null").toBytes()));
ORC, NONE, new OrcWriterOptions() .withStripeMinSize(new DataSize(0, MEGABYTE)) .withStripeMaxSize(new DataSize(32, MEGABYTE)) .withStripeMaxRowCount(ORC_STRIPE_SIZE) .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE) .withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), ImmutableMap.of(), HIVE_STORAGE_TIME_ZONE,
/**
 * Creates a new {@link OrcWriterOptions} that carries the supplied stripe
 * row-count limit; every other constructor argument is forwarded unchanged
 * from the values visible in this scope. This instance is not modified here.
 *
 * @param stripeMaxRowCount the stripe max row count to place in the new options
 * @return a newly constructed options object
 */
public OrcWriterOptions withStripeMaxRowCount(int stripeMaxRowCount)
{
    // The parameter shadows the same-named member, so the caller's value is what gets passed.
    return new OrcWriterOptions(
            stripeMinSize,
            stripeMaxSize,
            stripeMaxRowCount,
            rowGroupMaxRowCount,
            dictionaryMaxMemory,
            maxStringStatisticsLimit,
            maxCompressionBufferSize);
}
ORC, NONE, new OrcWriterOptions() .withStripeMinSize(new DataSize(0, MEGABYTE)) .withStripeMaxSize(new DataSize(32, MEGABYTE)) .withStripeMaxRowCount(ORC_STRIPE_SIZE) .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE) .withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), ImmutableMap.of(), HIVE_STORAGE_TIME_ZONE,
/**
 * Creates a new {@link OrcWriterOptions} that carries the supplied stripe
 * max size; every other constructor argument is forwarded unchanged from the
 * values visible in this scope. This instance is not modified here.
 *
 * @param stripeMaxSize the stripe max size to place in the new options
 * @return a newly constructed options object
 */
public OrcWriterOptions withStripeMaxSize(DataSize stripeMaxSize)
{
    // The parameter shadows the same-named member, so the caller's value is what gets passed.
    return new OrcWriterOptions(
            stripeMinSize,
            stripeMaxSize,
            stripeMaxRowCount,
            rowGroupMaxRowCount,
            dictionaryMaxMemory,
            maxStringStatisticsLimit,
            maxCompressionBufferSize);
}