// LLAP: if the daemon-level zero-copy setting differs from the job's,
// clone the job conf so the daemon setting wins without mutating the caller's conf.
boolean useZeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(daemonConf);
if (useZeroCopy != OrcConf.USE_ZEROCOPY.getBoolean(jobConf)) {
  jobConf = new Configuration(jobConf);
  jobConf.setBoolean(OrcConf.USE_ZEROCOPY.getAttribute(), useZeroCopy);
}
int bufferSize) throws IOException {
  int rowIndexStride = (int) OrcConf.ROW_INDEX_STRIDE.getLong(conf);
  boolean addBlockPadding = OrcConf.BLOCK_PADDING.getBoolean(conf);
  String versionName = OrcConf.WRITE_FORMAT.getString(conf);
  OrcFile.Version versionValue = (versionName == null)
      ? OrcFile.Version.CURRENT
      : OrcFile.Version.byName(versionName);
  String enString = OrcConf.ENCODING_STRATEGY.getString(conf);
  if (enString == null) {
    encodingStrategy = OrcFile.EncodingStrategy.SPEED;
  } else {
    encodingStrategy = OrcFile.EncodingStrategy.valueOf(enString);
  }
  final double paddingTolerance = OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf);
  long blockSizeValue = OrcConf.BLOCK_SIZE.getLong(conf);
  double bloomFilterFpp = OrcConf.BLOOM_FILTER_FPP.getDouble(conf);
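Each OrcConf getter falls back to the enum's compiled-in default when the key is absent from the Configuration, so callers never need null checks for numeric settings. A minimal standalone sketch of that pattern (the printed defaults assume a recent org.apache.orc release; check OrcConf in your version):

import org.apache.hadoop.conf.Configuration;
import org.apache.orc.OrcConf;

public class OrcConfDefaults {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Nothing set yet: the getters return OrcConf's built-in defaults.
    System.out.println(OrcConf.ROW_INDEX_STRIDE.getLong(conf));   // 10000 by default
    System.out.println(OrcConf.BLOOM_FILTER_FPP.getDouble(conf)); // 0.05 by default
    // Setting through the enum writes the canonical attribute name
    // (e.g. "orc.row.index.stride") into the Configuration.
    OrcConf.ROW_INDEX_STRIDE.setLong(conf, 5000);
    System.out.println(OrcConf.ROW_INDEX_STRIDE.getLong(conf));   // 5000
  }
}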
double orcMemPool = this.parseCtx.getConf().getDouble(
    OrcConf.MEMORY_POOL.getHiveConfName(),
    (Double) OrcConf.MEMORY_POOL.getDefaultValue());
long orcStripSize = this.parseCtx.getConf().getLong(
    OrcConf.STRIPE_SIZE.getHiveConfName(),
    (Long) OrcConf.STRIPE_SIZE.getDefaultValue());
MemoryInfo memoryInfo = new MemoryInfo(this.parseCtx.getConf());
LOG.debug("Memory info during SDPO opt: {}", memoryInfo);
private static Options buildReaderOptions(TableMeta meta) {
  return new Options()
      .useZeroCopy(Boolean.parseBoolean(meta.getOption(
          OrcConf.USE_ZEROCOPY.getAttribute(),
          String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue()))))
      .skipCorruptRecords(Boolean.parseBoolean(meta.getOption(
          OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
          String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue()))));
}
// Delta files are much smaller than base files, so shrink the buffer and
// stripe sizes by the configured base/delta ratio, favor SPEED encoding,
// and disable row indexes and dictionary encoding for the delta writer.
int ratio = (int) OrcConf.BASE_DELTA_RATIO.getLong(options.getConfiguration());
writerOptions.bufferSize(baseBufferSizeValue / ratio);
writerOptions.stripeSize(baseStripeSizeValue / ratio);
writerOptions.encodingStrategy(org.apache.orc.OrcFile.EncodingStrategy.SPEED);
writerOptions.rowIndexStride(0);
writerOptions.getConfiguration().set(
    OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), "-1.0");
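A quick worked example of the scaling above, assuming ORC's usual defaults (64 MiB stripe, 256 KiB buffer, base/delta ratio 8); the variable names mirror the snippet and are otherwise hypothetical:

long baseStripeSizeValue = 64L * 1024 * 1024; // assumed orc.stripe.size default
int baseBufferSizeValue = 256 * 1024;         // assumed orc.compress.size default
int ratio = 8;                                // assumed orc.base.delta.ratio default
// The delta writer ends up with an 8 MiB stripe and a 32 KiB buffer.
System.out.println(baseStripeSizeValue / ratio); // 8388608
System.out.println(baseBufferSizeValue / ratio); // 32768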
jobConf.set(MAPRED_OUTPUT_SCHEMA.getAttribute(),
    OrcSchemaConverter.convert(fieldTypes, fieldNames).toString());
jobConf.set(COMPRESS.getAttribute(), compression.name());
jobConf.set(ROW_INDEX_STRIDE.getAttribute(), String.valueOf(rowIndexStride));
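The raw jobConf.set calls above are equivalent to OrcConf's typed setters, which spare the String.valueOf conversions; a minimal sketch (the schema string is a placeholder):

import org.apache.hadoop.mapred.JobConf;
import org.apache.orc.OrcConf;

JobConf jobConf = new JobConf();
OrcConf.MAPRED_OUTPUT_SCHEMA.setString(jobConf, "struct<x:int,y:string>");
OrcConf.COMPRESS.setString(jobConf, "SNAPPY");
OrcConf.ROW_INDEX_STRIDE.setLong(jobConf, 10000);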
/**
 * Create the memory manager.
 * @param conf use the configuration to find the maximum size of the memory
 *             pool.
 */
public MemoryManagerImpl(Configuration conf) {
  double maxLoad = OrcConf.MEMORY_POOL.getDouble(conf);
  ROWS_BETWEEN_CHECKS = OrcConf.ROWS_BETWEEN_CHECKS.getLong(conf);
  LOG.info(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute() + "=" + ROWS_BETWEEN_CHECKS);
  if (ROWS_BETWEEN_CHECKS < 1 || ROWS_BETWEEN_CHECKS > 10000) {
    throw new IllegalArgumentException(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute()
        + "=" + ROWS_BETWEEN_CHECKS + " is outside valid range [1,10000].");
  }
  totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean()
      .getHeapMemoryUsage().getMax() * maxLoad);
  ownerLock.lock();
}
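The pool computation above is simply max heap times orc.memory.pool; a standalone sketch of the same arithmetic, assuming the default load factor of 0.5:

import java.lang.management.ManagementFactory;

double maxLoad = 0.5; // assumed orc.memory.pool default
long heapMax = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
long totalMemoryPool = Math.round(heapMax * maxLoad);
// e.g. with -Xmx4g this yields roughly a 2 GiB pool shared by all open writers.
System.out.println(totalMemoryPool);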
public OrcColumnVectorProducer(MetadataCache metadataCache,
    LowLevelCache lowLevelCache, BufferUsageManager bufferManager,
    Configuration conf, LlapDaemonCacheMetrics cacheMetrics,
    LlapDaemonIOMetrics ioMetrics, FixedSizedObjectPool<IoTrace> tracePool) {
  LlapIoImpl.LOG.info("Initializing ORC column vector producer");
  this.metadataCache = metadataCache;
  this.lowLevelCache = lowLevelCache;
  this.bufferManager = bufferManager;
  this.conf = conf;
  this._skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
  this.cacheMetrics = cacheMetrics;
  this.ioMetrics = ioMetrics;
  this.tracePool = tracePool;
}
private static FileSinkOperator.RecordWriter createOrcRecordWriter(
    File outputFile, Format format, CompressionKind compression,
    ObjectInspector columnObjectInspector) throws IOException {
  JobConf jobConf = new JobConf();
  OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
  OrcConf.COMPRESS.setString(jobConf, compression.name());
  Properties tableProperties = new Properties();
  tableProperties.setProperty(IOConstants.COLUMNS, "test");
  tableProperties.setProperty(IOConstants.COLUMNS_TYPES,
      columnObjectInspector.getTypeName());
  tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");
  return new OrcOutputFormat().getHiveRecordWriter(jobConf,
      new Path(outputFile.toURI()), Text.class, compression != NONE,
      tableProperties, () -> {});
}
// Forward any ORC options present in the table properties to the job.
for (OrcConf property : OrcConf.values()) {
  String propName = property.getAttribute();
  if (tableProps.containsKey(propName)) {
    jobProperties.put(propName, tableProps.get(propName));
  }
}
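Because OrcConf is a plain enum, the same values() loop can dump every recognized ORC key with its default, which is handy when checking which table properties will actually be forwarded; a minimal sketch:

import org.apache.orc.OrcConf;

for (OrcConf property : OrcConf.values()) {
  // e.g. "orc.compress -> ZLIB", "orc.stripe.size -> 67108864", ...
  System.out.println(property.getAttribute() + " -> " + property.getDefaultValue());
}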
StringBaseTreeWriter(int columnId, TypeDescription schema,
                     StreamFactory writer, boolean nullable) throws IOException {
  super(columnId, schema, writer, nullable);
  this.isDirectV2 = isNewWriteFormat(writer);
  stringOutput = writer.createStream(id, OrcProto.Stream.Kind.DICTIONARY_DATA);
  lengthOutput = createIntegerWriter(
      writer.createStream(id, OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  rowOutput = createIntegerWriter(
      writer.createStream(id, OrcProto.Stream.Kind.DATA), false, isDirectV2, writer);
  recordPosition(rowIndexPosition);
  rowIndexValueCount.add(0L);
  buildIndex = writer.buildIndex();
  directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
  directLengthOutput = createIntegerWriter(
      writer.createStream(id, OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
  Configuration conf = writer.getConfiguration();
  dictionaryKeySizeThreshold =
      org.apache.orc.OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getDouble(conf);
  strideDictionaryCheck =
      org.apache.orc.OrcConf.ROW_INDEX_STRIDE_DICTIONARY_CHECK.getBoolean(conf);
  doneDictionaryCheck = false;
}
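The dictionaryKeySizeThreshold read above is a ratio: the writer compares distinct dictionary keys against non-null values seen so far and abandons dictionary encoding once the ratio exceeds the threshold. A tiny sketch of that decision with hypothetical counts, assuming the 0.8 default (the exact check lives in the string tree writer and varies slightly by ORC version):

double dictionaryKeySizeThreshold = 0.8; // assumed orc.dictionary.key.threshold default
long distinctKeys = 9500;   // hypothetical dictionary size at the check point
long nonNullValues = 10000; // hypothetical non-null values seen
boolean useDictionary = distinctKeys <= nonNullValues * dictionaryKeySizeThreshold;
// 9500 > 8000, so this column would fall back to direct encoding.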
final String columnNameDelimiter = table.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)
    ? table.getProperty(serdeConstants.COLUMN_NAME_DELIMITER)
    : String.valueOf(SerDeUtils.COMMA);
String compressType = OrcConf.COMPRESS.getString(table, conf);
long length) {
  TypeDescription schema =
      TypeDescription.fromString(OrcConf.MAPRED_INPUT_SCHEMA.getString(conf));
  Reader.Options options = reader.options()
      .range(start, length)
      .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
      .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
      .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
  if (schema != null) {
    options.schema(schema);
  } else {
    schema = reader.getSchema();
  }
  options.include(parseInclude(schema, OrcConf.INCLUDE_COLUMNS.getString(conf)));
  String kryoSarg = OrcConf.KRYO_SARG.getString(conf);
  String sargColumns = OrcConf.SARG_COLUMNS.getString(conf);
  if (kryoSarg != null && sargColumns != null) {
    byte[] sargBytes = Base64.decodeBase64(kryoSarg);
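For the reader options above to pick anything up, the submitting side has to put the schema, include list, and (optionally) the serialized SearchArgument into the configuration first. A hedged sketch of that producer side using only OrcConf setters (schema and column ids are placeholders; orc.include.columns is a comma-separated list of column ids):

import org.apache.hadoop.conf.Configuration;
import org.apache.orc.OrcConf;

Configuration conf = new Configuration();
// Schema the MapReduce reader should project the file onto.
OrcConf.MAPRED_INPUT_SCHEMA.setString(conf, "struct<name:string,age:int>");
// Column ids to read; ids follow the file schema's pre-order numbering.
OrcConf.INCLUDE_COLUMNS.setString(conf, "0,2");
OrcConf.USE_ZEROCOPY.setBoolean(conf, true);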
public LlapAwareMemoryManager(Configuration conf) {
  super(conf);
  maxLoad = OrcConf.MEMORY_POOL.getDouble(conf);
  long memPerExecutor = LlapDaemonInfo.INSTANCE.getMemoryPerExecutor();
  totalMemoryPool = (long) (memPerExecutor * maxLoad);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Using LLAP memory manager for orc writer. memPerExecutor: {} maxLoad: {} totalMemPool: {}",
        LlapUtil.humanReadableByteCount(memPerExecutor), maxLoad,
        LlapUtil.humanReadableByteCount(totalMemoryPool));
  }
}
this.rowIndexStride = fileReader.rowIndexStride;
this.ignoreNonUtf8BloomFilter =
    OrcConf.IGNORE_NON_UTF8_BLOOM_FILTERS.getBoolean(fileReader.conf);
SearchArgument sarg = options.getSearchArgument();
if (sarg != null && rowIndexStride != 0) {
  // ... (SearchArgument row-group filtering setup elided in the original)
}
this.maxDiskRangeChunkLimit =
    OrcConf.ORC_MAX_DISK_RANGE_CHUNK_LIMIT.getInt(fileReader.conf);
// Options set explicitly on Reader.Options win; the Configuration is only
// consulted when the option was left unset (null).
Boolean zeroCopy = options.getUseZeroCopy();
if (zeroCopy == null) {
  zeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(fileReader.conf);
}
Boolean skipCorrupt = options.getSkipCorruptRecords();
if (skipCorrupt == null) {
  skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(fileReader.conf);
}
private int determineUncompressedPartSize() {
  // Break the uncompressed data in the cache into chunks the size of the
  // prevalent ORC compression buffer (the default), or the maximum
  // allocation (since we cannot allocate bigger chunks), whichever is less.
  long orcCbSizeDefault = ((Number) OrcConf.BUFFER_SIZE.getDefaultValue()).longValue();
  int maxAllocSize = cacheWrapper.getAllocator().getMaxAllocation();
  return (int) Math.min(maxAllocSize, orcCbSizeDefault);
}
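Concretely, OrcConf.BUFFER_SIZE defaults to 256 KiB in recent ORC releases, so with an allocator capped at, say, 16 MiB the method returns 262144; a minimal sketch of the same min() logic with those assumed numbers:

long orcCbSizeDefault = 256 * 1024;  // assumed orc.compress.size default
int maxAllocSize = 16 * 1024 * 1024; // hypothetical allocator cap
int partSize = (int) Math.min(maxAllocSize, orcCbSizeDefault); // 262144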
/**
 * Put the given SearchArgument into the configuration for an OrcInputFormat.
 * @param conf the configuration to modify
 * @param sarg the SearchArgument to put in the configuration
 * @param columnNames the list of column names for the SearchArgument
 */
public static void setSearchArgument(Configuration conf, SearchArgument sarg,
                                     String[] columnNames) {
  // Kryo-serialize the SearchArgument and store it Base64-encoded.
  Output out = new Output(100000);
  new Kryo().writeObject(out, sarg);
  OrcConf.KRYO_SARG.setString(conf, Base64.encodeBase64String(out.toBytes()));
  // Store the referenced column names as a comma-separated list.
  StringBuilder buffer = new StringBuilder();
  for (int i = 0; i < columnNames.length; ++i) {
    if (i != 0) {
      buffer.append(',');
    }
    buffer.append(columnNames[i]);
  }
  OrcConf.SARG_COLUMNS.setString(conf, buffer.toString());
}
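A hedged usage sketch for the method above: building a simple predicate with the SearchArgument builder (assuming the storage-api flavor of its signatures) and pushing it into the conf; column names and types are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

Configuration conf = new Configuration();
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
      .equals("state", PredicateLeaf.Type.STRING, "CA")
      .lessThan("age", PredicateLeaf.Type.LONG, 30L)
    .end()
    .build();
// Serializes the predicate and records which columns it references.
setSearchArgument(conf, sarg, new String[]{"state", "age"});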
private static Options buildReaderOptions(TableMeta meta) {
  return new Options()
      .useZeroCopy(Boolean.parseBoolean(meta.getProperty(
          OrcConf.USE_ZEROCOPY.getAttribute(),
          String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue()))))
      .skipCorruptRecords(Boolean.parseBoolean(meta.getProperty(
          OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
          String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue()))));
}