int ratio = (int) OrcConf.BASE_DELTA_RATIO.getLong(options.getConfiguration());
writerOptions.bufferSize(baseBufferSizeValue / ratio);
writerOptions.stripeSize(baseStripeSizeValue / ratio);
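For a sense of scale, here is the arithmetic worked through with assumed values; the 256 KiB buffer, 64 MiB stripe, and ratio of 8 are illustrative defaults, not values taken from this code.

// Illustrative arithmetic only; all constants are assumptions.
int ratio = 8;
int deltaBufferSize = 262144 / ratio;      // 32768 bytes (32 KiB)
long deltaStripeSize = 67108864L / ratio;  // 8388608 bytes (8 MiB)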
private void testDeleteEventFiltering3() throws Exception {
  boolean filterOn =
      HiveConf.getBoolVar(conf, HiveConf.ConfVars.FILTER_DELETE_EVENTS);
  boolean columnStatsPresent = OrcConf.ROW_INDEX_STRIDE.getLong(conf) != 0;
/**
 * Initialize the ORC file reader and the batch record reader.
 * Note that `initBatch` must be called after this.
 */
@Override
public void initialize(
    InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Configuration conf = taskAttemptContext.getConfiguration();
  Reader reader = OrcFile.createReader(
      fileSplit.getPath(),
      OrcFile.readerOptions(conf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
          .filesystem(fileSplit.getPath().getFileSystem(conf)));
  Reader.Options options = OrcInputFormat.buildOptions(
      conf, reader, fileSplit.getStart(), fileSplit.getLength());
  recordReader = reader.rows(options);
}
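Per the Javadoc above, callers are expected to pair `initialize` with `initBatch`. A minimal hedged sketch of that call order; the variable names are illustrative, not from the source:

// Hedged usage sketch; reader, inputSplit, and taskAttemptContext are
// illustrative names for the caller's objects.
reader.initialize(inputSplit, taskAttemptContext);
reader.initBatch();  // must follow initialize(), per the Javadoc above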
public long getLong(Configuration conf) {
  return getLong(null, conf);
}
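For context, a minimal sketch of the two-argument overload this delegates to, assuming it resolves the value from the table properties first, then the Configuration, then the enum's built-in default; `lookupValue` and `defaultValue` are assumed members, not confirmed from the source:

// Hypothetical sketch of the Properties-aware overload; the real OrcConf
// implementation may differ in lookup order and error handling.
public long getLong(Properties tbl, Configuration conf) {
  String value = lookupValue(tbl, conf);  // assumed helper: tbl first, then conf
  if (value != null) {
    return Long.parseLong(value);
  }
  return ((Number) defaultValue).longValue();  // assumed default field
}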
/**
 * Create the memory manager.
 * @param conf use the configuration to find the maximum size of the memory
 *             pool.
 */
public MemoryManagerImpl(Configuration conf) {
  double maxLoad = OrcConf.MEMORY_POOL.getDouble(conf);
  ROWS_BETWEEN_CHECKS = OrcConf.ROWS_BETWEEN_CHECKS.getLong(conf);
  LOG.info(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute() + "=" + ROWS_BETWEEN_CHECKS);
  if (ROWS_BETWEEN_CHECKS < 1 || ROWS_BETWEEN_CHECKS > 10000) {
    throw new IllegalArgumentException(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute()
        + "=" + ROWS_BETWEEN_CHECKS + " is outside valid range [1,10000].");
  }
  totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean()
      .getHeapMemoryUsage().getMax() * maxLoad);
  ownerLock.lock();
}
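A minimal usage sketch, assuming the check interval is set through the attribute name the constructor logs; values outside [1,10000] make the constructor throw:

// Hedged example; the interval of 5000 is illustrative.
Configuration conf = new Configuration();
conf.setLong(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), 5000L);
MemoryManagerImpl manager = new MemoryManagerImpl(conf);  // validates the range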
protected WriterOptions(Properties tableProperties, Configuration conf) {
  configuration = conf;
  memoryManagerValue = getStaticMemoryManager(conf);
  stripeSizeValue = org.apache.orc.OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
  blockSizeValue = org.apache.orc.OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
  rowIndexStrideValue =
      (int) org.apache.orc.OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
  bufferSizeValue =
      (int) org.apache.orc.OrcConf.BUFFER_SIZE.getLong(tableProperties, conf);
  blockPaddingValue =
      org.apache.orc.OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
  compressValue = CompressionKind.valueOf(
      org.apache.orc.OrcConf.COMPRESS.getString(tableProperties, conf));
  String versionName = org.apache.orc.OrcConf.WRITE_FORMAT.getString(tableProperties, conf);
  versionValue = Version.byName(versionName);
  String enString = org.apache.orc.OrcConf.ENCODING_STRATEGY.getString(tableProperties, conf);
  encodingStrategy = EncodingStrategy.valueOf(enString);
  String compString =
      org.apache.orc.OrcConf.COMPRESSION_STRATEGY.getString(tableProperties, conf);
  compressionStrategy = CompressionStrategy.valueOf(compString);
  paddingTolerance =
      org.apache.orc.OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
  bloomFilterColumns = org.apache.orc.OrcConf.BLOOM_FILTER_COLUMNS.getString(tableProperties, conf);
  bloomFilterFpp = org.apache.orc.OrcConf.BLOOM_FILTER_FPP.getDouble(tableProperties, conf);
}
protected RecordReader createReader(
    FileInputSplit fileSplit,
    TaskAttemptContext taskAttemptContext) throws IOException {
  // by default, we use org.apache.orc.mapreduce.OrcMapreduceRecordReader
  Configuration hadoopConf = taskAttemptContext.getConfiguration();
  org.apache.hadoop.fs.Path filePath =
      new org.apache.hadoop.fs.Path(fileSplit.getPath().toUri());
  Reader file = OrcFile.createReader(filePath,
      OrcFile.readerOptions(hadoopConf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf)));
  return new OrcMapreduceRecordReader<>(file,
      org.apache.orc.mapred.OrcInputFormat.buildOptions(hadoopConf, file,
          fileSplit.getStart(), fileSplit.getLength()));
}
@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  FileSplit split = (FileSplit) inputSplit;
  Configuration conf = taskAttemptContext.getConfiguration();
  Reader file = OrcFile.createReader(split.getPath(),
      OrcFile.readerOptions(conf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf)));
  return new OrcMapreduceRecordReader<>(file,
      org.apache.orc.mapred.OrcInputFormat.buildOptions(conf, file,
          split.getStart(), split.getLength()));
}
memoryManagerValue = getStaticMemoryManager(conf);
overwrite = OrcConf.OVERWRITE_OUTPUT_FILE.getBoolean(tableProperties, conf);
stripeSizeValue = OrcConf.STRIPE_SIZE.getLong(tableProperties, conf);
blockSizeValue = OrcConf.BLOCK_SIZE.getLong(tableProperties, conf);
rowIndexStrideValue = (int) OrcConf.ROW_INDEX_STRIDE.getLong(tableProperties, conf);
bufferSizeValue = (int) OrcConf.BUFFER_SIZE.getLong(tableProperties, conf);
blockPaddingValue =
    OrcConf.BLOCK_PADDING.getBoolean(tableProperties, conf);
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit inputSplit,
    JobConf conf, Reporter reporter) throws IOException {
  FileSplit split = (FileSplit) inputSplit;
  Reader file = OrcFile.createReader(split.getPath(),
      OrcFile.readerOptions(conf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf)));
  return new OrcMapredRecordReader<>(file,
      buildOptions(conf, file, split.getStart(), split.getLength()));
}
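A hedged sketch of driving the returned reader with the classic mapred iteration pattern; `inputFormat`, `split`, and the `OrcStruct` value type are illustrative assumptions:

// Hedged usage sketch; names are illustrative, not from the source.
JobConf conf = new JobConf();
RecordReader<NullWritable, OrcStruct> rows =
    inputFormat.getRecordReader(split, conf, Reporter.NULL);
NullWritable key = rows.createKey();
OrcStruct value = rows.createValue();
while (rows.next(key, value)) {
  // process one row's worth of columns from value
}
rows.close();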
@Override
public void initialize(
    InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Configuration conf = taskAttemptContext.getConfiguration();
  Reader reader = OrcFile.createReader(
      fileSplit.getPath(),
      OrcFile.readerOptions(conf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
          .filesystem(fileSplit.getPath().getFileSystem(conf)));
  Reader.Options options = OrcInputFormat.buildOptions(
      conf, reader, fileSplit.getStart(), fileSplit.getLength());
  recordReader = reader.rows(options);
  orcSchema = reader.getSchema();
  requestedColumnIds = OrcUtils.requestedColumnIds(
      caseSensitive, fieldNames, schemaFieldNames, reader);
  initBatch();
}
/**
 * This function builds the options for the ORC Writer based on the JobConf.
 * @param conf the job configuration
 * @return a new options object
 */
public static OrcFile.WriterOptions buildOptions(Configuration conf) {
  return OrcFile.writerOptions(conf)
      .version(OrcFile.Version.byName(OrcConf.WRITE_FORMAT.getString(conf)))
      .setSchema(TypeDescription.fromString(
          OrcConf.MAPRED_OUTPUT_SCHEMA.getString(conf)))
      .compress(CompressionKind.valueOf(OrcConf.COMPRESS.getString(conf)))
      .encodingStrategy(OrcFile.EncodingStrategy.valueOf(
          OrcConf.ENCODING_STRATEGY.getString(conf)))
      .bloomFilterColumns(OrcConf.BLOOM_FILTER_COLUMNS.getString(conf))
      .bloomFilterFpp(OrcConf.BLOOM_FILTER_FPP.getDouble(conf))
      .blockSize(OrcConf.BLOCK_SIZE.getLong(conf))
      .blockPadding(OrcConf.BLOCK_PADDING.getBoolean(conf))
      .stripeSize(OrcConf.STRIPE_SIZE.getLong(conf))
      .rowIndexStride((int) OrcConf.ROW_INDEX_STRIDE.getLong(conf))
      .bufferSize((int) OrcConf.BUFFER_SIZE.getLong(conf))
      .paddingTolerance(OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf));
}
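A hedged end-to-end sketch of using buildOptions to create a writer; the schema string and output path are illustrative, and `OrcConf.MAPRED_OUTPUT_SCHEMA.setString` is assumed to exist as the setter counterpart of the getString call above:

// Hedged example; schema and path are illustrative, not from the source.
Configuration conf = new Configuration();
OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, "struct<x:int,y:string>");
Writer writer = OrcFile.createWriter(
    new Path("/tmp/example.orc"),  // hypothetical output path
    buildOptions(conf));
writer.close();  // writes the footer, yielding an empty but valid ORC file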