String propName = property.getAttribute(); if (tableProps.containsKey(propName)){ jobProperties.put(propName, tableProps.get(propName));
if (useZeroCopy != OrcConf.USE_ZEROCOPY.getBoolean(jobConf)) { jobConf = new Configuration(jobConf); jobConf.setBoolean(OrcConf.USE_ZEROCOPY.getAttribute(), useZeroCopy);
writerOptions.encodingStrategy(org.apache.orc.OrcFile.EncodingStrategy.SPEED); writerOptions.rowIndexStride(0); writerOptions.getConfiguration().set(OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute(), "-1.0");
/**
 * Create the memory manager.
 * @param conf use the configuration to find the maximum size of the memory
 *             pool (as a fraction of heap, read from {@code OrcConf.MEMORY_POOL})
 *             and the row-count interval between memory checks.
 */
public MemoryManagerImpl(Configuration conf) {
  double maxLoad = OrcConf.MEMORY_POOL.getDouble(conf);
  ROWS_BETWEEN_CHECKS = OrcConf.ROWS_BETWEEN_CHECKS.getLong(conf);
  LOG.info(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute() + "=" + ROWS_BETWEEN_CHECKS);
  // Reject configurations outside the supported check interval; a huge interval
  // would let writers blow past the pool between checks.
  if(ROWS_BETWEEN_CHECKS < 1 || ROWS_BETWEEN_CHECKS > 10000) {
    throw new IllegalArgumentException(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute() + "=" +
        ROWS_BETWEEN_CHECKS + " is outside valid range [1,10000].");
  }
  // Total pool is the configured fraction of the JVM's maximum heap size.
  totalMemoryPool = Math.round(ManagementFactory.getMemoryMXBean().
      getHeapMemoryUsage().getMax() * maxLoad);
  // NOTE(review): the lock is acquired here and not released in this
  // constructor — presumably the creating (owner) thread holds it for its
  // lifetime; confirm against the rest of the class.
  ownerLock.lock();
}
/**
 * Builds ORC reader {@link Options} from the table metadata, falling back to
 * the ORC-defined default whenever an option is absent from the table.
 */
private static Options buildReaderOptions(TableMeta meta) {
  String zeroCopyValue = meta.getOption(
      OrcConf.USE_ZEROCOPY.getAttribute(),
      String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue()));
  String skipCorruptValue = meta.getOption(
      OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
      String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue()));
  return new Options()
      .useZeroCopy(Boolean.parseBoolean(zeroCopyValue))
      .skipCorruptRecords(Boolean.parseBoolean(skipCorruptValue));
}
/**
 * Builds ORC reader {@link Options} from the table metadata; any property not
 * set on the table uses the ORC default value.
 */
private static Options buildReaderOptions(TableMeta meta) {
  boolean useZeroCopy = Boolean.parseBoolean(meta.getProperty(
      OrcConf.USE_ZEROCOPY.getAttribute(),
      String.valueOf(OrcConf.USE_ZEROCOPY.getDefaultValue())));
  boolean skipCorrupt = Boolean.parseBoolean(meta.getProperty(
      OrcConf.SKIP_CORRUPT_DATA.getAttribute(),
      String.valueOf(OrcConf.SKIP_CORRUPT_DATA.getDefaultValue())));
  Options options = new Options();
  return options.useZeroCopy(useZeroCopy).skipCorruptRecords(skipCorrupt);
}
/**
 * Resolves the table's compression codec from its metadata. Matching is
 * case-insensitive; anything other than ZLIB, SNAPPY, or LZO (including the
 * absence of the option) maps to {@link CompressionKind#NONE}.
 */
private static CompressionKind getCompressionKind(TableMeta meta) {
  String kindstr = meta.getOption(
      OrcConf.COMPRESS.getAttribute(),
      String.valueOf(OrcConf.COMPRESS.getDefaultValue()));
  // Only these three codecs are recognized; order mirrors the original checks.
  CompressionKind[] recognized = {
      CompressionKind.ZLIB, CompressionKind.SNAPPY, CompressionKind.LZO
  };
  for (CompressionKind candidate : recognized) {
    if (candidate.name().equalsIgnoreCase(kindstr)) {
      return candidate;
    }
  }
  return CompressionKind.NONE;
}
/**
 * Resolves the table's compression codec from its metadata (case-insensitive).
 * Unrecognized or missing values fall back to {@link CompressionKind#NONE}.
 */
private static CompressionKind getCompressionKind(TableMeta meta) {
  final String name = meta.getProperty(
      OrcConf.COMPRESS.getAttribute(),
      String.valueOf(OrcConf.COMPRESS.getDefaultValue()));
  return CompressionKind.ZLIB.name().equalsIgnoreCase(name) ? CompressionKind.ZLIB
      : CompressionKind.SNAPPY.name().equalsIgnoreCase(name) ? CompressionKind.SNAPPY
      : CompressionKind.LZO.name().equalsIgnoreCase(name) ? CompressionKind.LZO
      : CompressionKind.NONE;
}
/** Looks up an ORC setting on the table, falling back to its ORC default. */
private static String orcSetting(TableMeta meta, OrcConf key) {
  return meta.getProperty(key.getAttribute(), String.valueOf(key.getDefaultValue()));
}

/**
 * Builds ORC {@link OrcFile.WriterOptions} from the Hadoop configuration and
 * the table's metadata; every tunable falls back to the ORC default when it is
 * not set on the table.
 */
private static OrcFile.WriterOptions buildWriterOptions(Configuration conf, TableMeta meta, Schema schema) {
  return OrcFile.writerOptions(conf)
      .setSchema(OrcUtils.convertSchema(schema))
      .compress(getCompressionKind(meta))
      .stripeSize(Long.parseLong(orcSetting(meta, OrcConf.STRIPE_SIZE)))
      .blockSize(Long.parseLong(orcSetting(meta, OrcConf.BLOCK_SIZE)))
      .rowIndexStride(Integer.parseInt(orcSetting(meta, OrcConf.ROW_INDEX_STRIDE)))
      .bufferSize(Integer.parseInt(orcSetting(meta, OrcConf.BUFFER_SIZE)))
      .blockPadding(Boolean.parseBoolean(orcSetting(meta, OrcConf.BLOCK_PADDING)))
      .encodingStrategy(EncodingStrategy.valueOf(orcSetting(meta, OrcConf.ENCODING_STRATEGY)))
      .bloomFilterFpp(Double.parseDouble(orcSetting(meta, OrcConf.BLOOM_FILTER_FPP)))
      .bloomFilterColumns(orcSetting(meta, OrcConf.BLOOM_FILTER_COLUMNS));
}
/**
 * Builds ORC {@link OrcFile.WriterOptions} from the Hadoop configuration and
 * table metadata. Each setting is read from the table options and parsed,
 * defaulting to the corresponding ORC default when absent.
 */
private static OrcFile.WriterOptions buildWriterOptions(Configuration conf, TableMeta meta, Schema schema) {
  long stripeSize = Long.parseLong(meta.getOption(
      OrcConf.STRIPE_SIZE.getAttribute(), String.valueOf(OrcConf.STRIPE_SIZE.getDefaultValue())));
  long blockSize = Long.parseLong(meta.getOption(
      OrcConf.BLOCK_SIZE.getAttribute(), String.valueOf(OrcConf.BLOCK_SIZE.getDefaultValue())));
  int rowIndexStride = Integer.parseInt(meta.getOption(
      OrcConf.ROW_INDEX_STRIDE.getAttribute(), String.valueOf(OrcConf.ROW_INDEX_STRIDE.getDefaultValue())));
  int bufferSize = Integer.parseInt(meta.getOption(
      OrcConf.BUFFER_SIZE.getAttribute(), String.valueOf(OrcConf.BUFFER_SIZE.getDefaultValue())));
  boolean blockPadding = Boolean.parseBoolean(meta.getOption(
      OrcConf.BLOCK_PADDING.getAttribute(), String.valueOf(OrcConf.BLOCK_PADDING.getDefaultValue())));
  EncodingStrategy encodingStrategy = EncodingStrategy.valueOf(meta.getOption(
      OrcConf.ENCODING_STRATEGY.getAttribute(), String.valueOf(OrcConf.ENCODING_STRATEGY.getDefaultValue())));
  double bloomFilterFpp = Double.parseDouble(meta.getOption(
      OrcConf.BLOOM_FILTER_FPP.getAttribute(), String.valueOf(OrcConf.BLOOM_FILTER_FPP.getDefaultValue())));
  String bloomFilterColumns = meta.getOption(
      OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), String.valueOf(OrcConf.BLOOM_FILTER_COLUMNS.getDefaultValue()));

  return OrcFile.writerOptions(conf)
      .setSchema(OrcUtils.convertSchema(schema))
      .compress(getCompressionKind(meta))
      .stripeSize(stripeSize)
      .blockSize(blockSize)
      .rowIndexStride(rowIndexStride)
      .bufferSize(bufferSize)
      .blockPadding(blockPadding)
      .encodingStrategy(encodingStrategy)
      .bloomFilterFpp(bloomFilterFpp)
      .bloomFilterColumns(bloomFilterColumns);
}
boolean zeroCopySetByUser = userPropertyNames.contains(OrcConf.USE_ZEROCOPY.getAttribute()) || userPropertyNames.contains(HiveConf.ConfVars.HIVE_ORC_ZEROCOPY.varname);
jobConf.set(MAPRED_OUTPUT_SCHEMA.getAttribute(), OrcSchemaConverter.convert(fieldTypes, fieldNames).toString()); jobConf.set(COMPRESS.getAttribute(), compression.name()); jobConf.set(ROW_INDEX_STRIDE.getAttribute(), String.valueOf(rowIndexStride));
Arrays.stream(columnIds).filter(column -> column != -1) .mapToObj(String::valueOf).collect(Collectors.joining(",")); hadoopConf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), includeColumns); } else { hadoopConf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "");
/**
 * Creates a Hive ORC record writer for the given output file (test helper).
 * The ORC file version is selected from {@code format}, compression from
 * {@code compression}, and the writer emits a single column named "test".
 */
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, ObjectInspector columnObjectInspector)
    throws IOException {
  String writeFormat = (format == ORC_12) ? "0.12" : "0.11";
  JobConf jobConf = new JobConf();
  OrcConf.WRITE_FORMAT.setString(jobConf, writeFormat);
  OrcConf.COMPRESS.setString(jobConf, compression.name());

  Properties tableProperties = new Properties();
  tableProperties.setProperty(IOConstants.COLUMNS, "test");
  tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
  tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

  boolean isCompressed = compression != NONE;
  Path outputPath = new Path(outputFile.toURI());
  return new OrcOutputFormat().getHiveRecordWriter(
      jobConf, outputPath, Text.class, isCompressed, tableProperties, () -> {});
}
/**
 * Test helper: builds a Hive ORC {@link FileSinkOperator.RecordWriter} writing
 * to {@code outputFile}, configured with the requested file-format version and
 * compression, and with a one-column ("test") table schema.
 */
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, ObjectInspector columnObjectInspector)
    throws IOException {
  Properties props = new Properties();
  props.setProperty(IOConstants.COLUMNS, "test");
  props.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
  props.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

  JobConf conf = new JobConf();
  OrcConf.WRITE_FORMAT.setString(conf, format == ORC_12 ? "0.12" : "0.11");
  OrcConf.COMPRESS.setString(conf, compression.name());

  OrcOutputFormat outputFormat = new OrcOutputFormat();
  return outputFormat.getHiveRecordWriter(
      conf,
      new Path(outputFile.toURI()),
      Text.class,
      compression != NONE,
      props,
      () -> {});
}
String propName = property.getAttribute(); if (tableProps.containsKey(propName)){ jobProperties.put(propName, tableProps.get(propName));
if (useZeroCopy != OrcConf.USE_ZEROCOPY.getBoolean(jobConf)) { jobConf = new Configuration(jobConf); jobConf.setBoolean(OrcConf.USE_ZEROCOPY.getAttribute(), useZeroCopy);
public void convert() throws Exception { conf.setBoolean(OrcConf.BLOCK_PADDING.getAttribute(), false); Writer writer = OrcFile.createWriter(new Path(outputOrc), OrcFile.writerOptions(conf).setSchema(SCHEMA));
meta.putProperty(OrcConf.COMPRESS.getAttribute(), "SNAPPY"); } else if (codec.equals(Lz4Codec.class)) { meta.putProperty(OrcConf.COMPRESS.getAttribute(), "ZLIB"); } else { meta.putProperty(OrcConf.COMPRESS.getAttribute(), "NONE");
table.updateProperties() .defaultFormat(FileFormat.ORC) .set(OrcConf.COMPRESS.getAttribute(), CompressionKind.NONE.name()) .commit();