@SuppressWarnings("deprecation") public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException { schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath); Path path = new Path(logFilePath.getLogFilePath()); Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration(true))); offset = logFilePath.getOffset(); rows = reader.rows(); batch = reader.getSchema().createRowBatch(); rows.nextBatch(batch); }
public JsonORCFileWriter(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(logFilePath.getLogFilePath());
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    List<TypeDescription> fieldTypes = schema.getChildren();
    converters = new JsonConverter[fieldTypes.size()];
    for (int c = 0; c < converters.length; ++c) {
        converters[c] = VectorColumnFiller.createConverter(fieldTypes.get(c));
    }
    writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
            .compress(resolveCompression(codec))
            .setSchema(schema));
    batch = schema.createRowBatch();
}
Reader reader = createReader(input,
        readerOptions(options.getConfiguration()).filesystem(fs));
if (!understandFormat(input, reader)) {
    continue;
} else if (schema == null) {
    // First mergeable input: size the buffers and open the output writer.
    options.enforceBufferSize().bufferSize(bufferSize);
    mergeMetadata(userMetadata, reader);
    output = createWriter(outputPath, options);
} else if (!readerIsCompatible(schema, fileVersion, writerVersion,
        rowIndexStride, compression, userMetadata, input, reader)) {
    continue;
} else {
    mergeMetadata(userMetadata, reader);
    if (bufferSize < reader.getCompressionSize()) {
        bufferSize = reader.getCompressionSize();
    }
}
@Override
public RecordWriter<NullWritable, V> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException {
    Configuration conf = taskAttemptContext.getConfiguration();
    Path filename = getDefaultWorkFile(taskAttemptContext, EXTENSION);
    Writer writer = OrcFile.createWriter(filename,
            org.apache.orc.mapred.OrcOutputFormat.buildOptions(conf));
    return new OrcMapreduceRecordWriter<V>(writer);
}
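// Wiring the format above into a job amounts to picking the output format
// class and publishing the schema that buildOptions() will later read back
// out of the configuration. A sketch (the schema string and output path are
// assumed values; Job is org.apache.hadoop.mapreduce.Job):
Job job = Job.getInstance(new Configuration(), "orc-writer");
job.setOutputFormatClass(org.apache.orc.mapreduce.OrcOutputFormat.class);
job.getConfiguration().set(OrcConf.MAPRED_OUTPUT_SCHEMA.getAttribute(),
        "struct<id:bigint,name:string>");
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
        .setOutputPath(job, new Path("/tmp/orc-out"));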
public OrcFileAppender build() {
    OrcFile.WriterOptions options = OrcFile.writerOptions(conf);
    return new OrcFileAppender(schema, file, options, metadata);
}
OrcFileAppender(Schema schema, OutputFile file, OrcFile.WriterOptions options,
                Map<String, byte[]> metadata) {
    orcSchema = TypeConversion.toOrc(schema, columnIds);
    options.setSchema(orcSchema);
    path = new Path(file.location());
    try {
        writer = OrcFile.createWriter(path, options);
    } catch (IOException e) {
        throw new RuntimeException("Can't create file " + path, e);
    }
    writer.addUserMetadata(COLUMN_NUMBERS_ATTRIBUTE, columnIds.serialize());
    metadata.forEach(
        (key, value) -> writer.addUserMetadata(key, ByteBuffer.wrap(value)));
}
/**
 * Builds the options for the ORC writer from the given configuration.
 * @param conf the job configuration
 * @return a new options object
 */
public static OrcFile.WriterOptions buildOptions(Configuration conf) {
    return OrcFile.writerOptions(conf)
        .version(OrcFile.Version.byName(OrcConf.WRITE_FORMAT.getString(conf)))
        .setSchema(TypeDescription.fromString(
            OrcConf.MAPRED_OUTPUT_SCHEMA.getString(conf)))
        .compress(CompressionKind.valueOf(OrcConf.COMPRESS.getString(conf)))
        .encodingStrategy(OrcFile.EncodingStrategy.valueOf(
            OrcConf.ENCODING_STRATEGY.getString(conf)))
        .bloomFilterColumns(OrcConf.BLOOM_FILTER_COLUMNS.getString(conf))
        .bloomFilterFpp(OrcConf.BLOOM_FILTER_FPP.getDouble(conf))
        .blockSize(OrcConf.BLOCK_SIZE.getLong(conf))
        .blockPadding(OrcConf.BLOCK_PADDING.getBoolean(conf))
        .stripeSize(OrcConf.STRIPE_SIZE.getLong(conf))
        .rowIndexStride((int) OrcConf.ROW_INDEX_STRIDE.getLong(conf))
        .bufferSize((int) OrcConf.BUFFER_SIZE.getLong(conf))
        .paddingTolerance(OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf));
}
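// Every attribute read above falls back to OrcConf's built-in default when
// unset, so a job only has to override what it cares about; the output schema
// is the one value with no useful default. A sketch of a typical setup before
// calling buildOptions() (the schema and values here are assumptions):
Configuration conf = new Configuration();
conf.set(OrcConf.MAPRED_OUTPUT_SCHEMA.getAttribute(), "struct<id:bigint,name:string>");
conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
conf.setLong(OrcConf.STRIPE_SIZE.getAttribute(), 64L * 1024 * 1024);
OrcFile.WriterOptions options = buildOptions(conf);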
Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));
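// Once open, the reader exposes the file's footer metadata directly; a quick
// inspection sketch using the stock org.apache.orc.Reader accessors:
System.out.println("schema:      " + orcReader.getSchema());
System.out.println("rows:        " + orcReader.getNumberOfRows());
System.out.println("compression: " + orcReader.getCompressionKind());
System.out.println("stripes:     " + orcReader.getStripes().size());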
@Override
public void initialize(Map<String, Object> metaData) {
    try {
        Configuration conf = new Configuration();
        // conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");
        processor = new OrcEntityProcessor(
                OrcFile.createWriter(new Path(filename),
                        OrcFile.writerOptions(conf).setSchema(SCHEMA)),
                SCHEMA.createRowBatch());
    } catch (IOException e) {
        throw new OsmosisRuntimeException(e);
    }
}
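// The commented-out line hints that a bloom filter on "tags" was considered.
// If enabled, the same effect is available either through the configuration
// attribute or directly on the writer options (a sketch; 0.01 is an assumed
// false-positive rate, not a value from the code above):
conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");  // via Configuration
OrcFile.WriterOptions opts = OrcFile.writerOptions(conf)        // or via the builder
        .setSchema(SCHEMA)
        .bloomFilterColumns("tags")
        .bloomFilterFpp(0.01);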
@Override
public RecordWriter<NullWritable, V> getRecordWriter(FileSystem fileSystem,
                                                     JobConf conf,
                                                     String name,
                                                     Progressable progressable)
        throws IOException {
    Path path = getTaskOutputPath(conf, name);
    Writer writer = OrcFile.createWriter(path,
            buildOptions(conf).fileSystem(fileSystem));
    return new OrcMapredRecordWriter<>(writer);
}
Assert.assertEquals(BUCKET_COUNT, fileStatuses.length);
OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(hiveConf);
for (FileStatus fileStatus : fileStatuses) {
    Reader r = OrcFile.createReader(fileStatus.getPath(), readerOptions);
    TypeDescription rowSchema = r.getSchema().getChildren().get(5);
    Assert.assertEquals(
        "struct<a:int,b:string,s:struct<c:int,si:struct<d:double,e:float>>>",
        rowSchema.toString());
}
public PentahoOrcRecordWriter( List<? extends IOrcOutputField> fields, TypeDescription schema,
                               String filePath, Configuration conf ) {
    this.fields = fields;
    this.schema = schema;

    final AtomicInteger fieldNumber = new AtomicInteger(); // mutable field count
    fields.forEach( field -> setOutputMeta( fieldNumber, field ) );
    outputRowMetaAndData = new RowMetaAndData( outputRowMeta, new Object[ fieldNumber.get() ] );

    try {
        S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( filePath, conf );
        Path outputFile = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( filePath ) );
        writer = OrcFile.createWriter( outputFile,
                OrcFile.writerOptions( conf ).setSchema( schema ) );
        batch = schema.createRowBatch();
    } catch ( IOException e ) {
        logger.error( e );
    }

    // Write the additional metadata for the fields
    // new OrcMetaDataWriter( writer ).write( fields );
}
/**
 * Initialize the ORC file reader and the batch record reader.
 * Note that `initBatch` must be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
            fileSplit.getPath(),
            OrcFile.readerOptions(conf)
                .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
                .filesystem(fileSplit.getPath().getFileSystem(conf)));
    Reader.Options options = OrcInputFormat.buildOptions(
            conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
}
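// buildOptions() translates the split boundaries into a byte range on
// Reader.Options; outside MapReduce the same restriction can be expressed
// directly. A sketch, reusing the reader and split from above:
Reader.Options rangeOpts = reader.options()
        .range(fileSplit.getStart(), fileSplit.getLength());  // stripes starting in this range
RecordReader rows = reader.rows(rangeOpts);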
OrcFile.WriterOptions options = OrcFile.writerOptions(conf).setSchema(typeDescription);
CompressionKind compressionKind = convertCompressionEnum(compression);
if (compressionKind != CompressionKind.NONE) {
    options = options.compress(compressionKind);
}
Writer writer = OrcFile.createWriter(file, options);
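// convertCompressionEnum() is project-local; a plausible sketch, assuming
// `Compression` is a simple enum of the codecs the project supports (an
// illustration, not the project's actual mapping):
private static CompressionKind convertCompressionEnum(Compression compression) {
    switch (compression) {
        case SNAPPY: return CompressionKind.SNAPPY;
        case ZLIB:   return CompressionKind.ZLIB;
        case LZO:    return CompressionKind.LZO;
        case NONE:
        default:     return CompressionKind.NONE;
    }
}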
Writer writer = OrcFile.createWriter(new Path(outputOrc), OrcFile.writerOptions(conf).setSchema(SCHEMA));
private static boolean needsCompaction(FileStatus bucket, FileSystem fs)
        throws IOException {
    // Create a reader and look at the footer.
    // No need to check the side file, since it can only be in a streaming ingest delta.
    Reader orcReader = OrcFile.createReader(bucket.getPath(),
            OrcFile.readerOptions(fs.getConf()).filesystem(fs));
    AcidStats as = OrcAcidUtils.parseAcidStats(orcReader);
    if (as == null) {
        // Should never happen, since we are reading a bucket_x file written by an ACID write.
        throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
    }
    return as.deletes > 0 || as.updates > 0;
}
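// A usage sketch for the helper above: scan the bucket files of one delta
// directory and stop at the first one that needs compacting (the directory
// path and the listing loop are assumptions about the calling context):
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
for (FileStatus bucket : fs.listStatus(new Path("/warehouse/t/delta_0000001_0000010"))) {
    if (needsCompaction(bucket, fs)) {
        System.out.println("compaction needed: " + bucket.getPath());
        break;
    }
}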
Configuration conf = new Configuration();
TypeDescription schema = getSchema(table.getSchema());
Writer writer = OrcFile.createWriter(new Path(this.path),
        OrcFile.writerOptions(conf).setSchema(schema));
VectorizedRowBatch batch = schema.createRowBatch();
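// createRowBatch() only allocates the column vectors; rows still have to be
// appended and flushed. A generic fill-and-flush loop, assuming for
// illustration that the schema is struct<id:bigint,name:string> and that
// rowCount rows are to be written:
LongColumnVector id = (LongColumnVector) batch.cols[0];
BytesColumnVector name = (BytesColumnVector) batch.cols[1];
long rowCount = 10_000;                            // assumed input size
for (long i = 0; i < rowCount; i++) {
    int row = batch.size++;
    id.vector[row] = i;
    name.setVal(row, ("row-" + i).getBytes(java.nio.charset.StandardCharsets.UTF_8));
    if (batch.size == batch.getMaxSize()) {        // batch is full: flush it
        writer.addRowBatch(batch);
        batch.reset();
    }
}
if (batch.size != 0) {
    writer.addRowBatch(batch);                     // flush the partial tail batch
}
writer.close();                                    // the footer is written on close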
@Override
public List<IColumn> loadColumns(List<String> names) {
    try {
        boolean[] toRead = OrcFileLoader.this.project(names);
        Reader.Options options = new Reader.Options().include(toRead);
        Reader reader = OrcFile.createReader(new Path(filename),
                OrcFile.readerOptions(OrcFileLoader.this.conf));
        List<IAppendableColumn> result = readColumns(
                reader, options, OrcFileLoader.this.hillviewSchema);
        return Linq.map(result, e -> e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
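// project() maps column names onto the boolean include array that ORC's
// Reader.Options expects: one flag per column id, where id 0 is the root
// struct and each nested column occupies the id range of its subtree. A
// plausible sketch of such a helper (an illustration of the convention,
// not Hillview's actual code):
static boolean[] project(TypeDescription schema, List<String> names) {
    boolean[] include = new boolean[schema.getMaximumId() + 1];
    include[0] = true;                                    // root struct
    List<String> fieldNames = schema.getFieldNames();
    List<TypeDescription> children = schema.getChildren();
    for (int i = 0; i < fieldNames.size(); i++) {
        if (names.contains(fieldNames.get(i))) {
            TypeDescription child = children.get(i);
            for (int id = child.getId(); id <= child.getMaximumId(); id++) {
                include[id] = true;                       // column plus its subtree
            }
        }
    }
    return include;
}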
conf.setBoolean(OrcConf.BLOCK_PADDING.getAttribute(), false);
Writer writer = OrcFile.createWriter(new Path(outputOrc),
        OrcFile.writerOptions(conf).setSchema(schema));