@SuppressWarnings("deprecation") public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException { schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath); Path path = new Path(logFilePath.getLogFilePath()); Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration(true))); offset = logFilePath.getOffset(); rows = reader.rows(); batch = reader.getSchema().createRowBatch(); rows.nextBatch(batch); }
public JsonORCFileWriter(LogFilePath logFilePath, CompressionCodec codec)
        throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(logFilePath.getLogFilePath());
    schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath);
    List<TypeDescription> fieldTypes = schema.getChildren();
    converters = new JsonConverter[fieldTypes.size()];
    for (int c = 0; c < converters.length; ++c) {
        converters[c] = VectorColumnFiller.createConverter(fieldTypes.get(c));
    }
    writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf)
            .compress(resolveCompression(codec))
            .setSchema(schema));
    batch = schema.createRowBatch();
}
Reader reader = createReader(input,
        readerOptions(options.getConfiguration()).filesystem(fs));
if (!understandFormat(input, reader)) {
    continue;
} else if (schema == null) {
    // First mergeable input: size the buffers and open the output writer.
    options.enforceBufferSize().bufferSize(bufferSize);
    mergeMetadata(userMetadata, reader);
    output = createWriter(outputPath, options);
} else if (!readerIsCompatible(schema, fileVersion, writerVersion,
        rowIndexStride, compression, userMetadata, input, reader)) {
    continue;
} else {
    mergeMetadata(userMetadata, reader);
    if (bufferSize < reader.getCompressionSize()) {
        bufferSize = reader.getCompressionSize();
    }
}
@Override
public RecordWriter<NullWritable, V> getRecordWriter(TaskAttemptContext taskAttemptContext)
        throws IOException {
    Configuration conf = taskAttemptContext.getConfiguration();
    Path filename = getDefaultWorkFile(taskAttemptContext, EXTENSION);
    Writer writer = OrcFile.createWriter(filename,
            org.apache.orc.mapred.OrcOutputFormat.buildOptions(conf));
    return new OrcMapreduceRecordWriter<V>(writer);
}
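// Wiring the format above into a job amounts to picking the output format
// class and publishing the schema that buildOptions() will later read back
// out of the configuration. A sketch (the schema string and output path are
// assumed values; Job is org.apache.hadoop.mapreduce.Job):
Job job = Job.getInstance(new Configuration(), "orc-writer");
job.setOutputFormatClass(org.apache.orc.mapreduce.OrcOutputFormat.class);
job.getConfiguration().set(OrcConf.MAPRED_OUTPUT_SCHEMA.getAttribute(),
        "struct<id:bigint,name:string>");
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
        .setOutputPath(job, new Path("/tmp/orc-out"));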
public OrcFileAppender build() {
    OrcFile.WriterOptions options = OrcFile.writerOptions(conf);
    return new OrcFileAppender(schema, file, options, metadata);
}
OrcFileAppender(Schema schema, OutputFile file, OrcFile.WriterOptions options,
                Map<String, byte[]> metadata) {
    orcSchema = TypeConversion.toOrc(schema, columnIds);
    options.setSchema(orcSchema);
    path = new Path(file.location());
    try {
        writer = OrcFile.createWriter(path, options);
    } catch (IOException e) {
        throw new RuntimeException("Can't create file " + path, e);
    }
    writer.addUserMetadata(COLUMN_NUMBERS_ATTRIBUTE, columnIds.serialize());
    metadata.forEach(
        (key, value) -> writer.addUserMetadata(key, ByteBuffer.wrap(value)));
}
/**
 * Builds the options for the ORC writer from the given configuration.
 * @param conf the job configuration
 * @return a new options object
 */
public static OrcFile.WriterOptions buildOptions(Configuration conf) {
    return OrcFile.writerOptions(conf)
        .version(OrcFile.Version.byName(OrcConf.WRITE_FORMAT.getString(conf)))
        .setSchema(TypeDescription.fromString(
            OrcConf.MAPRED_OUTPUT_SCHEMA.getString(conf)))
        .compress(CompressionKind.valueOf(OrcConf.COMPRESS.getString(conf)))
        .encodingStrategy(OrcFile.EncodingStrategy.valueOf(
            OrcConf.ENCODING_STRATEGY.getString(conf)))
        .bloomFilterColumns(OrcConf.BLOOM_FILTER_COLUMNS.getString(conf))
        .bloomFilterFpp(OrcConf.BLOOM_FILTER_FPP.getDouble(conf))
        .blockSize(OrcConf.BLOCK_SIZE.getLong(conf))
        .blockPadding(OrcConf.BLOCK_PADDING.getBoolean(conf))
        .stripeSize(OrcConf.STRIPE_SIZE.getLong(conf))
        .rowIndexStride((int) OrcConf.ROW_INDEX_STRIDE.getLong(conf))
        .bufferSize((int) OrcConf.BUFFER_SIZE.getLong(conf))
        .paddingTolerance(OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(conf));
}
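// Every attribute read above falls back to OrcConf's built-in default when
// unset, so a job only has to override what it cares about; the output schema
// is the one value with no useful default. A sketch of a typical setup before
// calling buildOptions() (the schema and values here are assumptions):
Configuration conf = new Configuration();
conf.set(OrcConf.MAPRED_OUTPUT_SCHEMA.getAttribute(), "struct<id:bigint,name:string>");
conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
conf.setLong(OrcConf.STRIPE_SIZE.getAttribute(), 64L * 1024 * 1024);
OrcFile.WriterOptions options = buildOptions(conf);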
Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));
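// Once open, the reader exposes the file's footer metadata directly; a quick
// inspection sketch using the stock org.apache.orc.Reader accessors:
System.out.println("schema:      " + orcReader.getSchema());
System.out.println("rows:        " + orcReader.getNumberOfRows());
System.out.println("compression: " + orcReader.getCompressionKind());
System.out.println("stripes:     " + orcReader.getStripes().size());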
@Override
public void initialize(Map<String, Object> metaData) {
    try {
        Configuration conf = new Configuration();
        // conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");
        processor = new OrcEntityProcessor(
                OrcFile.createWriter(new Path(filename),
                        OrcFile.writerOptions(conf).setSchema(SCHEMA)),
                SCHEMA.createRowBatch());
    } catch (IOException e) {
        throw new OsmosisRuntimeException(e);
    }
}
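// The commented-out line hints that a bloom filter on "tags" was considered.
// If enabled, the same effect is available either through the configuration
// attribute or directly on the writer options (a sketch; 0.01 is an assumed
// false-positive rate, not a value from the code above):
conf.set(OrcConf.BLOOM_FILTER_COLUMNS.getAttribute(), "tags");  // via Configuration
OrcFile.WriterOptions opts = OrcFile.writerOptions(conf)        // or via the builder
        .setSchema(SCHEMA)
        .bloomFilterColumns("tags")
        .bloomFilterFpp(0.01);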
@Override
public RecordWriter<NullWritable, V> getRecordWriter(FileSystem fileSystem,
                                                     JobConf conf,
                                                     String name,
                                                     Progressable progressable)
        throws IOException {
    Path path = getTaskOutputPath(conf, name);
    Writer writer = OrcFile.createWriter(path,
            buildOptions(conf).fileSystem(fileSystem));
    return new OrcMapredRecordWriter<>(writer);
}
Assert.assertEquals(BUCKET_COUNT, fileStatuses.length);
OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(hiveConf);
for (FileStatus fileStatus : fileStatuses) {
    Reader r = OrcFile.createReader(fileStatus.getPath(), readerOptions);
    TypeDescription rowSchema = r.getSchema().getChildren().get(5);
    Assert.assertEquals(
        "struct<a:int,b:string,s:struct<c:int,si:struct<d:double,e:float>>>",
        rowSchema.toString());
}
public PentahoOrcRecordWriter( List<? extends IOrcOutputField> fields, TypeDescription schema,
                               String filePath, Configuration conf ) {
    this.fields = fields;
    this.schema = schema;

    final AtomicInteger fieldNumber = new AtomicInteger(); // mutable field count
    fields.forEach( field -> setOutputMeta( fieldNumber, field ) );
    outputRowMetaAndData = new RowMetaAndData( outputRowMeta, new Object[ fieldNumber.get() ] );

    try {
        S3NCredentialUtils.applyS3CredentialsToHadoopConfigurationIfNecessary( filePath, conf );
        Path outputFile = new Path( S3NCredentialUtils.scrubFilePathIfNecessary( filePath ) );
        writer = OrcFile.createWriter( outputFile,
                OrcFile.writerOptions( conf ).setSchema( schema ) );
        batch = schema.createRowBatch();
    } catch ( IOException e ) {
        logger.error( e );
    }

    // Write the additional metadata for the fields
    // new OrcMetaDataWriter( writer ).write( fields );
}
/**
 * Initialize the ORC file reader and the batch record reader.
 * Note that `initBatch` must be called after this.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
            fileSplit.getPath(),
            OrcFile.readerOptions(conf)
                .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
                .filesystem(fileSplit.getPath().getFileSystem(conf)));
    Reader.Options options = OrcInputFormat.buildOptions(
            conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
}
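// buildOptions() translates the split boundaries into a byte range on
// Reader.Options; outside MapReduce the same restriction can be expressed
// directly. A sketch, reusing the reader and split from above:
Reader.Options rangeOpts = reader.options()
        .range(fileSplit.getStart(), fileSplit.getLength());  // stripes starting in this range
RecordReader rows = reader.rows(rangeOpts);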
OrcFile.WriterOptions options = OrcFile.writerOptions(conf).setSchema(typeDescription);
CompressionKind compressionKind = convertCompressionEnum(compression);
if (compressionKind != CompressionKind.NONE) {
    options = options.compress(compressionKind);
}
Writer writer = OrcFile.createWriter(file, options);
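// convertCompressionEnum() is project-local; a plausible sketch, assuming
// `Compression` is a simple enum of the codecs the project supports (an
// illustration, not the project's actual mapping):
private static CompressionKind convertCompressionEnum(Compression compression) {
    switch (compression) {
        case SNAPPY: return CompressionKind.SNAPPY;
        case ZLIB:   return CompressionKind.ZLIB;
        case LZO:    return CompressionKind.LZO;
        case NONE:
        default:     return CompressionKind.NONE;
    }
}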
Writer writer = OrcFile.createWriter(new Path(outputOrc), OrcFile.writerOptions(conf).setSchema(SCHEMA));
private static boolean needsCompaction(FileStatus bucket, FileSystem fs)
        throws IOException {
    // Create a reader and look at the footer.
    // No need to check the side file, since it can only be in a streaming ingest delta.
    Reader orcReader = OrcFile.createReader(bucket.getPath(),
            OrcFile.readerOptions(fs.getConf()).filesystem(fs));
    AcidStats as = OrcAcidUtils.parseAcidStats(orcReader);
    if (as == null) {
        // Should never happen, since we are reading a bucket_x file written by an ACID write.
        throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
    }
    return as.deletes > 0 || as.updates > 0;
}
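// A usage sketch for the helper above: scan the bucket files of one delta
// directory and stop at the first one that needs compacting (the directory
// path and the listing loop are assumptions about the calling context):
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
for (FileStatus bucket : fs.listStatus(new Path("/warehouse/t/delta_0000001_0000010"))) {
    if (needsCompaction(bucket, fs)) {
        System.out.println("compaction needed: " + bucket.getPath());
        break;
    }
}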
Configuration conf = new Configuration();
TypeDescription schema = getSchema(table.getSchema());
Writer writer = OrcFile.createWriter(new Path(this.path),
        OrcFile.writerOptions(conf).setSchema(schema));
VectorizedRowBatch batch = schema.createRowBatch();
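// createRowBatch() only allocates the column vectors; rows still have to be
// appended and flushed. A generic fill-and-flush loop, assuming for
// illustration that the schema is struct<id:bigint,name:string> and that
// rowCount rows are to be written:
LongColumnVector id = (LongColumnVector) batch.cols[0];
BytesColumnVector name = (BytesColumnVector) batch.cols[1];
long rowCount = 10_000;                            // assumed input size
for (long i = 0; i < rowCount; i++) {
    int row = batch.size++;
    id.vector[row] = i;
    name.setVal(row, ("row-" + i).getBytes(java.nio.charset.StandardCharsets.UTF_8));
    if (batch.size == batch.getMaxSize()) {        // batch is full: flush it
        writer.addRowBatch(batch);
        batch.reset();
    }
}
if (batch.size != 0) {
    writer.addRowBatch(batch);                     // flush the partial tail batch
}
writer.close();                                    // the footer is written on close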
@Override
public List<IColumn> loadColumns(List<String> names) {
    try {
        boolean[] toRead = OrcFileLoader.this.project(names);
        Reader.Options options = new Reader.Options().include(toRead);
        Reader reader = OrcFile.createReader(new Path(filename),
                OrcFile.readerOptions(OrcFileLoader.this.conf));
        List<IAppendableColumn> result = readColumns(
                reader, options, OrcFileLoader.this.hillviewSchema);
        return Linq.map(result, e -> e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
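// project() maps column names onto the boolean include array that ORC's
// Reader.Options expects: one flag per column id, where id 0 is the root
// struct and each nested column occupies the id range of its subtree. A
// plausible sketch of such a helper (an illustration of the convention,
// not Hillview's actual code):
static boolean[] project(TypeDescription schema, List<String> names) {
    boolean[] include = new boolean[schema.getMaximumId() + 1];
    include[0] = true;                                    // root struct
    List<String> fieldNames = schema.getFieldNames();
    List<TypeDescription> children = schema.getChildren();
    for (int i = 0; i < fieldNames.size(); i++) {
        if (names.contains(fieldNames.get(i))) {
            TypeDescription child = children.get(i);
            for (int id = child.getId(); id <= child.getMaximumId(); id++) {
                include[id] = true;                       // column plus its subtree
            }
        }
    }
    return include;
}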
conf.setBoolean(OrcConf.BLOCK_PADDING.getAttribute(), false);
Writer writer = OrcFile.createWriter(new Path(outputOrc),
        OrcFile.writerOptions(conf).setSchema(schema));