@SuppressWarnings("deprecation") public JsonORCFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException { schema = schemaProvider.getSchema(logFilePath.getTopic(), logFilePath); Path path = new Path(logFilePath.getLogFilePath()); Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration(true))); offset = logFilePath.getOffset(); rows = reader.rows(); batch = reader.getSchema().createRowBatch(); rows.nextBatch(batch); }
Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));
Assert.assertEquals(BUCKET_COUNT, fileStatuses.length);
OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(hiveConf);
for (FileStatus fileStatus : fileStatuses) {
    Reader r = OrcFile.createReader(fileStatus.getPath(), readerOptions);

Assert.assertEquals(BUCKET_COUNT, fileStatuses.length);
readerOptions = OrcFile.readerOptions(hiveConf);
for (FileStatus fileStatus : fileStatuses) {
    Reader r = OrcFile.createReader(fileStatus.getPath(), readerOptions);
/**
 * Initialize ORC file reader and batch record reader.
 * Note that `initBatch` must be called after this.
 */
@Override
public void initialize(
        InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
            fileSplit.getPath(),
            OrcFile.readerOptions(conf)
                    .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
                    .filesystem(fileSplit.getPath().getFileSystem(conf)));
    Reader.Options options =
            OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
}
private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException {
    // create reader, look at footer
    // no need to check side file since it can only be in a streaming ingest delta
    Reader orcReader = OrcFile.createReader(bucket.getPath(),
            OrcFile.readerOptions(fs.getConf()).filesystem(fs));
    AcidStats as = OrcAcidUtils.parseAcidStats(orcReader);
    if (as == null) {
        // should never happen since we are reading bucket_x written by acid write
        throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
    }
    return as.deletes > 0 || as.updates > 0;
}

private static String getCompactionCommand(Table t, Partition p) {
@Override
public List<IColumn> loadColumns(List<String> names) {
    try {
        boolean[] toRead = OrcFileLoader.this.project(names);
        Reader.Options options = new Reader.Options();
        options = options.include(toRead);
        Reader reader = OrcFile.createReader(new Path(filename),
                OrcFile.readerOptions(OrcFileLoader.this.conf));
        List<IAppendableColumn> result = readColumns(
                reader, options, OrcFileLoader.this.hillviewSchema);
        return Linq.map(result, e -> e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
}
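The project(names) helper used above is not shown; what follows is a plausible sketch, assuming it builds the boolean include array that Reader.Options.include(boolean[]) expects. In ORC that array is indexed by column id, with id 0 reserved for the root struct, so a selected top-level field and all of its nested children must be marked. The method name and signature here are illustrative only.

static boolean[] project(TypeDescription schema, List<String> wanted) {
    boolean[] include = new boolean[schema.getMaximumId() + 1];
    include[0] = true;                                  // always include the root struct
    List<String> fieldNames = schema.getFieldNames();
    List<TypeDescription> children = schema.getChildren();
    for (int i = 0; i < fieldNames.size(); i++) {
        if (wanted.contains(fieldNames.get(i))) {
            TypeDescription child = children.get(i);
            for (int id = child.getId(); id <= child.getMaximumId(); id++) {
                include[id] = true;                     // mark the column and all its subcolumns
            }
        }
    }
    return include;
}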
            OrcFile.readerOptions( conf ).filesystem( fs ) );
} catch ( IOException e ) {
    throw new RuntimeException( "Unable to read data from file " + fileName, e );
            OrcFile.readerOptions(conf));
this.fileReaders.add(reader);
this.fileStatuses.add(status);
protected RecordReader createReader(
        FileInputSplit fileSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    // by default, we use org.apache.orc.mapreduce.OrcMapreduceRecordReader
    Configuration hadoopConf = taskAttemptContext.getConfiguration();
    org.apache.hadoop.fs.Path filePath =
            new org.apache.hadoop.fs.Path(fileSplit.getPath().toUri());
    Reader file = OrcFile.createReader(filePath,
            OrcFile.readerOptions(hadoopConf)
                    .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf)));
    return new OrcMapreduceRecordReader<>(file,
            org.apache.orc.mapred.OrcInputFormat.buildOptions(
                    hadoopConf, file, fileSplit.getStart(), fileSplit.getLength()));
}
}
public OrcIterator build() {
    Preconditions.checkNotNull(schema, "Schema is required");
    try {
        Path path = new Path(file.location());
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        ColumnIdMap columnIds = new ColumnIdMap();
        TypeDescription orcSchema = TypeConversion.toOrc(schema, columnIds);
        Reader.Options options = reader.options();
        if (start != null) {
            options.range(start, length);
        }
        options.schema(orcSchema);
        return new OrcIterator(path, orcSchema, reader.rows(options));
    } catch (IOException e) {
        throw new RuntimeException("Can't open " + file.location(), e);
    }
}
}
            OrcFile.readerOptions( conf ).filesystem( fs ) );
} catch ( IOException e ) {
    throw new IllegalArgumentException( "Unable to read data from file " + fileName, e );
final Reader reader = OrcFile.createReader(fileManager.getLastSourceFilePath(sourceFilePaths),
        OrcFile.readerOptions(configuration));
final String schema = reader.getSchema().toString();
log.info("ORC input file Schema " + schema);
@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit inputSplit,
                                                        TaskAttemptContext taskAttemptContext
                                                        ) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader file = OrcFile.createReader(split.getPath(),
            OrcFile.readerOptions(conf)
                    .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf)));
    return new OrcMapreduceRecordReader<>(file,
            org.apache.orc.mapred.OrcInputFormat.buildOptions(
                    conf, file, split.getStart(), split.getLength()));
}
this.hillviewSchema = Schema.readFromJsonFile(Paths.get(this.schemaPath));
Reader reader = OrcFile.createReader(new Path(this.filename), OrcFile.readerOptions(conf));
this.schema = reader.getSchema();
assert this.schema != null;
OrcFile.readerOptions(hadoopConf).maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf)));
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit inputSplit,
                                                      JobConf conf,
                                                      Reporter reporter) throws IOException {
    FileSplit split = (FileSplit) inputSplit;
    Reader file = OrcFile.createReader(split.getPath(),
            OrcFile.readerOptions(conf)
                    .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf)));
    return new OrcMapredRecordReader<>(file,
            buildOptions(conf, file, split.getStart(), split.getLength()));
}
public OrcRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    this.offset = split.getOffset(index);
    this.end = offset + split.getLength(index);
    final Reader reader = OrcFile.createReader(path,
            OrcFile.readerOptions(context.getConfiguration()));
    final Reader.Options options = new Reader.Options();
    options.range(offset, split.getLength(index));
    in = reader.rows(options);
    schema = reader.getSchema();
    this.batch = schema.createRowBatch();
    rowInBatch = 0;
    this.row = (OrcStruct) OrcStruct.createValue(schema);
}
@Override
public void initialize(
        InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Reader reader = OrcFile.createReader(
            fileSplit.getPath(),
            OrcFile.readerOptions(conf)
                    .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(conf))
                    .filesystem(fileSplit.getPath().getFileSystem(conf)));
    Reader.Options options =
            OrcInputFormat.buildOptions(conf, reader, fileSplit.getStart(), fileSplit.getLength());
    recordReader = reader.rows(options);
    orcSchema = reader.getSchema();
    requestedColumnIds = OrcUtils.requestedColumnIds(caseSensitive, fieldNames, schemaFieldNames, reader);
    initBatch();
}
FileSystem fs = input.getFileSystem(conf);
Reader reader = createReader(input, readerOptions(options.getConfiguration()).filesystem(fs));
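For reference, a self-contained sketch of the read pattern the snippets above share: open a Reader with OrcFile.createReader, take its schema, and iterate row batches. The file name data.orc and the assumption that column 0 is a bigint are illustrative, not taken from any of the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

public class OrcReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("data.orc");                        // assumed input file
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
        TypeDescription schema = reader.getSchema();
        VectorizedRowBatch batch = schema.createRowBatch();
        RecordReader rows = reader.rows();
        while (rows.nextBatch(batch)) {                          // false once the file is exhausted
            LongColumnVector first = (LongColumnVector) batch.cols[0]; // assumes column 0 is a bigint
            for (int r = 0; r < batch.size; r++) {
                System.out.println(first.vector[r]);
            }
        }
        rows.close();
    }
}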