reporter.setStatus(fileSplit.toString());
try {
@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
    InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException {
  FileSplit fSplit = (FileSplit) inputSplit;
  reporter.setStatus(fSplit.toString());

  Path path = fSplit.getPath();
  OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
  if (fSplit instanceof OrcSplit) {
    OrcSplit orcSplit = (OrcSplit) fSplit;
    // Reuse the footer cached in the split so the ORC tail is not re-read.
    if (orcSplit.hasFooter()) {
      opts.orcTail(orcSplit.getOrcTail());
    }
    opts.maxLength(orcSplit.getFileLength());
  }
  Reader reader = OrcFile.createReader(path, opts);
  return new VectorizedOrcRecordReader(reader, conf, fSplit);
}
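For context, a minimal sketch of how a mapred job might select this input format. The class name VectorizedOrcInputFormat and the helper are assumptions, not taken from the snippet above:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class OrcBatchJobSetup {
  public static JobConf configure(Path input) {
    JobConf conf = new JobConf();
    // Assumption: VectorizedOrcInputFormat is the class declaring the
    // getRecordReader above; substitute the actual enclosing class.
    conf.setInputFormat(VectorizedOrcInputFormat.class);
    FileInputFormat.addInputPath(conf, input);
    return conf;
  }
}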
@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(
    InputSplit split, final JobConf job, Reporter reporter) throws IOException {
  final FileSplit file = (FileSplit) split;
  reporter.setStatus(file.toString());

  final AvroColumnReader.Params params =
      new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
  params.setModel(ReflectData.get());
  if (job.get(AvroJob.INPUT_SCHEMA) != null) {
    params.setSchema(AvroJob.getInputSchema(job));
  }

  return new RecordReader<AvroWrapper<T>, NullWritable>() {
    private AvroColumnReader<T> reader = new AvroColumnReader<>(params);
    // Row count is kept as a float so getProgress() divides without casts.
    private float rows = reader.getRowCount();
    private long row;

    public AvroWrapper<T> createKey() {
      return new AvroWrapper<>(null);
    }

    public NullWritable createValue() {
      return NullWritable.get();
    }

    public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      if (!reader.hasNext()) {
        return false;
      }
      wrapper.datum(reader.next());
      row++;
      return true;
    }

    public float getProgress() throws IOException {
      return row / rows;
    }

    public long getPos() throws IOException {
      return row;
    }

    public void close() throws IOException {
      reader.close();
    }
  };
}
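A small sketch of how a caller might drain a reader like the one returned above, following the old mapred contract of reusing a single key/value pair. The ReaderLoop class and drain helper are illustrative names, not part of the snippet:

import java.io.IOException;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.RecordReader;

public class ReaderLoop {
  // Counts the records in a reader such as the one returned above. The old
  // mapred API reuses the same key/value objects across next() calls.
  static <T> long drain(RecordReader<AvroWrapper<T>, NullWritable> rr) throws IOException {
    AvroWrapper<T> key = rr.createKey();
    NullWritable val = rr.createValue();
    long count = 0;
    while (rr.next(key, val)) {
      count++; // a real consumer would read key.datum() here
    }
    rr.close();
    return count;
  }
}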
@Override
public String toString() {
  return super.toString() + " Split #" + String.format("%05d", index);
}
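As a quick illustration of the zero-padding above, %05d left-pads the split index to five digits:

String label = String.format("%05d", 42); // yields "00042"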
reporter.setStatus(split.toString());
@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
    InputSplit inputSplit, JobConf conf, Reporter reporter) throws IOException {
  FileSplit fSplit = (FileSplit) inputSplit;
  reporter.setStatus(fSplit.toString());

  Path path = fSplit.getPath();
  OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
  if (fSplit instanceof OrcSplit) {
    OrcSplit orcSplit = (OrcSplit) fSplit;
    // Older variant of the reader above: the cached footer is passed via
    // fileMetaInfo rather than orcTail, and no maxLength is set.
    if (orcSplit.hasFooter()) {
      opts.fileMetaInfo(orcSplit.getFileMetaInfo());
    }
  }
  Reader reader = OrcFile.createReader(path, opts);
  return new VectorizedOrcRecordReader(reader, conf, fSplit);
}