/**
 * Opens a reader for the given split, wrapping it so the underlying Parquet
 * reader is exposed through the deprecated {@code mapred} API.
 */
@Override
public RecordReader<Void, Container<V>> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  return new RecordReaderWrapper<V>(split, job, reporter);
}
/**
 * Opens a writer for the named output, delegating all work to
 * {@code RecordWriterWrapper} around the real Parquet output format.
 */
@Override
public RecordWriter<Void, V> getRecordWriter(
    FileSystem fs, JobConf conf, String name, Progressable progress) throws IOException {
  return new RecordWriterWrapper(realOutputFormat, fs, conf, name, progress);
}
/**
 * Approximates the current byte position as the split length scaled by the
 * reader's reported progress fraction.
 */
@Override
public long getPos() throws IOException {
  float fractionRead = getProgress();
  return (long) (splitLen * fractionRead);
}
/**
 * Reads the Parquet footers for the files under the tap's path, using a copy
 * of the flow's job configuration so the original is left untouched.
 */
private List<Footer> getFooters(FlowProcess<? extends JobConf> flowProcess, Hfs hfs)
    throws IOException {
  JobConf confCopy = flowProcess.getConfigCopy();
  DeprecatedParquetInputFormat inputFormat = new DeprecatedParquetInputFormat();
  inputFormat.addInputPath(confCopy, hfs.getPath());
  return inputFormat.getFooters(confCopy);
}
/**
 * Computes input splits. With task-side metadata enabled the superclass
 * handles split computation; otherwise the real Parquet input format is
 * asked for splits based on the file footers, and each split is wrapped
 * for the deprecated {@code mapred} API.
 *
 * @return the wrapped splits, or null if the real input format returned null
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    // Footers are read task-side; defer to the superclass entirely.
    return super.getSplits(job, numSplits);
  }
  List<Footer> footers = getFooters(job);
  List<ParquetInputSplit> parquetSplits = realInputFormat.getSplits(job, footers);
  if (parquetSplits == null) {
    return null; // preserve the real format's null contract for callers
  }
  InputSplit[] wrapped = new InputSplit[parquetSplits.size()];
  for (int idx = 0; idx < wrapped.length; idx++) {
    wrapped[idx] = new ParquetInputSplitWrapper(parquetSplits.get(idx));
  }
  return wrapped;
}
/**
 * Configures the job for sinking thrift records as Parquet: installs the
 * deprecated Parquet output format, registers {@code TBaseWriteSupport},
 * and records the thrift class to write.
 *
 * @throws IllegalArgumentException if no thrift class was given to the constructor
 */
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  if (this.config.getKlass() == null) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}
}
/**
 * Builds the task-unique output file path: the task's unique name plus the
 * given extension, resolved under the job's work output directory.
 */
private static Path getDefaultWorkFile(JobConf conf, String name, String extension) {
  String fileName = getUniqueName(conf, name) + extension;
  return new Path(getWorkOutputPath(conf), fileName);
}
/**
 * Configures the job for sinking tuples as Parquet: installs the deprecated
 * Parquet output format, stores the cascading schema string in the job
 * configuration, and registers {@code TupleWriteSupport}.
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
@SuppressWarnings("unchecked") @Override public boolean source(FlowProcess<? extends JobConf> fp, SourceCall<Object[], RecordReader> sc) throws IOException { Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue(); boolean hasNext = sc.getInput().next(null, value); if (!hasNext) { return false; } // Skip nulls if (value == null) { return true; } sc.getIncomingEntry().setTuple(value.get()); return true; }
/**
 * Collects the Parquet footers for this tap's input files. Works on a copy
 * of the flow's configuration so the flow's own JobConf is not mutated.
 */
private List<Footer> getFooters(FlowProcess<? extends JobConf> flowProcess, Hfs hfs)
    throws IOException {
  DeprecatedParquetInputFormat footerReader = new DeprecatedParquetInputFormat();
  JobConf scratchConf = flowProcess.getConfigCopy();
  footerReader.addInputPath(scratchConf, hfs.getPath());
  return footerReader.getFooters(scratchConf);
}
/**
 * Produces input splits for the job. When task-side metadata is enabled,
 * the superclass computes them; otherwise splits come from the real Parquet
 * input format (driven by the file footers) and each one is wrapped in a
 * {@code ParquetInputSplitWrapper}.
 *
 * @return wrapped splits, or null when the real input format yields null
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    return super.getSplits(job, numSplits);
  }
  List<ParquetInputSplit> realSplits = realInputFormat.getSplits(job, getFooters(job));
  if (realSplits == null) {
    return null;
  }
  InputSplit[] out = new InputSplit[realSplits.size()];
  int pos = 0;
  for (ParquetInputSplit realSplit : realSplits) {
    out[pos] = new ParquetInputSplitWrapper(realSplit);
    pos++;
  }
  return out;
}
/**
 * Prepares the job configuration for writing thrift objects to Parquet.
 * Requires a thrift class; installs the deprecated output format and the
 * {@code TBaseWriteSupport} write support, then records the thrift class.
 *
 * @throws IllegalArgumentException if the scheme was built without a thrift class
 */
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  if (this.config.getKlass() == null) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}
}
/**
 * Resolves the default per-task output file: the task-unique name with the
 * supplied extension appended, located under the job's work output path.
 */
private static Path getDefaultWorkFile(JobConf conf, String name, String extension) {
  return new Path(getWorkOutputPath(conf), getUniqueName(conf, name) + extension);
}
/**
 * Wires the job up for writing cascading tuples as Parquet: the deprecated
 * output format is installed, the parquet/cascading schema string is placed
 * in the configuration, and {@code TupleWriteSupport} is registered.
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
@SuppressWarnings("unchecked") @Override public boolean source(FlowProcess<JobConf> fp, SourceCall<Object[], RecordReader> sc) throws IOException { Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue(); boolean hasNext = sc.getInput().next(null, value); if (!hasNext) { return false; } // Skip nulls if (value == null) { return true; } sc.getIncomingEntry().setTuple(value.get()); return true; }
/**
 * Returns a {@code RecordReaderWrapper} over the given split, exposing the
 * Parquet reader through the deprecated {@code mapred} interface.
 */
@Override
public RecordReader<Void, Container<V>> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  RecordReaderWrapper<V> reader = new RecordReaderWrapper<V>(split, job, reporter);
  return reader;
}
/**
 * Returns a {@code RecordWriterWrapper} that forwards writes for the named
 * output to the real Parquet output format.
 */
@Override
public RecordWriter<Void, V> getRecordWriter(
    FileSystem fs, JobConf conf, String name, Progressable progress) throws IOException {
  RecordWriterWrapper writer = new RecordWriterWrapper(realOutputFormat, fs, conf, name, progress);
  return writer;
}
/**
 * Estimates the current position by scaling the split length with the
 * fraction of the split already consumed.
 */
@Override
public long getPos() throws IOException {
  return (long) (splitLen * getProgress());
}
/**
 * Fetches the Parquet footers for the files at the tap's path. A copy of the
 * flow's configuration is used so the flow's JobConf stays unmodified.
 */
private List<Footer> getFooters(FlowProcess<JobConf> flowProcess, Hfs hfs) throws IOException {
  JobConf localConf = flowProcess.getConfigCopy();
  DeprecatedParquetInputFormat parquetFormat = new DeprecatedParquetInputFormat();
  parquetFormat.addInputPath(localConf, hfs.getPath());
  return parquetFormat.getFooters(localConf);
}
/**
 * Sets up the job for sinking tuples to Parquet: deprecated output format,
 * cascading schema string, and {@code TupleWriteSupport} registration.
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}