/**
 * Opens a reader for the given split, wrapping it so the underlying Parquet
 * reader is exposed through the deprecated {@code mapred} API.
 */
@Override
public RecordReader<Void, Container<V>> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  return new RecordReaderWrapper<V>(split, job, reporter);
}
/**
 * Opens a writer for the named output, delegating all work to
 * {@code RecordWriterWrapper} around the real Parquet output format.
 */
@Override
public RecordWriter<Void, V> getRecordWriter(
    FileSystem fs, JobConf conf, String name, Progressable progress) throws IOException {
  return new RecordWriterWrapper(realOutputFormat, fs, conf, name, progress);
}
/**
 * Approximates the current byte position as the split length scaled by the
 * reader's reported progress fraction.
 */
@Override
public long getPos() throws IOException {
  float fractionRead = getProgress();
  return (long) (splitLen * fractionRead);
}
/**
 * Reads the Parquet footers for the files under the tap's path, using a copy
 * of the flow's job configuration so the original is left untouched.
 */
private List<Footer> getFooters(FlowProcess<? extends JobConf> flowProcess, Hfs hfs)
    throws IOException {
  JobConf confCopy = flowProcess.getConfigCopy();
  DeprecatedParquetInputFormat inputFormat = new DeprecatedParquetInputFormat();
  inputFormat.addInputPath(confCopy, hfs.getPath());
  return inputFormat.getFooters(confCopy);
}
/**
 * Computes input splits. With task-side metadata enabled the superclass
 * handles split computation; otherwise the real Parquet input format is
 * asked for splits based on the file footers, and each split is wrapped
 * for the deprecated {@code mapred} API.
 *
 * @return the wrapped splits, or null if the real input format returned null
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    // Footers are read task-side; defer to the superclass entirely.
    return super.getSplits(job, numSplits);
  }
  List<Footer> footers = getFooters(job);
  List<ParquetInputSplit> parquetSplits = realInputFormat.getSplits(job, footers);
  if (parquetSplits == null) {
    return null; // preserve the real format's null contract for callers
  }
  InputSplit[] wrapped = new InputSplit[parquetSplits.size()];
  for (int idx = 0; idx < wrapped.length; idx++) {
    wrapped[idx] = new ParquetInputSplitWrapper(parquetSplits.get(idx));
  }
  return wrapped;
}
/**
 * Configures the job for sinking thrift records as Parquet: installs the
 * deprecated Parquet output format, registers {@code TBaseWriteSupport},
 * and records the thrift class to write.
 *
 * @throws IllegalArgumentException if no thrift class was given to the constructor
 */
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  if (this.config.getKlass() == null) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}
}
/**
 * Builds the task-unique output file path: the task's unique name plus the
 * given extension, resolved under the job's work output directory.
 */
private static Path getDefaultWorkFile(JobConf conf, String name, String extension) {
  String fileName = getUniqueName(conf, name) + extension;
  return new Path(getWorkOutputPath(conf), fileName);
}
/**
 * Configures the job for sinking tuples as Parquet: installs the deprecated
 * Parquet output format, stores the cascading schema string in the job
 * configuration, and registers {@code TupleWriteSupport}.
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
@SuppressWarnings("unchecked") @Override public boolean source(FlowProcess<? extends JobConf> fp, SourceCall<Object[], RecordReader> sc) throws IOException { Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue(); boolean hasNext = sc.getInput().next(null, value); if (!hasNext) { return false; } // Skip nulls if (value == null) { return true; } sc.getIncomingEntry().setTuple(value.get()); return true; }
/**
 * Collects the Parquet footers for this tap's input files. Works on a copy
 * of the flow's configuration so the flow's own JobConf is not mutated.
 */
private List<Footer> getFooters(FlowProcess<? extends JobConf> flowProcess, Hfs hfs)
    throws IOException {
  DeprecatedParquetInputFormat footerReader = new DeprecatedParquetInputFormat();
  JobConf scratchConf = flowProcess.getConfigCopy();
  footerReader.addInputPath(scratchConf, hfs.getPath());
  return footerReader.getFooters(scratchConf);
}
/**
 * Produces input splits for the job. When task-side metadata is enabled,
 * the superclass computes them; otherwise splits come from the real Parquet
 * input format (driven by the file footers) and each one is wrapped in a
 * {@code ParquetInputSplitWrapper}.
 *
 * @return wrapped splits, or null when the real input format yields null
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  if (isTaskSideMetaData(job)) {
    return super.getSplits(job, numSplits);
  }
  List<ParquetInputSplit> realSplits = realInputFormat.getSplits(job, getFooters(job));
  if (realSplits == null) {
    return null;
  }
  InputSplit[] out = new InputSplit[realSplits.size()];
  int pos = 0;
  for (ParquetInputSplit realSplit : realSplits) {
    out[pos] = new ParquetInputSplitWrapper(realSplit);
    pos++;
  }
  return out;
}
/**
 * Prepares the job configuration for writing thrift objects to Parquet.
 * Requires a thrift class; installs the deprecated output format and the
 * {@code TBaseWriteSupport} write support, then records the thrift class.
 *
 * @throws IllegalArgumentException if the scheme was built without a thrift class
 */
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  if (this.config.getKlass() == null) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}
}
/**
 * Resolves the default per-task output file: the task-unique name with the
 * supplied extension appended, located under the job's work output path.
 */
private static Path getDefaultWorkFile(JobConf conf, String name, String extension) {
  return new Path(getWorkOutputPath(conf), getUniqueName(conf, name) + extension);
}
/**
 * Wires the job up for writing cascading tuples as Parquet: the deprecated
 * output format is installed, the parquet/cascading schema string is placed
 * in the configuration, and {@code TupleWriteSupport} is registered.
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
@SuppressWarnings("unchecked") @Override public boolean source(FlowProcess<JobConf> fp, SourceCall<Object[], RecordReader> sc) throws IOException { Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue(); boolean hasNext = sc.getInput().next(null, value); if (!hasNext) { return false; } // Skip nulls if (value == null) { return true; } sc.getIncomingEntry().setTuple(value.get()); return true; }
/**
 * Returns a {@code RecordReaderWrapper} over the given split, exposing the
 * Parquet reader through the deprecated {@code mapred} interface.
 */
@Override
public RecordReader<Void, Container<V>> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  RecordReaderWrapper<V> reader = new RecordReaderWrapper<V>(split, job, reporter);
  return reader;
}
/**
 * Returns a {@code RecordWriterWrapper} that forwards writes for the named
 * output to the real Parquet output format.
 */
@Override
public RecordWriter<Void, V> getRecordWriter(
    FileSystem fs, JobConf conf, String name, Progressable progress) throws IOException {
  RecordWriterWrapper writer = new RecordWriterWrapper(realOutputFormat, fs, conf, name, progress);
  return writer;
}
/**
 * Estimates the current position by scaling the split length with the
 * fraction of the split already consumed.
 */
@Override
public long getPos() throws IOException {
  return (long) (splitLen * getProgress());
}
/**
 * Fetches the Parquet footers for the files at the tap's path. A copy of the
 * flow's configuration is used so the flow's JobConf stays unmodified.
 */
private List<Footer> getFooters(FlowProcess<JobConf> flowProcess, Hfs hfs) throws IOException {
  JobConf localConf = flowProcess.getConfigCopy();
  DeprecatedParquetInputFormat parquetFormat = new DeprecatedParquetInputFormat();
  parquetFormat.addInputPath(localConf, hfs.getPath());
  return parquetFormat.getFooters(localConf);
}
/**
 * Sets up the job for sinking tuples to Parquet: deprecated output format,
 * cascading schema string, and {@code TupleWriteSupport} registration.
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}