@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
                                               final Configuration conf, final Path path) throws IOException {
    final ParquetReader.Builder<GenericRecord> readerBuilder =
        AvroParquetReader.<GenericRecord>builder(path).withConf(conf);
    return new AvroParquetHDFSRecordReader(readerBuilder.build());
}
protected String dumpArchive(FileSystem fs, Configuration conf, String file) throws Throwable {
    AvroParquetReader<GenericRecord> reader = null;
    StringBuilder dump = new StringBuilder();
    try {
        reader = new AvroParquetReader<GenericRecord>(conf, new Path(file));
        // read() returns null once the file is exhausted
        GenericRecord record = reader.read();
        while (record != null) {
            dump.append(record.toString()).append('\n');
            record = reader.read();
        }
        return dump.toString();
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
reader = new AvroParquetReader(conf, path);
while (true) {
    IndexedRecord datum;
    try {
        datum = reader.read();
    } catch (EOFException e) {
        return true; // be lenient: treat a truncated file as a clean end of data
    }
    if (datum == null) {
        return true; // read() returns null once all records are consumed
    }
}
private void advance() {
    try {
        this.next = reader.read();
    } catch (EOFException e) {
        this.next = null;
    } catch (IOException e) {
        this.state = ReaderWriterState.ERROR;
        throw new DatasetIOException("Unable to read next record from: " + path, e);
    }
}
@Override
public void initialize() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
        "A reader may not be opened more than once - current state:%s", state);
    LOG.debug("Opening reader on path:{}", path);
    try {
        final Configuration conf = fileSystem.getConf();
        AvroReadSupport.setAvroReadSchema(conf, readerSchema);
        reader = new AvroParquetReader<E>(conf, fileSystem.makeQualified(path));
    } catch (IOException e) {
        throw new DatasetIOException("Unable to create reader path:" + path, e);
    }
    advance();
    state = ReaderWriterState.OPEN;
}
@Override
public void close() {
    if (!state.equals(ReaderWriterState.OPEN)) {
        return;
    }
    LOG.debug("Closing reader on path:{}", path);
    try {
        reader.close();
    } catch (IOException e) {
        this.state = ReaderWriterState.ERROR;
        throw new DatasetIOException("Unable to close reader path:" + path, e);
    }
    state = ReaderWriterState.CLOSED;
}
Schema chukwaAvroSchema = ChukwaAvroSchema.getSchema();
AvroReadSupport.setRequestedProjection(conf, chukwaAvroSchema);
reader = new AvroParquetReader<GenericRecord>(conf, new Path(dataSinkFile));
// The original `break` implies an enclosing read loop; the second close()
// after `reader = null` was a bug (it would throw a NullPointerException).
while (true) {
    record = reader.read();
    if (record == null) {
        break; // end of file
    }
    // ... (record handling elided in the original)
}
reader.close();
reader = null;
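// Several snippets above use the `new AvroParquetReader(conf, path)` constructor, which is
// deprecated in recent parquet-avro releases. A minimal sketch of the builder-based
// equivalent, assuming a Hadoop `Configuration conf` and `Path path` are in scope:
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

try (ParquetReader<GenericRecord> reader = AvroParquetReader
        .<GenericRecord>builder(HadoopInputFile.fromPath(path, conf))
        .build()) {
    // read() returns null at end of file
    for (GenericRecord record = reader.read(); record != null; record = reader.read()) {
        System.out.println(record);
    }
}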
public AvroParquetFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    String topic = logFilePath.getTopic();
    Schema schema = schemaRegistryClient.getSchema(topic);
    reader = AvroParquetReader.<GenericRecord>builder(path).build();
    writer = new SpecificDatumWriter<>(schema);
    offset = logFilePath.getOffset();
}
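// For context, a hedged sketch of how a DatumWriter such as the one created above is
// typically paired with an Avro binary encoder to re-serialize records read from Parquet;
// `writer` and `record` are assumed to be in scope:
import java.io.ByteArrayOutputStream;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
writer.write(record, encoder); // serialize one record to Avro binary
encoder.flush();
byte[] avroBytes = out.toByteArray();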
AvroReadSupport.setRequestedProjection(conf, readSchema);
Set<String> rowKeys = new HashSet<>();
try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
    Object obj = reader.read();
    while (obj != null) {
        if (obj instanceof GenericRecord) {
            // collect the row key from each projected record (field name assumed)
            rowKeys.add(((GenericRecord) obj).get("_hoodie_record_key").toString());
        }
        obj = reader.read();
    }
}
public Stream<GenericRecord> toStream(Path hadoopPath) throws IOException {
    Filter filter = makeFilter();
    ParquetReader<GenericRecord> reader;
    try {
        reader = AvroParquetReader.<GenericRecord>builder(hadoopPath)
            .withFilter(filter)
            .withConf(getConfiguration())
            .build();
    } catch (IOException | RuntimeException e) {
        // The default exception may not refer to the input path, so wrap it
        throw new IOException("Issue on path: " + hadoopPath, e);
    }
    return toStream(reader);
}
private ParquetReader<GenericRecord> initReader() throws IOException {
    Configuration configuration = getFs().getConf();
    if (this.schema != null) {
        AvroReadSupport.setAvroReadSchema(configuration, this.schema);
    }
    if (this.projection != null) {
        AvroReadSupport.setRequestedProjection(configuration, this.projection);
    }
    return AvroParquetReader.<GenericRecord>builder(getFilePath())
        .withConf(configuration)
        .build();
}
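// The projection passed to setRequestedProjection above can be any Avro schema covering a
// subset of the file's columns; a minimal sketch with hypothetical field names:
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

// Hypothetical projection: read only the `id` and `name` columns from the file.
Schema projection = SchemaBuilder.record("Projection")
    .fields()
    .requiredLong("id")
    .requiredString("name")
    .endRecord();
AvroReadSupport.setRequestedProjection(configuration, projection);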
/**
 * NOTE: This literally reads the entire file contents, thus should be used with caution.
 */
public static List<GenericRecord> readAvroRecords(Configuration configuration, Path filePath) {
    ParquetReader reader = null;
    List<GenericRecord> records = new ArrayList<>();
    try {
        reader = AvroParquetReader.builder(filePath).withConf(configuration).build();
        Object obj = reader.read();
        while (obj != null) {
            if (obj instanceof GenericRecord) {
                records.add((GenericRecord) obj);
            }
            obj = reader.read();
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // ignore
            }
        }
    }
    return records;
}
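// A short usage sketch of the helper above; the path is a placeholder:
Configuration conf = new Configuration();
Path filePath = new Path("/tmp/example.parquet"); // placeholder path
List<GenericRecord> records = readAvroRecords(conf, filePath);
for (GenericRecord record : records) {
    System.out.println(record);
}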
public Stream<GenericRecord> toStream(org.apache.hadoop.fs.Path hadoopPath) throws IOException {
    Filter filter = makeFilter();
    ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(hadoopPath)
        .withFilter(filter)
        .withConf(getConfiguration())
        .build();
    return toStream(reader);
}
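// makeFilter() is not shown in these snippets; a minimal sketch of one way to build a
// Filter with the parquet filter2 API, assuming a hypothetical string column `name`:
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.compat.FilterCompat.Filter;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.io.api.Binary;

// Hypothetical predicate: keep only rows where the `name` column equals "alice".
Filter makeFilter() {
    return FilterCompat.get(
        FilterApi.eq(FilterApi.binaryColumn("name"), Binary.fromString("alice")));
}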
private void initReader(Path file) {
    try {
        if (reader != null) {
            reader.close();
        }
        // Register the decimal logical-type conversion so DECIMAL columns materialize as BigDecimal
        GenericData.get().addLogicalTypeConversion(new Conversions.DecimalConversion());
        this.reader = AvroParquetReader.<GenericRecord>builder(file)
            .withDataModel(GenericData.get())
            .build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath())
        .withConf(getHadoopConf()).build()) {
    // The snippet is truncated here; the remaining executor arguments (record consumer and
    // identity transformer) are assumed, following Hudi's merge path.
    wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
        new UpdateHandler(upsertHandle), x -> x);
    wrapper.execute();
}
} else {
    AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
    ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath())
        .withConf(getHadoopConf()).build();
    BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
    // ... (snippet truncated in the original)
}