@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
                                               final Configuration conf, final Path path) throws IOException {
    final ParquetReader.Builder<GenericRecord> readerBuilder =
        AvroParquetReader.<GenericRecord>builder(path).withConf(conf);
    return new AvroParquetHDFSRecordReader(readerBuilder.build());
}
protected String dumpArchive(FileSystem fs, Configuration conf, String file) throws Throwable {
    AvroParquetReader<GenericRecord> reader = null;
    StringBuilder dump = new StringBuilder();
    try {
        reader = new AvroParquetReader<GenericRecord>(conf, new Path(file));
        // read() returns null once the file is exhausted
        GenericRecord record = reader.read();
        while (record != null) {
            dump.append(record.toString()).append('\n');
            record = reader.read();
        }
        return dump.toString();
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
reader = new AvroParquetReader(conf, path);
while (true) {
    IndexedRecord datum;
    try {
        datum = reader.read();
    } catch (EOFException e) {
        return true; // be lenient: treat a truncated file as a clean end of data
    }
    if (datum == null) {
        return true; // read() returns null once all records are consumed
    }
}
private void advance() {
    try {
        this.next = reader.read();
    } catch (EOFException e) {
        this.next = null;
    } catch (IOException e) {
        this.state = ReaderWriterState.ERROR;
        throw new DatasetIOException("Unable to read next record from: " + path, e);
    }
}
@Override
public void initialize() {
    Preconditions.checkState(state.equals(ReaderWriterState.NEW),
        "A reader may not be opened more than once - current state:%s", state);
    LOG.debug("Opening reader on path:{}", path);
    try {
        final Configuration conf = fileSystem.getConf();
        AvroReadSupport.setAvroReadSchema(conf, readerSchema);
        reader = new AvroParquetReader<E>(conf, fileSystem.makeQualified(path));
    } catch (IOException e) {
        throw new DatasetIOException("Unable to create reader path:" + path, e);
    }
    advance();
    state = ReaderWriterState.OPEN;
}
@Override
public void close() {
    if (!state.equals(ReaderWriterState.OPEN)) {
        return;
    }
    LOG.debug("Closing reader on path:{}", path);
    try {
        reader.close();
    } catch (IOException e) {
        this.state = ReaderWriterState.ERROR;
        throw new DatasetIOException("Unable to close reader path:" + path, e);
    }
    state = ReaderWriterState.CLOSED;
}
Schema chukwaAvroSchema = ChukwaAvroSchema.getSchema();
AvroReadSupport.setRequestedProjection(conf, chukwaAvroSchema);
reader = new AvroParquetReader<GenericRecord>(conf, new Path(dataSinkFile));
// The original `break` implies an enclosing read loop; the second close()
// after `reader = null` was a bug (it would throw a NullPointerException).
while (true) {
    record = reader.read();
    if (record == null) {
        break; // end of file
    }
    // ... (record handling elided in the original)
}
reader.close();
reader = null;
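// Several snippets above use the `new AvroParquetReader(conf, path)` constructor, which is
// deprecated in recent parquet-avro releases. A minimal sketch of the builder-based
// equivalent, assuming a Hadoop `Configuration conf` and `Path path` are in scope:
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

try (ParquetReader<GenericRecord> reader = AvroParquetReader
        .<GenericRecord>builder(HadoopInputFile.fromPath(path, conf))
        .build()) {
    // read() returns null at end of file
    for (GenericRecord record = reader.read(); record != null; record = reader.read()) {
        System.out.println(record);
    }
}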
public AvroParquetFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    String topic = logFilePath.getTopic();
    Schema schema = schemaRegistryClient.getSchema(topic);
    reader = AvroParquetReader.<GenericRecord>builder(path).build();
    writer = new SpecificDatumWriter<>(schema);
    offset = logFilePath.getOffset();
}
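// For context, a hedged sketch of how a DatumWriter such as the one created above is
// typically paired with an Avro binary encoder to re-serialize records read from Parquet;
// `writer` and `record` are assumed to be in scope:
import java.io.ByteArrayOutputStream;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
writer.write(record, encoder); // serialize one record to Avro binary
encoder.flush();
byte[] avroBytes = out.toByteArray();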
AvroReadSupport.setRequestedProjection(conf, readSchema);
Set<String> rowKeys = new HashSet<>();
try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
    Object obj = reader.read();
    while (obj != null) {
        if (obj instanceof GenericRecord) {
            // collect the row key from each projected record (field name assumed)
            rowKeys.add(((GenericRecord) obj).get("_hoodie_record_key").toString());
        }
        obj = reader.read();
    }
}
public Stream<GenericRecord> toStream(Path hadoopPath) throws IOException {
    Filter filter = makeFilter();
    ParquetReader<GenericRecord> reader;
    try {
        reader = AvroParquetReader.<GenericRecord>builder(hadoopPath)
            .withFilter(filter)
            .withConf(getConfiguration())
            .build();
    } catch (IOException | RuntimeException e) {
        // The default exception may not refer to the input path, so wrap it
        throw new IOException("Issue on path: " + hadoopPath, e);
    }
    return toStream(reader);
}
private ParquetReader<GenericRecord> initReader() throws IOException {
    Configuration configuration = getFs().getConf();
    if (this.schema != null) {
        AvroReadSupport.setAvroReadSchema(configuration, this.schema);
    }
    if (this.projection != null) {
        AvroReadSupport.setRequestedProjection(configuration, this.projection);
    }
    return AvroParquetReader.<GenericRecord>builder(getFilePath())
        .withConf(configuration)
        .build();
}
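// The projection passed to setRequestedProjection above can be any Avro schema covering a
// subset of the file's columns; a minimal sketch with hypothetical field names:
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

// Hypothetical projection: read only the `id` and `name` columns from the file.
Schema projection = SchemaBuilder.record("Projection")
    .fields()
    .requiredLong("id")
    .requiredString("name")
    .endRecord();
AvroReadSupport.setRequestedProjection(configuration, projection);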
/**
 * NOTE: This literally reads the entire file contents, thus should be used with caution.
 */
public static List<GenericRecord> readAvroRecords(Configuration configuration, Path filePath) {
    ParquetReader reader = null;
    List<GenericRecord> records = new ArrayList<>();
    try {
        reader = AvroParquetReader.builder(filePath).withConf(configuration).build();
        Object obj = reader.read();
        while (obj != null) {
            if (obj instanceof GenericRecord) {
                records.add((GenericRecord) obj);
            }
            obj = reader.read();
        }
    } catch (IOException e) {
        throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                // ignore
            }
        }
    }
    return records;
}
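// A short usage sketch of the helper above; the path is a placeholder:
Configuration conf = new Configuration();
Path filePath = new Path("/tmp/example.parquet"); // placeholder path
List<GenericRecord> records = readAvroRecords(conf, filePath);
for (GenericRecord record : records) {
    System.out.println(record);
}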
public Stream<GenericRecord> toStream(org.apache.hadoop.fs.Path hadoopPath) throws IOException {
    Filter filter = makeFilter();
    ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(hadoopPath)
        .withFilter(filter)
        .withConf(getConfiguration())
        .build();
    return toStream(reader);
}
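// makeFilter() is not shown in these snippets; a minimal sketch of one way to build a
// Filter with the parquet filter2 API, assuming a hypothetical string column `name`:
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.compat.FilterCompat.Filter;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.io.api.Binary;

// Hypothetical predicate: keep only rows where the `name` column equals "alice".
Filter makeFilter() {
    return FilterCompat.get(
        FilterApi.eq(FilterApi.binaryColumn("name"), Binary.fromString("alice")));
}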
private void initReader(Path file) {
    try {
        if (reader != null) {
            reader.close();
        }
        // Register the decimal logical-type conversion so DECIMAL columns materialize as BigDecimal
        GenericData.get().addLogicalTypeConversion(new Conversions.DecimalConversion());
        this.reader = AvroParquetReader.<GenericRecord>builder(file)
            .withDataModel(GenericData.get())
            .build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath())
        .withConf(getHadoopConf()).build()) {
    // The snippet is truncated here; the remaining executor arguments (record consumer and
    // identity transformer) are assumed, following Hudi's merge path.
    wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
        new UpdateHandler(upsertHandle), x -> x);
    wrapper.execute();
}
} else {
    AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
    ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath())
        .withConf(getHadoopConf()).build();
    BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
    // ... (snippet truncated in the original)
}