/**
 * Sets the Avro schema readers should deserialize with. If never called, the
 * schema the data was written with is used instead.
 * <p>
 * Any differences between the read and write schemas are reconciled via
 * <a href="http://avro.apache.org/docs/current/spec.html#Schema+Resolution">Avro's schema resolution rules</a>.
 *
 * @param job a job
 * @param avroReadSchema the requested schema
 * @see #setRequestedProjection(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 * @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 */
public static void setAvroReadSchema(Job job, Schema avroReadSchema) {
  // Delegate to the read-support helper, resolving the job's Configuration first.
  AvroReadSupport.setAvroReadSchema(ContextUtil.getConfiguration(job), avroReadSchema);
}
private static void setConfigProperties(Configuration conf, Format format, Schema schema, Class<?> type) { GenericData model = DataModelUtil.getDataModelForType(type); if (Formats.AVRO.equals(format)) { setModel.invoke(conf, model.getClass()); conf.set(AVRO_SCHEMA_INPUT_KEY, schema.toString()); } else if (Formats.PARQUET.equals(format)) { // TODO: update to a version of Parquet with setAvroDataSupplier //AvroReadSupport.setAvroDataSupplier(conf, // DataModelUtil.supplierClassFor(model)); AvroReadSupport.setAvroReadSchema(conf, schema); } }
/**
 * Opens the underlying Parquet reader and positions it on the first record.
 * May only be called once, while the reader is still in the NEW state.
 *
 * @throws DatasetIOException if the Parquet file cannot be opened
 */
@Override
public void initialize() {
  // Guard against double-open: the state machine only allows NEW -> OPEN here.
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "A reader may not be opened more than once - current state:%s", state);
  LOG.debug("Opening reader on path:{}", path);
  try {
    final Configuration readerConf = fileSystem.getConf();
    // Ask Parquet's Avro support to resolve records against our reader schema.
    AvroReadSupport.setAvroReadSchema(readerConf, readerSchema);
    reader = new AvroParquetReader<E>(readerConf, fileSystem.makeQualified(path));
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create reader path:" + path, e);
  }
  // Pre-fetch the first record so hasNext()/next() have data ready.
  advance();
  state = ReaderWriterState.OPEN;
}
// Merge the filesystem's configuration for this path into conf so the reader
// picks up the correct FS settings.
conf.addResource(getFs(filePath.toString(), conf).getConf());
// Presumably a schema containing only the record-key field — TODO confirm.
// It is installed both as the reader schema and as the requested projection,
// so Parquet can skip all other columns while scanning for keys.
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
AvroReadSupport.setAvroReadSchema(conf, readSchema);
AvroReadSupport.setRequestedProjection(conf, readSchema);
// Accumulator for the keys read from the file.
Set<String> rowKeys = new HashSet<>();
// NOTE(review): statement is truncated — the receiver of this chained call
// (presumably AvroReadSupport) is outside the visible span.
// Parses schemaStr and registers it as the Avro reader schema on the Spark
// context's Hadoop configuration.
.setAvroReadSchema(jsc.hadoopConfiguration(), (new Schema.Parser().parse(schemaStr)));
// Tell ParquetInputFormat to materialize records through Avro's read support.
ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class));
/**
 * Builds a Parquet reader for this source's file, applying the configured
 * reader schema and column projection when present.
 *
 * @return a reader producing {@link GenericRecord}s from {@code getFilePath()}
 * @throws IOException if the reader cannot be constructed
 */
private ParquetReader<GenericRecord> initReader() throws IOException {
  Configuration configuration = getFs().getConf();
  if (this.schema != null) {
    // Resolve records against the caller-supplied reader schema.
    AvroReadSupport.setAvroReadSchema(configuration, this.schema);
  }
  if (this.projection != null) {
    // Restrict the columns Parquet reads to the projected fields.
    AvroReadSupport.setRequestedProjection(configuration, this.projection);
  }
  // Fixed: the original assigned the result to a raw-typed local
  // (ParquetReader reader), producing an unchecked-conversion warning;
  // return the correctly parameterized reader directly instead.
  return AvroParquetReader.<GenericRecord>builder(getFilePath())
      .withConf(configuration)
      .build();
}
// NOTE(review): statement is truncated — the receiver of this chained call
// (presumably AvroReadSupport) is outside the visible span.
// Parses schemaStr and registers it as the Avro reader schema on the Spark
// context's Hadoop configuration.
.setAvroReadSchema(jsc.hadoopConfiguration(), (new Schema.Parser().parse(schemaStr)));
// Tell ParquetInputFormat to materialize records through Avro's read support.
ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class));
// Register readerSchema on conf so Parquet's Avro support resolves records
// against it instead of the file's write schema.
AvroReadSupport.setAvroReadSchema(conf, readerSchema);
// NOTE(review): fragment starts mid-argument-list — the enclosing call that
// produces avroReadSchema is outside the visible span. These arguments convert
// the Parquet projection to an Avro schema named after the projection.
AvroSchemaUtil.convert(ParquetSchemaUtil.convert(projection), projection.getName()),
    expectedSchema, ImmutableMap.of());
// Wrap the resolved schema for Parquet-Avro compatibility and install it as
// the reader schema.
AvroReadSupport.setAvroReadSchema(configuration, ParquetAvro.parquetAvroSchema(avroReadSchema));
// NOTE(review): fragment — the throw/if this string belongs to, and the body
// of the try-with-resources below, are outside the visible span.
"Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId);
} else {
  // Read the old file back with the upsert handle's (merged) schema.
  AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
  // Executor that will pipe records from the reader into the handle; created later.
  BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
  // try-with-resources guarantees the Parquet reader is closed.
  try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath())
// NOTE(review): fragment — the throw/if this string belongs to is outside the
// visible span, as is whatever eventually closes `reader`; cannot confirm the
// reader is released from this view alone — TODO verify downstream close().
"Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId);
} else {
  // Read the old file back with the upsert handle's (merged) schema.
  AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
  ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath())
      .withConf(getHadoopConf()).build();