public AvroParquetOutputFormat() {
  super(new AvroWriteSupport<T>());
}
/**
 * Set the Avro schema to use for writing. The schema is translated into a Parquet
 * schema so that the records can be written in Parquet format. It is also
 * stored in the Parquet metadata so that records can be reconstructed as Avro
 * objects at read time without specifying a read schema.
 *
 * @param job a job
 * @param schema a schema for the data that will be written
 * @see org.apache.parquet.avro.AvroParquetInputFormat#setAvroReadSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 */
public static void setSchema(Job job, Schema schema) {
  AvroWriteSupport.setSchema(ContextUtil.getConfiguration(job), schema);
}
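// Usage sketch, not part of the source above: wiring setSchema into MapReduce
// job setup. The "User" record schema (built with org.apache.avro.SchemaBuilder)
// and the output path are hypothetical placeholders for illustration.
public static Job newParquetWriteJob(Configuration conf) throws IOException {
  Schema schema = SchemaBuilder.record("User").fields()
      .requiredString("name")
      .requiredInt("age")
      .endRecord();
  Job job = Job.getInstance(conf);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setSchema(job, schema);              // translated and stored in file metadata
  FileOutputFormat.setOutputPath(job, new Path("/tmp/users")); // hypothetical path
  return job;
}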
@Override
public WriteContext init(Configuration conf) {
  String outputName = conf.get("crunch.namedoutput");
  if (outputName != null && !outputName.isEmpty()) {
    String schema = conf.get(PARQUET_AVRO_SCHEMA_PARAMETER + "." + outputName);
    setSchema(conf, new Schema.Parser().parse(schema));
  }
  return super.init(conf);
}
}
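// Sketch of the configuration this init method expects (key names taken from
// the code above; the named output "users" and its schema are placeholders):
//
//   conf.set("crunch.namedoutput", "users");
//   conf.set(PARQUET_AVRO_SCHEMA_PARAMETER + ".users", userSchema.toString());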
  recordConsumer.addBinary(fromAvroString(value));
  break;
case RECORD:
  writeRecord(type.asGroupType(), avroSchema, value);
  break;
case ENUM:
  // assumption: enum symbols are written as their UTF-8 string form, matching
  // upstream AvroWriteSupport (the extracted snippet had an empty case here,
  // which would silently drop enum values)
  recordConsumer.addBinary(Binary.fromString(value.toString()));
  break;
case MAP:
  writeMap(type.asGroupType(), avroSchema, (Map<CharSequence, ?>) value);
  break;
case UNION:
  writeUnion(type.asGroupType(), avroSchema, value);
  break;
/**
 * Calls an appropriate write method based on the value.
 * Value MUST not be null.
 *
 * @param type the Parquet type
 * @param avroSchema the Avro schema
 * @param value a non-null value to write
 */
private void writeValue(Type type, Schema avroSchema, Object value) {
  Schema nonNullAvroSchema = AvroSchemaConverter.getNonNull(avroSchema);
  LogicalType logicalType = nonNullAvroSchema.getLogicalType();
  if (logicalType != null) {
    Conversion<?> conversion = model.getConversionByClass(value.getClass(), logicalType);
    writeValueWithoutConversion(type, nonNullAvroSchema,
        convert(nonNullAvroSchema, logicalType, conversion, value));
  } else {
    writeValueWithoutConversion(type, nonNullAvroSchema, value);
  }
}
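// Sketch, assuming Avro 1.9+ and not part of the source above: how a conversion
// gets into `model` so that getConversionByClass can resolve it. With
// DateConversion registered, java.time.LocalDate values are converted back to
// the underlying int representation of the `date` logical type before writing.
GenericData model = new GenericData();
model.addLogicalTypeConversion(new TimeConversions.DateConversion());
// writeValue can now resolve the conversion for LocalDate values:
Conversion<?> conversion = model.getConversionByClass(LocalDate.class, LogicalTypes.date());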
@Override
public void write(T record) {
  if (rootLogicalType != null) {
    Conversion<?> conversion = model.getConversionByClass(record.getClass(), rootLogicalType);
    recordConsumer.startMessage();
    writeRecordFields(rootSchema, rootAvroSchema,
        convert(rootAvroSchema, rootLogicalType, conversion, record));
    recordConsumer.endMessage();
  } else {
    recordConsumer.startMessage();
    writeRecordFields(rootSchema, rootAvroSchema, record);
    recordConsumer.endMessage();
  }
}
@SuppressWarnings("unchecked")
public void write(IndexedRecord record) {
  write((T) record);
}
/**
 * Sets the {@link AvroDataSupplier} class that will be used. The data
 * supplier provides instances of {@link org.apache.avro.generic.GenericData}
 * that are used to deconstruct records.
 *
 * @param job a {@link Job} to configure
 * @param supplierClass a supplier class
 */
public static void setAvroDataSupplier(
    Job job, Class<? extends AvroDataSupplier> supplierClass) {
  AvroWriteSupport.setAvroDataSupplier(ContextUtil.getConfiguration(job), supplierClass);
}
}
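// Sketch of a custom supplier (hypothetical class name; parquet-avro also ships
// ReflectDataSupplier and SpecificDataSupplier). It hands AvroWriteSupport the
// GenericData implementation used to deconstruct records:
public class AllowNullReflectSupplier implements AvroDataSupplier {
  @Override
  public GenericData get() {
    // reflection-based model whose derived schemas make all fields nullable
    return ReflectData.AllowNull.get();
  }
}

// Registered on the job like so:
//   AvroParquetOutputFormat.setAvroDataSupplier(job, AllowNullReflectSupplier.class);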
@Override
public WriteContext init(Configuration configuration) {
  if (rootAvroSchema == null) {
    this.rootAvroSchema = new Schema.Parser().parse(configuration.get(AVRO_SCHEMA));
    this.rootSchema = new AvroSchemaConverter().convert(rootAvroSchema);
  }
  if (model == null) {
    this.model = getDataModel(configuration);
  }
  boolean writeOldListStructure = configuration.getBoolean(
      WRITE_OLD_LIST_STRUCTURE, WRITE_OLD_LIST_STRUCTURE_DEFAULT);
  if (writeOldListStructure) {
    this.listWriter = new TwoLevelListWriter();
  } else {
    this.listWriter = new ThreeLevelListWriter();
  }
  Map<String, String> extraMetaData = new HashMap<String, String>();
  extraMetaData.put(AvroReadSupport.AVRO_SCHEMA_METADATA_KEY, rootAvroSchema.toString());
  return new WriteContext(rootSchema, extraMetaData);
}
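// Configuration sketch for the init method above (assumes the
// WRITE_OLD_LIST_STRUCTURE constant is publicly accessible; `schema` is a
// placeholder Avro schema in scope):
Configuration conf = new Configuration();
AvroWriteSupport.setSchema(conf, schema);                          // populates the AVRO_SCHEMA key read by init
conf.setBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, false); // select ThreeLevelListWriter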
@SuppressWarnings("unchecked")
private <T> WriteSupport<T> getWriteSupport(MessageType type) {
  if (writeSupport != null) {
    return (WriteSupport<T>) writeSupport;
  } else {
    return new AvroWriteSupport<>(
        type,
        ParquetAvro.parquetAvroSchema(AvroSchemaUtil.convert(schema, name)),
        ParquetAvro.DEFAULT_MODEL);
  }
}
@Override
protected void configure(Job job, KV<Void, IndexedRecord> sample) {
  super.configure(job, sample);
  IndexedRecord record = (IndexedRecord) sample.getValue();
  AvroWriteSupport.setSchema(job.getConfiguration(), record.getSchema());
  ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
}
private static <T> WriteSupport<T> writeSupport(Schema avroSchema, GenericData model) {
  return new AvroWriteSupport<T>(
      new AvroSchemaConverter().convert(avroSchema), avroSchema, model);
}
private static <T> WriteSupport<T> writeSupport(Configuration conf, Schema avroSchema, GenericData model) {
  return new AvroWriteSupport<T>(
      new AvroSchemaConverter(conf).convert(avroSchema), avroSchema, model);
}
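// Hypothetical call site for the Configuration-aware helper above; passing the
// Configuration lets AvroSchemaConverter honor converter-related settings such
// as the list-structure flag. The "Point" schema and names are illustrative.
Schema avroSchema = SchemaBuilder.record("Point").fields()
    .requiredDouble("x")
    .requiredDouble("y")
    .endRecord();
WriteSupport<GenericRecord> support =
    writeSupport(new Configuration(), avroSchema, GenericData.get());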