/**
 * Resolves the schema for an enum datum: real Java {@code Enum} instances are resolved
 * through their class; anything else is delegated to the superclass.
 */
@Override
protected Schema getEnumSchema(Object datum) {
  if (datum instanceof Enum) {
    return getSchema(datum.getClass());
  }
  return super.getEnumSchema(datum);
}
// Restores the transient Avro machinery after Java deserialization.
// NOTE(review): the read order (record class, then schema string) must mirror the
// corresponding writeObject, which is not visible in this chunk — confirm.
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream inputStream) throws ClassNotFoundException, IOException {
  recordClazz = (Class<? extends SpecificRecord>) inputStream.readObject();
  schemaString = (String) inputStream.readObject();
  if (recordClazz != null) {
    // Specific record: derive the schema from the generated class.
    schema = SpecificData.get().getSchema(recordClazz);
  } else {
    // Generic record: re-parse the schema string carried across serialization.
    schema = new Schema.Parser().parse(schemaString);
  }
  // Rebuild the non-serializable writer/encoder state.
  datumWriter = new SpecificDatumWriter<>(schema);
  arrayOutputStream = new ByteArrayOutputStream();
  encoder = EncoderFactory.get().binaryEncoder(arrayOutputStream, null);
}
}
/**
 * Returns the schema for {@code datum}; a genuine Java enum is mapped via its own class,
 * all other values fall back to the default enum-schema lookup.
 */
@Override
protected Schema getEnumSchema(Object datum) {
  final boolean isJavaEnum = datum instanceof Enum;
  return isJavaEnum ? getSchema(datum.getClass()) : super.getEnumSchema(datum);
}
// Lazily initializes the Avro reader state; no-op if already initialized.
void checkAvroInitialized() {
  if (datumReader != null) {
    return; // already initialized
  }
  // Resolve user classes through the context classloader so that records loaded by a
  // child (user-code) classloader are found.
  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  if (SpecificRecord.class.isAssignableFrom(recordClazz)) {
    // Specific record: the schema comes from the generated class itself.
    SpecificData specificData = new SpecificData(cl);
    this.datumReader = new SpecificDatumReader<>(specificData);
    this.reader = specificData.getSchema(recordClazz);
  } else {
    // Generic record: re-parse the schema string carried with this instance.
    this.reader = new Schema.Parser().parse(schemaString);
    GenericData genericData = new GenericData(cl);
    this.datumReader = new GenericDatumReader<>(null, this.reader, genericData);
  }
  this.inputStream = new MutableByteArrayInputStream();
  this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
/**
 * Creates an Avro serialization schema for the given specific record class.
 *
 * @param recordClazz Avro record class used to serialize Flink's row to Avro's record
 */
public AvroRowSerializationSchema(Class<? extends SpecificRecord> recordClazz) {
  Preconditions.checkNotNull(recordClazz, "Avro record class must not be null.");
  this.recordClazz = recordClazz;
  // NOTE(review): uses the singleton SpecificData model; other code in this file resolves
  // schemas through an explicit classloader — confirm the singleton is intended here.
  this.schema = SpecificData.get().getSchema(recordClazz);
  // Keep a string copy so the schema survives Java serialization of this instance.
  this.schemaString = schema.toString();
  this.datumWriter = new SpecificDatumWriter<>(schema);
  this.arrayOutputStream = new ByteArrayOutputStream();
  this.encoder = EncoderFactory.get().binaryEncoder(arrayOutputStream, null);
}
/**
 * Creates a ParquetWriterFactory for an Avro specific type. The Parquet writers will use the
 * schema of that specific type to build and write the columnar data.
 *
 * @param type The class of the type to write.
 */
public static <T extends SpecificRecordBase> ParquetWriterFactory<T> forSpecificRecord(Class<T> type) {
  // Capture the schema as a string so the builder lambda stays serializable.
  final Schema schema = SpecificData.get().getSchema(type);
  final String schemaString = schema.toString();
  final ParquetBuilder<T> builder =
      out -> createAvroParquetWriter(schemaString, SpecificData.get(), out);
  return new ParquetWriterFactory<>(builder);
}
/**
 * Constructs a writer for the given generated class; the data model is resolved via
 * {@code SpecificData.getForClass(c)} so it honors the class's own classloader.
 */
public SpecificDatumWriter(Class<T> c) { super(SpecificData.get().getSchema(c), SpecificData.getForClass(c)); }
/**
 * Constructs a writer for instances of the given generated class.
 *
 * <p>Uses {@link SpecificData#getForClass(Class)} for the data model so it is resolved
 * through the class's own classloader, matching the sibling constructor in this file.
 * The singleton {@code SpecificData.get()} model can miss classes loaded by a child
 * classloader (e.g. application containers).
 *
 * @param c the generated specific-record class to write
 */
public SpecificDatumWriter(Class<T> c) {
  super(SpecificData.get().getSchema(c), SpecificData.getForClass(c));
}
/**
 * Construct for reading instances of a class.
 *
 * <p>Builds a ReflectData model over the class's own classloader and derives the
 * reader schema from the class via reflection.
 */
public ReflectDatumReader(Class<T> c) { this(new ReflectData(c.getClassLoader())); setSchema(getSpecificData().getSchema(c)); }
/** * Converts an Avro class into a nested row structure with deterministic field order and data * types that are compatible with Flink's Table & SQL API. * * @param avroClass Avro specific record that contains schema information * @return type information matching the schema */ @SuppressWarnings("unchecked") public static <T extends SpecificRecord> TypeInformation<Row> convertToTypeInfo(Class<T> avroClass) { Preconditions.checkNotNull(avroClass, "Avro specific record class must not be null."); // determine schema to retrieve deterministic field order final Schema schema = SpecificData.get().getSchema(avroClass); return (TypeInformation<Row>) convertToTypeInfo(schema); }
/**
 * Construct for reading instances of a class.
 *
 * <p>The reflective data model is created with the class's classloader; the schema is
 * then induced from the class and installed as this reader's schema.
 */
public ReflectDatumReader(Class<T> c) { this(new ReflectData(c.getClassLoader())); setSchema(getSpecificData().getSchema(c)); }
/**
 * Construct for reading instances of a class.
 *
 * <p>Uses a SpecificData model bound to the class's own classloader, then derives the
 * schema from the generated class.
 */
public SpecificDatumReader(Class<T> c) { this(new SpecificData(c.getClassLoader())); setSchema(getSpecificData().getSchema(c)); }
/**
 * Construct for reading instances of a class.
 *
 * <p>Resolves the data model via {@code SpecificData.getForClass(c)} (classloader-aware),
 * then derives the schema from the generated class.
 */
public SpecificDatumReader(Class<T> c) { this(SpecificData.getForClass(c)); setSchema(getSpecificData().getSchema(c)); }
/**
 * Builds an AvroFactory for a generated specific-record type: reads with the previously
 * written schema when one is available, otherwise with the schema derived from the class.
 */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static <T> AvroFactory<T> fromSpecific(Class<T> type, ClassLoader cl, Optional<Schema> previousSchema) {
  final SpecificData model = new SpecificData(cl);
  final Schema schemaForClass = model.getSchema(type);
  // If a previous (written) schema exists, resolve from it into the current class schema.
  final Schema writtenSchema = previousSchema.orElse(schemaForClass);
  return new AvroFactory<>(
      model,
      schemaForClass,
      new SpecificDatumReader<>(writtenSchema, schemaForClass, model),
      new SpecificDatumWriter<>(schemaForClass, model));
}
@Override public void setSchema(Schema actual) { // if expected is unset and actual is a specific record, // then default expected to schema of currently loaded class if (getExpected() == null && actual != null && actual.getType() == Schema.Type.RECORD) { SpecificData data = getSpecificData(); Class c = data.getClass(actual); if (c != null && SpecificRecord.class.isAssignableFrom(c)) setExpected(data.getSchema(c)); } super.setSchema(actual); }
@Override public void setSchema(Schema actual) { // if expected is unset and actual is a specific record, // then default expected to schema of currently loaded class if (getExpected() == null && actual != null && actual.getType() == Schema.Type.RECORD) { SpecificData data = getSpecificData(); Class c = data.getClass(actual); if (c != null && SpecificRecord.class.isAssignableFrom(c)) setExpected(data.getSchema(c)); } super.setSchema(actual); }
/**
 * Attempts to derive an Avro schema for the given runtime type.
 *
 * <p>Returns {@code null} for generic records (a GenericRecord carries its schema
 * per-instance, so the class alone tells us nothing); specific records use a
 * SpecificData model, everything else falls back to reflection.
 */
private static Schema tryExtractAvroSchema(ClassLoader cl, Class<?> runtimeType) {
  if (isGenericRecord(runtimeType)) {
    return null;
  }
  if (isSpecificRecord(runtimeType)) {
    return new SpecificData(cl).getSchema(runtimeType);
  }
  return new ReflectData(cl).getSchema(runtimeType);
}
/**
 * Creates a Avro deserialization schema for the given specific record class. Having the
 * concrete Avro record class might improve performance.
 *
 * @param recordClazz Avro record class used to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(Class<? extends SpecificRecord> recordClazz) {
  Preconditions.checkNotNull(recordClazz, "Avro record class must not be null.");
  this.recordClazz = recordClazz;
  // Schema and row type information are derived from the generated class.
  schema = SpecificData.get().getSchema(recordClazz);
  typeInfo = (RowTypeInfo) AvroSchemaConverter.convertToTypeInfo(recordClazz);
  // Keep a string copy so the schema survives Java serialization of this instance.
  schemaString = schema.toString();
  // Reusable record instance to deserialize into.
  record = (IndexedRecord) SpecificData.newInstance(recordClazz, schema);
  datumReader = new SpecificDatumReader<>(schema);
  inputStream = new MutableByteArrayInputStream();
  decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
/** Passes iff schema induction handles the field's generic Map type without throwing. */
@Test
public void testGetMapSchema() throws Exception {
  final java.lang.reflect.Type mapFieldType = X.class.getField("map").getGenericType();
  SpecificData.get().getSchema(mapFieldType);
}
/** Round-trips two specific records, then re-checks after making their kinds equal. */
@Test
public void testSpecificRecord() throws Exception {
  final TestRecord first = new TestRecord();
  first.setName("foo");
  first.setKind(Kind.BAZ);
  first.setHash(new MD5(new byte[] {0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5}));

  final TestRecord second = new TestRecord();
  second.setName("bar");
  second.setKind(Kind.BAR);
  second.setHash(new MD5(new byte[] {0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,6}));

  final Schema schema = SpecificData.get().getSchema(TestRecord.class);
  check(schema, first, second, true, new SpecificDatumWriter<>(schema), SpecificData.get());

  // Same kind on both records; name and hash still differ.
  second.setKind(Kind.BAZ);
  check(schema, first, second, true, new SpecificDatumWriter<>(schema), SpecificData.get());
}