/**
 * Writes an Avro container file containing zero records to {@code outStream}.
 * The embedded schema is an empty record, so downstream consumers still see a
 * structurally valid Avro stream.
 *
 * @param outStream destination for the empty Avro file; not closed by this method
 * @throws IOException if the Avro header cannot be written
 */
public static void createEmptyAvroStream(final OutputStream outStream) throws IOException {
    // A record with no fields is the minimal schema a valid Avro file can carry.
    final Schema emptySchema = SchemaBuilder.record("NiFi_ExecuteSQL_Record")
        .namespace("any.data")
        .fields()
        .endRecord();
    final DatumWriter<GenericRecord> recordWriter = new GenericDatumWriter<>(emptySchema);
    try (final DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(recordWriter)) {
        // create() emits the container header (schema + sync marker); no records follow.
        fileWriter.create(emptySchema, outStream);
    }
}
/**
 * Parses the given string CTL schema along with its dependencies as an
 * {@link org.apache.avro.Schema Avro schema}.
 *
 * @param avroSchema a string CTL schema to parse
 * @return the parsed CTL schema as an Avro schema
 * @throws Exception if the given CTL schema is invalid and thus cannot be parsed
 */
public static Schema parseStringCtlSchema(String avroSchema) throws Exception {
    Schema.Parser parser = new Schema.Parser();
    JsonNode root = new ObjectMapper().readTree(avroSchema);
    JsonNode dependencies = root.get(DEPENDENCIES);
    if (dependencies != null && dependencies.isArray()) {
        // Register a placeholder (empty) record for every declared dependency so
        // that references to those names resolve while parsing the main schema.
        Map<String, Schema> placeholders = new HashMap<>();
        for (JsonNode dependency : dependencies) {
            Fqn fqn = new Fqn(dependency.get(FQN).asText());
            placeholders.put(fqn.getFqnString(),
                SchemaBuilder.record(fqn.getName())
                    .namespace(fqn.getNamespace())
                    .fields()
                    .endRecord());
        }
        parser.addTypes(placeholders);
    }
    return parser.parse(avroSchema);
}
@SuppressWarnings("fallthrough") private Schema getGranularityBasedSchema() { FieldAssembler<Schema> assembler = SchemaBuilder.record("GenericRecordTimePartition").namespace("gobblin.writer.partitioner").fields(); // Construct the fields in reverse order if (!Strings.isNullOrEmpty(this.writerPartitionSuffix)) { assembler = assembler.name(SUFFIX).type(Schema.create(Schema.Type.STRING)).noDefault(); } assembler = assembler.name(this.granularity.toString()).type(Schema.create(Schema.Type.STRING)).noDefault(); if (!Strings.isNullOrEmpty(this.writerPartitionPrefix)) { assembler = assembler.name(PREFIX).type(Schema.create(Schema.Type.STRING)).noDefault(); } Schema schema = assembler.endRecord(); Collections.reverse(schema.getFields()); return schema; }
/**
 * Builds the partition schema for date-time-format-based partitioning: an
 * optional prefix, the partitioned path, then an optional suffix — all strings.
 *
 * @return a record schema whose fields appear in output order
 */
private Schema getDateTimeFormatBasedSchema() {
    // All partition columns share the same string type.
    Schema stringSchema = Schema.create(Schema.Type.STRING);
    FieldAssembler<Schema> fields = SchemaBuilder.record("GenericRecordTimePartition")
        .namespace("gobblin.writer.partitioner")
        .fields();
    if (!Strings.isNullOrEmpty(this.writerPartitionPrefix)) {
        fields = fields.name(PREFIX).type(stringSchema).noDefault();
    }
    fields = fields.name(PARTITIONED_PATH).type(stringSchema).noDefault();
    if (!Strings.isNullOrEmpty(this.writerPartitionSuffix)) {
        fields = fields.name(SUFFIX).type(stringSchema).noDefault();
    }
    return fields.endRecord();
}
/**
 * Verifies that AvroUtils.switchName renames a record schema while keeping its
 * fields intact, and that switching the name back restores the original schema.
 */
@Test
public void testSwitchName() {
    final String originalName = "originalName";
    final String newName = "newName";
    Schema original = SchemaBuilder.record(originalName).fields()
        .requiredDouble("double")
        .optionalFloat("float")
        .endRecord();

    Schema renamed = AvroUtils.switchName(original, newName);
    Assert.assertEquals(renamed.getName(), newName);
    // Every field must survive the rename unchanged.
    for (Schema.Field renamedField : renamed.getFields()) {
        Assert.assertEquals(renamedField, original.getField(renamedField.name()));
    }

    Assert.assertEquals(newName, AvroUtils.switchName(original, newName).getName());
    // Round-tripping the name yields a schema equal to the original.
    Assert.assertEquals(original,
        AvroUtils.switchName(AvroUtils.switchName(original, newName), original.getName()));
}
/** Checks field-schema lookup and value extraction through a [null, record] union. */
@Test
public void testUnionWithNull() {
    Schema innerRecord = SchemaBuilder.record("nested").fields()
        .requiredDouble("double")
        .requiredString("string")
        .endRecord();
    Schema nullableUnion = SchemaBuilder.unionOf().nullType().and().type(innerRecord).endUnion();
    Schema outerSchema = SchemaBuilder.record("record").fields()
        .name("union").type(nullableUnion).noDefault()
        .endRecord();

    // The union member's field schema should be reachable via a dotted path.
    Schema resolvedDouble = AvroUtils.getFieldSchema(outerSchema, "union.double").get();
    Assert.assertEquals(resolvedDouble.getType(), Schema.Type.DOUBLE);

    GenericRecord inner = new GenericData.Record(innerRecord);
    inner.put("double", 10);
    inner.put("string", "testString");
    GenericRecord outer = new GenericData.Record(outerSchema);
    outer.put("union", inner);

    String extracted = AvroUtils.getFieldValue(outer, "union.string").get().toString();
    Assert.assertEquals(extracted, "testString");
}
}
/** Parsing a schema from one string must equal parsing it from multiple fragments. */
@Test
public void testSplitSchemaBuild() {
    Schema handshake = SchemaBuilder
        .record("HandshakeRequest")
        .namespace("org.apache.avro.ipc").fields()
        .name("clientProtocol").type().optional().stringType()
        .name("meta").type().optional().map().values().bytesType()
        .endRecord();
    String json = handshake.toString();
    int splitPoint = json.length() / 2;

    Schema fromWholeString = new org.apache.avro.Schema.Parser().parse(handshake.toString());
    // The varargs parse() overload concatenates the fragments before parsing.
    Schema fromFragments = new org.apache.avro.Schema.Parser()
        .parse(json.substring(0, splitPoint), json.substring(splitPoint));

    assertNotNull(fromWholeString);
    assertNotNull(fromFragments);
    assertEquals(fromWholeString.toString(), fromFragments.toString());
}
/** Registering a schema and fetching it back by its generated id must round-trip. */
@Test
public void testRegisterAndGetByKey() throws SchemaRegistryException {
    Properties props = new Properties();
    props.setProperty(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, TEST_URL);
    SchemaRegistryClient client = new MockSchemaRegistryClient();
    KafkaSchemaRegistry<Integer, Schema> registry =
        new ConfluentKafkaSchemaRegistry(props, client);

    Schema schema = SchemaBuilder.record(TEST_RECORD_NAME)
        .namespace(TEST_NAMESPACE)
        .fields()
        .name(TEST_FIELD_NAME).type().stringType().noDefault()
        .endRecord();

    Integer key = registry.register(schema);
    Assert.assertEquals(schema, registry.getSchemaByKey(key));
}
@Test public void testWriteNullableUUIDReadRequiredString() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()) .fields().optionalString("uuid").endRecord(); LogicalTypes.uuid().addToSchema( nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); RecordWithUUID r1 = new RecordWithUUID(); r1.uuid = u1; RecordWithUUID r2 = new RecordWithUUID(); r2.uuid = u2; List<RecordWithStringUUID> expected = Arrays.asList( new RecordWithStringUUID(), new RecordWithStringUUID()); expected.get(0).uuid = u1.toString(); expected.get(1).uuid = u2.toString(); File test = write(REFLECT, nullableUuidSchema, r1, r2); // verify that the field's type overrides the logical type Schema uuidStringSchema = SchemaBuilder .record(RecordWithStringUUID.class.getName()) .fields().requiredString("uuid").endRecord(); Assert.assertEquals("Should read uuid as String without UUID conversion", expected, read(REFLECT.createDatumReader(uuidStringSchema), test)); }
@Test public void testWriteNullableUUID() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()) .fields().optionalString("uuid").endRecord(); LogicalTypes.uuid().addToSchema( nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); UUID u1 = UUID.randomUUID(); UUID u2 = UUID.randomUUID(); RecordWithUUID r1 = new RecordWithUUID(); r1.uuid = u1; RecordWithUUID r2 = new RecordWithUUID(); r2.uuid = u2; List<RecordWithStringUUID> expected = Arrays.asList( new RecordWithStringUUID(), new RecordWithStringUUID()); expected.get(0).uuid = u1.toString(); expected.get(1).uuid = u2.toString(); File test = write(REFLECT, nullableUuidSchema, r1, r2); // verify that the field's type overrides the logical type Schema nullableUuidStringSchema = SchemaBuilder .record(RecordWithStringUUID.class.getName()) .fields().optionalString("uuid").endRecord(); Assert.assertEquals("Should read uuid as String without UUID conversion", expected, read(ReflectData.get().createDatumReader(nullableUuidStringSchema), test)); }
/**
 * Serializes an Avro record containing a single field of the given Avro type,
 * deserializes it through the KSQL serde, and asserts the resulting column value.
 *
 * @param avroSchema Avro schema of the single field
 * @param avroValue  value written into the Avro record
 * @param ksqlSchema Connect schema of the corresponding KSQL column
 * @param ksqlValue  expected column value after deserialization
 */
private void shouldDeserializeTypeCorrectly(final org.apache.avro.Schema avroSchema,
                                            final Object avroValue,
                                            final Schema ksqlSchema,
                                            final Object ksqlValue) {
    final SchemaRegistryClient registryClient = new MockSchemaRegistryClient();

    final org.apache.avro.Schema recordSchema =
        org.apache.avro.SchemaBuilder.record("test_row")
            .fields()
            .name("field0").type(avroSchema).noDefault()
            .endRecord();
    final Schema rowSchema = SchemaBuilder.struct().field("field0", ksqlSchema).build();

    final GenericRecord avroRecord = new GenericData.Record(recordSchema);
    avroRecord.put("field0", avroValue);

    final GenericRow row = serializeDeserializeAvroRecord(
        rowSchema, "test-topic", registryClient, avroRecord);

    // Exactly one column comes back, holding the converted value.
    assertThat(row.getColumns().size(), equalTo(1));
    assertThat(row.getColumns().get(0), equalTo(ksqlValue));
}
/**
 * A record written with the full Address schema must deserialize against a
 * narrower reader schema: shared fields keep their values, fields absent from
 * the reader schema come back null.
 */
@Test
public void testGenericRecordReadWithCompatibleSchema() throws IOException {
    Schema readerSchema = SchemaBuilder.record("Address")
        .fields()
        .requiredString("street")
        .requiredInt("num")
        .optionalString("country")
        .endRecord();
    RegistryAvroDeserializationSchema<GenericRecord> deserializer =
        new RegistryAvroDeserializationSchema<>(
            GenericRecord.class,
            readerSchema,
            () -> new SchemaCoder() {
                @Override
                public Schema readSchema(InputStream in) {
                    // The writer schema always resolves to the full Address schema.
                    return Address.getClassSchema();
                }
            });

    GenericRecord result =
        deserializer.deserialize(writeRecord(address, Address.getClassSchema()));

    assertEquals(address.getNum(), result.get("num"));
    assertEquals(address.getStreet(), result.get("street").toString());
    // Fields not present in the reader schema deserialize to null.
    assertNull(result.get("city"));
    assertNull(result.get("state"));
    assertNull(result.get("zip"));
    assertNull(result.get("country"));
}
/** Builds a self-referential LongList record schema and verifies its structure. */
@Test
public void testRecursiveRecord() {
    Schema listSchema = SchemaBuilder.record("LongList").fields()
        .name("value").type().longType().noDefault()
        .name("next").type().optional().type("LongList")
        .endRecord();

    Assert.assertEquals("LongList", listSchema.getName());

    List<Schema.Field> fields = listSchema.getFields();
    Assert.assertEquals(2, fields.size());
    Assert.assertEquals(
        new Schema.Field("value", Schema.create(Schema.Type.LONG), null, null),
        fields.get(0));

    // optional() wraps the referenced type as ["null", "LongList"] with a null default.
    Schema.Field next = fields.get(1);
    Assert.assertEquals(Schema.Type.UNION, next.schema().getType());
    Assert.assertEquals(Schema.Type.NULL, next.schema().getTypes().get(0).getType());

    Schema recursive = next.schema().getTypes().get(1);
    Assert.assertEquals(Schema.Type.RECORD, recursive.getType());
    Assert.assertEquals("LongList", recursive.getName());
    Assert.assertEquals(NullNode.getInstance(), next.defaultValue());
}
@Test public void testEnumDefaultAppliedWhenFieldDefaultDefined() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields() .name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA).noDefault() .endRecord(); Schema readerSchema = SchemaBuilder.record("Record1").fields() .name("field1").type(ENUM_AB_ENUM_DEFAULT_A_SCHEMA).withDefault("B") .endRecord(); GenericRecord datum = new GenericData.Record(writerSchema); datum.put("field1", new GenericData.EnumSymbol(writerSchema, "C")); GenericRecord decodedDatum = serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema); //The A is the Enum default, which is assigned since C is not in [A,B]. assertEquals("A", decodedDatum.get("field1").toString()); }
/**
 * Reading an array of UUID strings with a uuid logical type on the elements
 * must convert each element to a java.util.UUID inside a java.util.List.
 */
@Test
public void testReadUUIDList() throws IOException {
    Schema listSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName())
        .fields()
        .name("uuids").type().array().items().stringType().noDefault()
        .endRecord();
    Schema arraySchema = listSchema.getField("uuids").schema();
    // Reflect the array field as a java.util.List and mark its elements as UUIDs.
    arraySchema.addProp(SpecificData.CLASS_PROP, List.class.getName());
    LogicalTypes.uuid().addToSchema(arraySchema.getElementType());

    UUID first = UUID.randomUUID();
    UUID second = UUID.randomUUID();
    GenericRecord record = new GenericData.Record(listSchema);
    record.put("uuids", Arrays.asList(first.toString(), second.toString()));

    RecordWithUUIDList expected = new RecordWithUUIDList();
    expected.uuids = Arrays.asList(first, second);

    File data = write(listSchema, record);
    Assert.assertEquals("Should convert Strings to UUIDs",
        expected, read(REFLECT.createDatumReader(listSchema), data).get(0));
}
/**
 * When the writer's symbol exists in the reader enum, it must be kept as-is —
 * the enum default must not be applied.
 */
@Test
public void testEnumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Exception {
    Schema writerSchema = SchemaBuilder.record("Record1").fields()
        .name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA).noDefault()
        .endRecord();
    Schema readerSchema = SchemaBuilder.record("Record1").fields()
        .name("field1").type(ENUM_AB_ENUM_DEFAULT_A_SCHEMA).noDefault()
        .endRecord();
    GenericRecord datum = new GenericData.Record(writerSchema);
    // Fix: construct the EnumSymbol with the field's enum schema, not the enclosing
    // record schema — EnumSymbol expects the schema of the enum itself.
    datum.put("field1",
        new GenericData.EnumSymbol(writerSchema.getField("field1").schema(), "B"));
    GenericRecord decodedDatum =
        serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema);
    // B exists in both enums, so it survives schema resolution unchanged.
    assertEquals("B", decodedDatum.get("field1").toString());
}
@Test public void testEnumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields() .name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA).noDefault() .endRecord(); Schema readerSchema = SchemaBuilder.record("Record1").fields() .name("field1").type(ENUM_AB_ENUM_DEFAULT_A_SCHEMA).noDefault() .endRecord(); GenericRecord datum = new GenericData.Record(writerSchema); datum.put("field1", new GenericData.EnumSymbol(writerSchema, "C")); GenericRecord decodedDatum = serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema); //The A is the Enum fallback value. assertEquals("A", decodedDatum.get("field1").toString()); }
/**
 * Builds the day-partition schema (optional prefix + year/month/day string
 * columns) from the job state.
 *
 * @param state       job configuration supplying partitioner properties
 * @param numBranches required by the partitioner contract; not used here
 * @param branchId    required by the partitioner contract; not used here
 */
public DayPartitioner(State state, int numBranches, int branchId) {
    _withColumnNames = state.getPropAsBoolean(
        GoggleIngestionConfigurationKeys.KEY_INCLUDE_COLUMN_NAMES, false);
    _prefix = state.getProp(GoggleIngestionConfigurationKeys.KEY_PARTITIONER_PREFIX);
    _withPrefix = StringUtils.isNotBlank(_prefix);
    _dateColumn = state.getProp(
        GoggleIngestionConfigurationKeys.KEY_DATE_COLUMN_NAME, DEFAULT_DATE_COLUMN);
    _dateFormatter = DateTimeFormat.forPattern(
        state.getProp(GoggleIngestionConfigurationKeys.KEY_DATE_FORMAT, DEFAULT_DATE_FORMAT));

    // All partition columns share the same string type.
    Schema stringType = Schema.create(Schema.Type.STRING);
    SchemaBuilder.FieldAssembler<Schema> fields =
        SchemaBuilder.record(NAME).namespace(NAME_SPACE).fields();
    if (_withPrefix) {
        fields = fields.name(PARTITION_COLUMN_PREFIX).type(stringType).noDefault();
    }
    _partitionSchema = fields
        .name(PARTITION_COLUMN_YEAR).type(stringType).noDefault()
        .name(PARTITION_COLUMN_MONTH).type(stringType).noDefault()
        .name(PARTITION_COLUMN_DAY).type(stringType).noDefault()
        .endRecord();
}
/** JSON decoding of a record with one nested sub-record populates both levels. */
@Test
public void testSingleSubRecord() throws IOException {
    final Schema childSchema = SchemaBuilder.record("Child")
        .namespace("org.apache.avro.nested")
        .fields()
        .requiredString("childField")
        .endRecord();
    final Schema parentSchema = SchemaBuilder.record("Parent")
        .namespace("org.apache.avro.nested")
        .fields()
        .requiredString("parentField1")
        .name("child1").type(childSchema).noDefault()
        .requiredString("parentField2")
        .endRecord();

    final String inputAsExpected = "{\n" + " \"parentField1\": \"parentValue1\",\n"
        + " \"child1\":{\n" + " \"childField\":\"childValue1\"\n" + " },\n"
        + " \"parentField2\":\"parentValue2\"\n" + "}";

    final ByteArrayInputStream input = new ByteArrayInputStream(inputAsExpected.getBytes());
    final JsonDecoder jsonDecoder = DecoderFactory.get().jsonDecoder(parentSchema, input);
    final DatumReader<Object> datumReader = new GenericDatumReader<Object>(parentSchema);
    final GenericData.Record decoded = (GenericData.Record) datumReader.read(null, jsonDecoder);

    assertThat(decoded.get("parentField1").toString(), equalTo("parentValue1"));
    assertThat(decoded.get("parentField2").toString(), equalTo("parentValue2"));
    assertThat(((GenericData.Record) decoded.get("child1")).get("childField").toString(),
        equalTo("childValue1"));
}