/**
 * Converts a partition struct type to its Avro {@link Schema}, naming the
 * Avro record after the {@code PartitionData} class.
 */
static Schema getSchema(Types.StructType partitionType) {
  String recordName = PartitionData.class.getName();
  return AvroSchemaUtil.convert(partitionType, recordName);
}
/**
 * Creates a read builder for the given Iceberg schema and Parquet message type.
 * Precomputes the Avro schemas for the struct using the message type's name.
 */
ReadBuilder(com.netflix.iceberg.Schema schema, MessageType type) {
  this.schema = schema;
  this.type = type;
  // derived from the constructor arguments, not the fields, so order is safe
  this.avroSchemas = AvroSchemaUtil.convertTypes(schema.asStruct(), type.getName());
}
/**
 * Visits a union schema. Only option unions (a 2-branch union with null) are
 * supported; rebuilds the option if the non-null branch was changed.
 *
 * @throws IllegalStateException if the union is not an option union
 */
@Override
public Schema union(Schema union, Iterable<Schema> options) {
  // fix: Guava Preconditions uses %s placeholders, not SLF4J-style {};
  // with {} the offending schema was silently dropped from the message
  Preconditions.checkState(isOptionSchema(union),
      "Invalid schema: non-option unions are not supported: %s", union);
  Schema nonNullOriginal = fromOption(union);
  Schema nonNullResult = fromOptions(Lists.newArrayList(options));
  if (nonNullOriginal != nonNullResult) {
    // the non-null branch changed; wrap the result back into an option union
    return toOption(nonNullResult);
  }
  return union;
}
/**
 * Reads an integer id property from a schema, first unwrapping an option
 * union (union with null) if present.
 *
 * @throws NullPointerException if the property is missing
 */
private static int getId(Schema schema, String propertyName) {
  if (schema.getType() == UNION) {
    // unwrap the optional wrapper and read the property from the real schema
    return getId(fromOption(schema), propertyName);
  }
  Object idProp = schema.getObjectProp(propertyName);
  Preconditions.checkNotNull(idProp, "Missing expected '%s' property", propertyName);
  return toInt(idProp);
}
@Override public Schema union(Schema union, List<Schema> options) { Preconditions.checkState(AvroSchemaUtil.isOptionSchema(union), "Invalid schema: non-option unions are not supported: {}", union); // only unions with null are allowed, and a null schema results in null Schema pruned = null; if (options.get(0) != null) { pruned = options.get(0); } else if (options.get(1) != null) { pruned = options.get(1); } if (pruned != null) { if (pruned != AvroSchemaUtil.fromOption(union)) { return AvroSchemaUtil.toOption(pruned); } return union; } return null; }
// Build a generic record for the table's write schema and set its id.
Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
record.put("id", 34L);
// "points" is an option union around a list whose elements are themselves
// options: unwrap both to reach the element record schema for a point.
Record p1 = new Record(AvroSchemaUtil.fromOption(
    AvroSchemaUtil.fromOption(record.getSchema().getField("points").schema())
        .getElementType()));
p1.put("x", 1);
/** Round-trips a required map of long keys to binary values through Avro. */
@Test
public void testMapOfLongToBytes() {
  Schema keySchema = Schema.create(Schema.Type.LONG);
  Schema valueSchema = Schema.create(Schema.Type.BYTES);
  // key id 33, value id 34, embedded in the Avro schema as properties
  Schema schema = AvroSchemaUtil.createMap(33, keySchema, 34, valueSchema);
  Type map = Types.MapType.ofRequired(33, 34, Types.LongType.get(), Types.BinaryType.get());
  Assert.assertEquals("Avro schema to map", map, AvroSchemaUtil.convert(schema));
  Assert.assertEquals("Map to Avro schema", schema, AvroSchemaUtil.convert(map));
}
// tail of the replacement field constructor: same name, optional type,
// no doc string, and a null default for the option union
field.name(), toOption(convert(field.type())), null, JsonProperties.NULL_VALUE);
// carry the Iceberg field id onto the new Avro field
newField.addProp(AvroSchemaUtil.FIELD_ID_PROP, field.fieldId());
updatedFields.add(newField);
return copyRecord(record, updatedFields, renames.get(record.getFullName()));
// Avro form of the expected schema, named after the projection
AvroSchemaUtil.convert(expectedSchema, projection.getName()));
// Project the Parquet file schema (via Iceberg) down to the expected
// schema; no field renames are applied here.
org.apache.avro.Schema avroReadSchema = AvroSchemaUtil.buildAvroProjection(
    AvroSchemaUtil.convert(ParquetSchemaUtil.convert(projection), projection.getName()),
    expectedSchema, ImmutableMap.of());
// hand the projected schema to parquet-avro for reading
AvroReadSupport.setAvroReadSchema(configuration, ParquetAvro.parquetAvroSchema(avroReadSchema));
// NOTE(review): fragment — closing braces and intervening code for these
// branches are outside this view; code kept byte-identical.
if (array.getLogicalType() instanceof LogicalMap) {
  // array carries the logical-map annotation: elements are key/value records
  Schema keyValue = array.getElementType();
  int keyId = getFieldId(keyValue.getField("key"));
  int valueId = getFieldId(keyValue.getField("value"));
  // value schema differs from the visited element's: the value was projected
  if (keyValue.getField("value").schema() != element.getField("value").schema()) {
    // rebuild the map schema around the projected value schema
    return AvroSchemaUtil.createMap(
        keyId, keyValue.getField("key").schema(),
        valueId, element.getField("value").schema());
// plain array path: keep the array if its element id was selected
int elementId = getElementId(array);
if (selectedIds.contains(elementId)) {
  return array;
@Override public Schema map(Schema map, Schema value) { int keyId = getKeyId(map); int valueId = getValueId(map); // if either key or value is selected, the whole map must be projected if (selectedIds.contains(keyId) || selectedIds.contains(valueId)) { return map; } else if (value != null) { if (value != map.getValueType()) { // the value must be a projection return Schema.createMap(value); } return map; } return null; }
@Override public Schema map(Types.MapType map, Schema keySchema, Schema valueSchema) { Schema mapSchema = results.get(map); if (mapSchema != null) { return mapSchema; } if (keySchema.getType() == Schema.Type.STRING) { // if the map has string keys, use Avro's map type mapSchema = Schema.createMap( map.isValueOptional() ? toOption(valueSchema) : valueSchema); mapSchema.addProp(AvroSchemaUtil.KEY_ID_PROP, map.keyId()); mapSchema.addProp(AvroSchemaUtil.VALUE_ID_PROP, map.valueId()); } else { mapSchema = AvroSchemaUtil.createMap(map.keyId(), keySchema, map.valueId(), map.isValueOptional() ? toOption(valueSchema) : valueSchema); } results.put(map, mapSchema); return mapSchema; }
// NOTE(review): fragment — keyValueSchema, keyField, valueField, and
// valueProjection are declared outside this view, and the closing braces
// are missing here; code kept byte-identical.
@Override
public Schema array(Schema array, Supplier<Schema> element) {
  // treat as a map when annotated LogicalMap, or when projecting to a map
  // type and the element looks like a key/value record
  if (array.getLogicalType() instanceof LogicalMap ||
      (current.isMapType() && isKeyValueSchema(array.getElementType()))) {
    Preconditions.checkArgument(current.isMapType(),
        "Incompatible projected type: %s", current);
    Types.MapType m = current.asNestedType().asMapType();
    // map path: rebuild with the projected value schema
    return AvroSchemaUtil.createProjectionMap(keyValueSchema.getFullName(),
        getFieldId(keyField), keyField.name(), keyField.schema(),
        getFieldId(valueField), valueField.name(), valueProjection.schema());
  } else if (!(array.getLogicalType() instanceof LogicalMap)) {
    // non-map path: keep the original value schema
    return AvroSchemaUtil.createProjectionMap(keyValueSchema.getFullName(),
        getFieldId(keyField), keyField.name(), keyField.schema(),
        getFieldId(valueField), valueField.name(), valueField.schema());
/**
 * Builds a Spark value writer for an Avro array. Arrays annotated with the
 * "map" logical type are written as array-backed maps of key/value records;
 * all others are written as plain lists.
 */
@Override
public ValueWriter<?> array(Schema array, ValueWriter<?> elementWriter) {
  LogicalType logical = array.getLogicalType();
  if (logical == null || !"map".equals(logical.getName())) {
    // plain list: look up the element type by its embedded id
    Type elementType = schema.findType(AvroSchemaUtil.getElementId(array));
    return SparkValueWriters.array(elementWriter, convert(elementType));
  }
  // array-backed map: the element is a key/value record whose field writers
  // were already built by the element visitor
  Schema keyValue = array.getElementType();
  Type keyType = schema.findType(getFieldId(keyValue.getField("key")));
  Type valueType = schema.findType(getFieldId(keyValue.getField("value")));
  ValueWriter<?>[] writers = ((SparkValueWriters.StructWriter) elementWriter).writers;
  return SparkValueWriters.arrayMap(
      writers[0], convert(keyType), writers[1], convert(valueType));
}
/**
 * Accepts the file's Avro schema and builds the read schema: prunes columns
 * not in the expected schema, then projects/renames to match it, and
 * recreates the wrapped datum reader.
 */
@Override
public void setSchema(Schema fileSchema) {
  this.fileSchema = fileSchema;
  Set<Integer> projectedIds = getProjectedIds(expectedSchema);
  Schema pruned = AvroSchemaUtil.pruneColumns(fileSchema, projectedIds);
  this.readSchema = AvroSchemaUtil.buildAvroProjection(pruned, expectedSchema, renames);
  this.wrapped = newDatumReader();
}
@Override public Schema.Field field(Schema.Field field, Supplier<Schema> fieldResult) { Types.StructType struct = current.asNestedType().asStructType(); int fieldId = AvroSchemaUtil.getFieldId(field); Types.NestedField expectedField = struct.field(fieldId); // TODO: what if there are no ids? // if the field isn't present, it was not selected if (expectedField == null) { return null; } String expectedName = expectedField.name(); this.current = expectedField.type(); try { Schema schema = fieldResult.get(); if (schema != field.schema() || !expectedName.equals(field.name())) { // add an alias for the field return copyField(field, schema, expectedName); } else { // always copy because fields can't be reused return copyField(field, field.schema(), field.name()); } } finally { this.current = struct; } }
/**
 * Builds a Spark struct writer from the record's field writers, pairing each
 * with the Spark type resolved from the field's embedded id.
 */
@Override
public ValueWriter<?> record(Schema record, List<String> names, List<ValueWriter<?>> fields) {
  List<DataType> sparkTypes = Lists.newArrayList();
  for (Schema.Field recordField : record.getFields()) {
    Type fieldType = schema.findType(getFieldId(recordField));
    sparkTypes.add(convert(fieldType));
  }
  return SparkValueWriters.struct(fields, sparkTypes);
}
/** Returns the schema's embedded key id, or allocates a fresh id if absent. */
private int getKeyId(Schema schema) {
  Object existing = schema.getObjectProp(AvroSchemaUtil.KEY_ID_PROP);
  return existing != null ? AvroSchemaUtil.getKeyId(schema) : allocateId();
}
/** Returns the schema's embedded element id, or allocates a fresh id if absent. */
private int getElementId(Schema schema) {
  Object existing = schema.getObjectProp(AvroSchemaUtil.ELEMENT_ID_PROP);
  return existing != null ? AvroSchemaUtil.getElementId(schema) : allocateId();
}
);
// Build a generic record for the table's write schema and set its id.
Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table"));
record.put("id", 34L);
// "locations" is an option union around a map with optional values: unwrap
// both option unions to reach the value record schema for a location.
Record l1 = new Record(AvroSchemaUtil.fromOption(
    AvroSchemaUtil.fromOption(record.getSchema().getField("locations").schema())
        .getValueType()));
l1.put("lat", 53.992811f);