/**
 * Builds a reader for an Avro record by converting it to its Iceberg struct type
 * and delegating to a generic struct reader.
 *
 * @param record the Avro record schema being read
 * @param names the record's field names (not referenced here; field readers are positional)
 * @param fields readers for each of the record's fields, in schema order
 * @return a struct reader over the converted Iceberg struct type
 */
@Override public ValueReader<?> record(Schema record, List<String> names, List<ValueReader<?>> fields) { return GenericReaders.struct(AvroSchemaUtil.convert(record).asStructType(), fields); }
/**
 * Visits one struct field, temporarily descending {@code sourceType} into the
 * matching source field's type (matched by field id) while the field's type is
 * resolved, then restoring the parent struct.
 *
 * @param field the field being visited
 * @param future supplier producing the visited type of the field
 * @return the visited type of the field
 * @throws IllegalArgumentException if {@code sourceType} is not a struct or the
 *         field id cannot be found in the source struct
 */
@Override
public Type field(Types.NestedField field, Supplier<Type> future) {
  Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType);
  Types.StructType sourceStruct = sourceType.asStructType();
  Types.NestedField sourceField = sourceStruct.field(field.fieldId());
  // fail with a clear message instead of an NPE when the id is missing from the source struct
  Preconditions.checkArgument(sourceField != null,
      "Field %s not found in source struct: %s", field.fieldId(), sourceStruct);
  this.sourceType = sourceField.type();
  try {
    return future.get();
  } finally {
    // always restore the parent struct, even if the supplier throws
    sourceType = sourceStruct;
  }
}
/**
 * Visits one struct field, temporarily descending {@code sourceType} into the
 * matching source field's type (matched by field name) while the field's type
 * is resolved, then restoring the parent struct.
 *
 * @param field the field being visited
 * @param future supplier producing the visited type of the field
 * @return the visited type of the field
 * @throws IllegalArgumentException if {@code sourceType} is not a struct or the
 *         field name cannot be found in the source struct
 */
@Override
public Type field(Types.NestedField field, Supplier<Type> future) {
  Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType);
  Types.StructType sourceStruct = sourceType.asStructType();
  Types.NestedField sourceField = sourceStruct.field(field.name());
  // a missing name previously surfaced as an NPE on sourceField.type(); throw the
  // IllegalArgumentException the reassign-ids contract documents instead
  Preconditions.checkArgument(sourceField != null,
      "Field %s not found in source struct: %s", field.name(), sourceStruct);
  this.sourceType = sourceField.type();
  try {
    return future.get();
  } finally {
    // always restore the parent struct, even if the supplier throws
    sourceType = sourceStruct;
  }
}
/**
 * Converts an ORC schema to an equivalent Iceberg schema.
 *
 * @param schema the ORC type description to convert; must convert to a struct,
 *               since the result is unwrapped via {@code asStructType()}
 * @param columnIds mapping used to assign Iceberg field ids to ORC columns
 * @return the equivalent Iceberg schema
 */
public Schema fromOrc(TypeDescription schema, ColumnIdMap columnIds) { return new Schema(convertOrcToType(schema, columnIds).asStructType().fields()); }
/**
 * Visits a struct, rebuilding it with field ids taken from the source schema.
 * Fields are matched to the source struct by name; field order, names, and
 * nullability are preserved from the input struct.
 *
 * @param struct the struct being visited
 * @param fieldTypes the already-visited types of the struct's fields, in order
 * @return a struct identical in shape to the input but carrying source field ids
 * @throws IllegalArgumentException if {@code sourceType} is not a struct or a
 *         field name cannot be found in the source struct
 */
@Override
public Type struct(Types.StructType struct, Iterable<Type> fieldTypes) {
  Preconditions.checkNotNull(sourceType, "Evaluation must start with a schema.");
  Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType);
  Types.StructType sourceStruct = sourceType.asStructType();
  List<Types.NestedField> fields = struct.fields();
  int length = fields.size();
  List<Type> types = Lists.newArrayList(fieldTypes);
  List<Types.NestedField> newFields = Lists.newArrayListWithExpectedSize(length);
  for (int i = 0; i < length; i += 1) {
    Types.NestedField field = fields.get(i);
    Types.NestedField sourceField = sourceStruct.field(field.name());
    // a missing name previously surfaced as an NPE on sourceField.fieldId(); throw the
    // documented IllegalArgumentException with a useful message instead
    Preconditions.checkArgument(sourceField != null,
        "Field %s not found in source struct: %s", field.name(), sourceStruct);
    int sourceFieldId = sourceField.fieldId();
    if (field.isRequired()) {
      newFields.add(Types.NestedField.required(sourceFieldId, field.name(), types.get(i)));
    } else {
      newFields.add(Types.NestedField.optional(sourceFieldId, field.name(), types.get(i)));
    }
  }
  return Types.StructType.of(newFields);
}
// Build a lookup from field id to ordinal position within the current struct.
// NOTE(review): fragment — assumes currentType is a struct (asStructType() throws
// otherwise); confirm the enclosing method checks this before reaching here.
Types.StructType struct = currentType.asStructType(); List<Types.NestedField> fields = struct.fields(); Map<Integer, Integer> idToOrdinal = Maps.newHashMap();
// Visits one field of the read schema, collecting human-readable error strings.
// NOTE(review): fragment — the body continues past this view; struct.field(...) can
// return null when the read field's id is absent from currentType — confirm the
// missing-field case is handled in the remainder of the method.
@Override public List<String> field(Types.NestedField readField, Supplier<List<String>> fieldErrors) { Types.StructType struct = currentType.asStructType(); Types.NestedField field = struct.field(readField.fieldId()); List<String> errors = Lists.newArrayList();
/** * Reassigns ids in a schema from another schema. * <p> * Ids are determined by field names. If a field in the schema cannot be found in the source * schema, this will throw IllegalArgumentException. * <p> * This will not alter a schema's structure, nullability, or types. * * @param schema the schema to have ids reassigned * @param idSourceSchema the schema from which field ids will be used * @return a structurally identical schema with field ids matching the source schema * @throws IllegalArgumentException if a field cannot be found (by name) in the source schema */ public static Schema reassignIds(Schema schema, Schema idSourceSchema) { Types.StructType struct = visit(schema, new ReassignIds(idSourceSchema)).asStructType(); return new Schema(struct.fields()); }
/**
 * Produces a copy of {@code schema} with its types fixed up by applying the
 * {@link FixupTypes} visitor against {@code referenceSchema}.
 *
 * @param schema the schema to fix up
 * @param referenceSchema the schema providing the reference types
 * @return a new schema with fixed-up field types
 */
static Schema fixup(Schema schema, Schema referenceSchema) {
  FixupTypes fixupVisitor = new FixupTypes(referenceSchema);
  return new Schema(TypeUtil.visit(schema, fixupVisitor).asStructType().fields());
}
// STRUCT case: render each field as "name:type" using the Hive type mapping.
// NOTE(review): fragment — the stream pipeline continues past this view; the (String)
// cast presumably applies to the collected result (e.g. Collectors.joining) — confirm
// downstream, since casting a raw Stream to String would fail at runtime.
return String.format("decimal(%s,%s)", decimalType.precision(), decimalType.scale()); case STRUCT: final Types.StructType structType = type.asStructType(); final String nameToType = (String) structType.fields().stream().map((f) -> { return String.format("%s:%s", f.name(), fromIcebergToHiveType(f.type()));
// STRUCT case: render each field as "name:type" using the Hive type mapping.
// NOTE(review): fragment (duplicate of an earlier snippet) — the stream pipeline
// continues past this view; the (String) cast presumably applies to the collected
// result — confirm downstream, since casting a raw Stream to String would fail.
return String.format("decimal(%s,%s)", decimalType.precision(), decimalType.scale()); final Types.StructType structType = type.asStructType(); final String nameToType = (String) structType.fields().stream().map((f) -> { return String.format("%s:%s", f.name(), fromIcebergToHiveType(f.type()));
// STRUCT case: render each field as "name:type" via the recursive convert().
// NOTE(review): fragment — the stream pipeline and its terminal collect/join
// continue past this view.
return format("decimal(%s,%s)", decimalType.precision(), decimalType.scale()); //TODO may be just decimal? case STRUCT: final Types.StructType structType = type.asStructType(); final String nameToType = structType.fields().stream().map( f -> format("%s:%s", f.name(), convert(f.type()))
/** * Convert a Spark {@link StructType struct} to a {@link Schema} based on the given schema. * <p> * This conversion does not assign new ids; it uses ids from the base schema. * <p> * Data types, field order, and nullability will match the spark type. This conversion may return * a schema that is not compatible with base schema. * * @param baseSchema a Schema on which conversion is based * @param sparkType a Spark StructType * @return the equivalent Schema * @throws IllegalArgumentException if the type cannot be converted or there are missing ids */ public static Schema convert(Schema baseSchema, StructType sparkType) { // convert to a type with fresh ids Types.StructType struct = visit(sparkType, new SparkTypeToType(sparkType)).asStructType(); // reassign ids to match the base schema Schema schema = TypeUtil.reassignIds(new Schema(struct.fields()), baseSchema); // fix types that can't be represented in Spark (UUID and Fixed) return FixupTypes.fixup(schema, baseSchema); }
// Narrow the generic type to its struct representation for field-level access.
// NOTE(review): fragment — surrounding context not visible.
StructType structType = type.asStructType();
@Override public Type array(Schema array, Type elementType) { if (array.getLogicalType() instanceof LogicalMap) { // map stored as an array Schema keyValueSchema = array.getElementType(); Preconditions.checkArgument(AvroSchemaUtil.isKeyValueSchema(keyValueSchema), "Invalid key-value pair schema: {}", keyValueSchema); Types.StructType keyValueType = elementType.asStructType(); Types.NestedField keyField = keyValueType.field("key"); Types.NestedField valueField = keyValueType.field("value"); if (keyValueType.field("value").isOptional()) { return Types.MapType.ofOptional( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } else { return Types.MapType.ofRequired( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } } else { // normal array Schema elementSchema = array.getElementType(); int id = getElementId(array); if (AvroSchemaUtil.isOptionSchema(elementSchema)) { return Types.ListType.ofOptional(id, elementType); } else { return Types.ListType.ofRequired(id, elementType); } } }
// STRUCT case: both values must be Records; compare field-by-field via the struct overload.
// NOTE(review): fragment — part of a type-switch inside an assertEquals helper; LIST case follows.
Assert.assertTrue("Expected should be a Record", expected instanceof Record); Assert.assertTrue("Actual should be a Record", actual instanceof Record); assertEquals(type.asStructType(), (Record) expected, (Record) actual); break; case LIST:
// Read the struct at ordinal `ord` from the container row and prepare an array
// to hold one extracted value per struct field.
// NOTE(review): fragment — the per-field extraction loop follows outside this view.
Types.StructType struct = type.asStructType(); InternalRow internalRow = container.getStruct(ord, struct.fields().size()); Object[] data = new Object[struct.fields().size()];
// STRUCT case (duplicate of an earlier snippet): both values must be Records;
// compare field-by-field via the struct overload. LIST case follows this fragment.
Assert.assertTrue("Expected should be a Record", expected instanceof Record); Assert.assertTrue("Actual should be a Record", actual instanceof Record); assertEquals(type.asStructType(), (Record) expected, (Record) actual); break; case LIST:
// Recursively convert each Iceberg struct field to an ORC field, keyed by name.
// NOTE(review): fragment — the loop body/closure continues past this view.
for(Types.NestedField field: type.asStructType().fields()) { result.addField(field.name(), toOrc(field.fieldId(), field.type(), columnIds));
// Build a test fixture for the nested "location" struct found in the write schema.
// NOTE(review): fragment — lat/long are set as float literals; confirm the schema
// declares FloatType (not DoubleType) for these fields.
Record location = GenericRecord.create(writeSchema.findType("location").asStructType()); location.setField("lat", 52.995143f); location.setField("long", -1.539054f);