/**
 * Converts an Iceberg struct to a Spark {@code StructType} by pairing each Iceberg field
 * with its already-converted Spark type from {@code fieldResults}.
 */
@Override
public DataType struct(Types.StructType struct, List<DataType> fieldResults) {
  List<Types.NestedField> icebergFields = struct.fields();
  List<StructField> sparkFields = Lists.newArrayListWithExpectedSize(fieldResults.size());

  int index = 0;
  for (Types.NestedField icebergField : icebergFields) {
    // optional Iceberg fields map to nullable Spark fields
    sparkFields.add(StructField.apply(
        icebergField.name(), fieldResults.get(index), icebergField.isOptional(), Metadata.empty()));
    index += 1;
  }

  return StructType$.MODULE$.apply(sparkFields);
}
/**
 * Converts an Iceberg struct to an Avro record schema, memoizing results per struct.
 *
 * <p>Previously converted structs are served from the {@code results} cache so the same
 * struct always maps to the same Avro record instance. Record names come from the
 * {@code names} map when registered; otherwise a name is synthesized from the field-id
 * context ({@code fieldIds.peek()}).
 */
@Override
public Schema struct(Types.StructType struct, List<Schema> fieldSchemas) {
  Schema recordSchema = results.get(struct);
  if (recordSchema != null) {
    // already converted: reuse the cached record schema
    return recordSchema;
  }

  String recordName = names.get(struct);
  if (recordName == null) {
    // no explicit name registered; synthesize one from the current field id on the stack
    recordName = "r" + fieldIds.peek();
  }

  List<Types.NestedField> structFields = struct.fields();
  List<Schema.Field> fields = Lists.newArrayListWithExpectedSize(fieldSchemas.size());
  for (int i = 0; i < structFields.size(); i += 1) {
    Types.NestedField structField = structFields.get(i);
    // optional Iceberg fields get a null default value in the Avro field
    Schema.Field field = new Schema.Field(
        structField.name(), fieldSchemas.get(i), null,
        structField.isOptional() ? NULL_VALUE : null);
    // carry the Iceberg field id on the Avro field so it can be recovered on read
    field.addProp(AvroSchemaUtil.FIELD_ID_PROP, structField.fieldId());
    fields.add(field);
  }

  recordSchema = Schema.createRecord(recordName, null, null, false, fields);

  results.put(struct, recordSchema);

  return recordSchema;
}
/**
 * Builds a map from Iceberg field id to an accessor that reads that field out of an
 * {@code InternalRow}.
 *
 * <p>When a field produced a non-null nested result, each nested accessor is wrapped in a
 * position-based accessor for this struct's field index so lookups traverse into the nested
 * struct. Leaf fields get a direct accessor keyed by their own field id.
 *
 * @return the accessor map, or null when no field produced an accessor
 */
@Override
public Map<Integer, Accessor<InternalRow>> struct(
    Types.StructType struct, List<Map<Integer, Accessor<InternalRow>>> fieldResults) {
  Map<Integer, Accessor<InternalRow>> accessors = Maps.newHashMap();
  List<Types.NestedField> fields = struct.fields();
  for (int i = 0; i < fieldResults.size(); i += 1) {
    Types.NestedField field = fields.get(i);
    Map<Integer, Accessor<InternalRow>> result = fieldResults.get(i);
    if (result != null) {
      // nested struct: re-key each child accessor through this field's position, keeping
      // the child's field id as the map key
      for (Map.Entry<Integer, Accessor<InternalRow>> entry : result.entrySet()) {
        accessors.put(entry.getKey(),
            newAccessor(i, field.isOptional(), field.type().asNestedType().asStructType(),
                entry.getValue()));
      }
    } else {
      // leaf field: direct positional accessor keyed by its field id
      accessors.put(field.fieldId(), newAccessor(i, field.type()));
    }
  }

  if (accessors.isEmpty()) {
    // signal "nothing accessible" with null rather than an empty map
    return null;
  }

  return accessors;
}
// NOTE(review): fragment — surrounding method is not visible. The message implies this runs
// when `field` is absent from some projected/read schema: missing is only legal for optional
// fields, so a missing required field fails fast here.
Preconditions.checkArgument(field.isOptional(), "Missing required field: %s", field.name());
// NOTE(review): fragment — enclosing loop/condition not visible. The branch below appears to
// rebuild an optional field whose projected type changed, flagging the schema as changed.
projectedFields.put(field.name(), field); } else if (field.isOptional()) { changed = true; projectedFields.put(field.name(),
// NOTE(review): fragment — when a projection produced a different type, rebuild the field with
// that type, preserving the original field id, name, and optionality.
} else if (projectedType != null) { sameTypes = false; // signal that some types were altered
if (field.isOptional()) { selectedFields.add( Types.NestedField.optional(field.fieldId(), field.name(), projectedType));
// NOTE(review): fragment — preserves the field's optionality when rebuilding it with the
// converted name/type; the required branch continues past this view.
if (field.isOptional()) { newFields.add(Types.NestedField.optional(field.fieldId(), name, resultType)); } else {
// NOTE(review): fragment — schema-compatibility check: a read schema may not require a field
// that the (write) schema declares optional, since the value may legitimately be absent.
if (readField.isRequired() && field.isOptional()) { errors.add(readField.name() + " should be required, but is optional");
@Override public Type array(Schema array, Type elementType) { if (array.getLogicalType() instanceof LogicalMap) { // map stored as an array Schema keyValueSchema = array.getElementType(); Preconditions.checkArgument(AvroSchemaUtil.isKeyValueSchema(keyValueSchema), "Invalid key-value pair schema: {}", keyValueSchema); Types.StructType keyValueType = elementType.asStructType(); Types.NestedField keyField = keyValueType.field("key"); Types.NestedField valueField = keyValueType.field("value"); if (keyValueType.field("value").isOptional()) { return Types.MapType.ofOptional( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } else { return Types.MapType.ofRequired( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } } else { // normal array Schema elementSchema = array.getElementType(); int id = getElementId(array); if (AvroSchemaUtil.isOptionSchema(elementSchema)) { return Types.ListType.ofOptional(id, elementType); } else { return Types.ListType.ofRequired(id, elementType); } } }
/**
 * Rebuilds a struct from pruned field results, reusing the original struct when nothing changed.
 *
 * <p>A null entry in {@code fieldResults} means the field was pruned away. Fields whose type is
 * unchanged (reference-equal) are reused as-is; changed types are rebuilt with the original
 * field id, name, and optionality.
 */
@Override
public Type struct(Types.StructType struct, Iterable<Type> fieldResults) {
  Preconditions.checkNotNull(struct,
      "Cannot prune null struct. Pruning must start with a schema.");
  // `current` is maintained by the enclosing visitor; it must track the Spark-side struct
  Preconditions.checkArgument(current instanceof StructType, "Not a struct: %s", current);

  List<Types.NestedField> fields = struct.fields();
  List<Type> types = Lists.newArrayList(fieldResults);

  boolean changed = false;
  List<Types.NestedField> newFields = Lists.newArrayListWithExpectedSize(types.size());
  for (int i = 0; i < fields.size(); i += 1) {
    Types.NestedField field = fields.get(i);
    Type type = types.get(i);

    if (type == null) {
      // field pruned: drop it and remember the struct changed
      changed = true;

    } else if (field.type() == type) {
      // identity comparison is intentional: unchanged subtrees return the same instance
      newFields.add(field);

    } else if (field.isOptional()) {
      changed = true;
      newFields.add(Types.NestedField.optional(field.fieldId(), field.name(), type));

    } else {
      changed = true;
      newFields.add(Types.NestedField.required(field.fieldId(), field.name(), type));
    }
  }

  if (changed) {
    return Types.StructType.of(newFields);
  }

  // nothing pruned or retyped: keep the original struct instance
  return struct;
}
/**
 * Converts an Iceberg schema to a list of field DTOs.
 *
 * @param schema the Iceberg schema
 * @param partitionFields the table's partition fields, used to mark partition columns
 * @return list of field info DTOs, one per schema column
 */
public List<FieldInfo> icebergeSchemaTofieldDtos(final Schema schema,
                                                 final List<PartitionField> partitionFields) {
    final List<String> partitionColumnNames = partitionFields.stream()
        .map(PartitionField::name)
        .collect(Collectors.toList());

    final List<FieldInfo> result = Lists.newArrayList();
    for (final Types.NestedField column : schema.columns()) {
        final FieldInfo info = new FieldInfo();
        info.setName(column.name());
        info.setType(toMetacatType(fromIcebergToHiveType(column.type())));
        info.setIsNullable(column.isOptional());
        info.setComment(column.doc());
        // a column is a partition key iff a partition field shares its name
        info.setPartitionKey(partitionColumnNames.contains(column.name()));
        result.add(info);
    }
    return result;
}
/**
 * Converts an Iceberg schema to a list of field DTOs.
 *
 * @param schema the Iceberg schema
 * @param partitionFields the table's partition fields, used to mark partition columns
 * @return list of field info DTOs, one per schema column
 */
public List<FieldInfo> icebergeSchemaTofieldDtos(final Schema schema,
                                                 final List<PartitionField> partitionFields) {
    final List<FieldInfo> fields = Lists.newArrayList();
    final List<String> partitionNames = partitionFields.stream()
        .map(PartitionField::name).collect(Collectors.toList());

    // enhanced for-loop: the index was only used for get(i), and the indexed form
    // re-evaluated schema.columns() on every iteration
    for (final Types.NestedField field : schema.columns()) {
        final FieldInfo fieldInfo = new FieldInfo();
        fieldInfo.setName(field.name());
        fieldInfo.setType(toMetacatType(fromIcebergToHiveType(field.type())));
        fieldInfo.setIsNullable(field.isOptional());
        // NOTE(review): field.doc() is not propagated to fieldInfo.setComment(...) here —
        // confirm that dropping column comments is intentional.
        fieldInfo.setPartitionKey(partitionNames.contains(field.name()));
        fields.add(fieldInfo);
    }
    return fields;
}
/**
 * Rebuilds a struct with freshly assigned field ids.
 *
 * <p>Ordering matters here: ids for ALL of this struct's fields are drawn from {@code nextId}
 * before any element of {@code futures} is consumed. If {@code futures} is lazy, iterating it
 * triggers child visits (which also draw ids), so this two-pass shape guarantees a struct's own
 * fields get contiguous ids ahead of any nested fields.
 */
@Override
public Type struct(Types.StructType struct, Iterable<Type> futures) {
  List<Types.NestedField> fields = struct.fields();
  int length = struct.fields().size();

  // first pass: reserve an id for every field of this struct before visiting children
  List<Integer> newIds = Lists.newArrayListWithExpectedSize(length);
  for (int i = 0; i < length; i += 1) {
    newIds.add(nextId.get()); // assign IDs for this struct's fields first
  }

  // second pass: materialize the children and rebuild each field with its reserved id,
  // preserving name and optionality
  List<Types.NestedField> newFields = Lists.newArrayListWithExpectedSize(length);
  Iterator<Type> types = futures.iterator();
  for (int i = 0; i < length; i += 1) {
    Types.NestedField field = fields.get(i);
    Type type = types.next();
    if (field.isOptional()) {
      newFields.add(Types.NestedField.optional(newIds.get(i), field.name(), type));
    } else {
      newFields.add(Types.NestedField.required(newIds.get(i), field.name(), type));
    }
  }

  return Types.StructType.of(newFields);
}
/**
 * Converts an Iceberg field to the corresponding Parquet type, dispatching on whether the
 * field holds a primitive, struct, map, or list.
 *
 * @throws UnsupportedOperationException for nested types that are none of struct/map/list
 */
public Type field(NestedField field) {
  int id = field.fieldId();
  String name = field.name();
  // optional Iceberg fields map to OPTIONAL repetition, required fields to REQUIRED
  Type.Repetition repetition =
      field.isOptional() ? Type.Repetition.OPTIONAL : Type.Repetition.REQUIRED;

  if (field.type().isPrimitiveType()) {
    return primitive(field.type().asPrimitiveType(), repetition, id, name);
  }

  NestedType nested = field.type().asNestedType();
  if (nested.isStructType()) {
    return struct(nested.asStructType(), repetition, id, name);
  }
  if (nested.isMapType()) {
    return map(nested.asMapType(), repetition, id, name);
  }
  if (nested.isListType()) {
    return list(nested.asListType(), repetition, id, name);
  }

  throw new UnsupportedOperationException("Can't convert unknown type: " + nested);
}
@Override public Object field(Types.NestedField field, Supplier<Object> fieldResult) { // return null 5% of the time when the value is optional if (field.isOptional() && random.nextInt(20) == 1) { return null; } return fieldResult.get(); }
@Override public Object field(Types.NestedField field, Supplier<Object> fieldResult) { // return null 5% of the time when the value is optional if (field.isOptional() && random.nextInt(20) == 1) { return null; } return fieldResult.get(); }
/**
 * Wraps an optional field's Avro schema in an option (union with null); required fields keep
 * their schema unchanged.
 */
@Override
public Schema field(Types.NestedField field, Schema fieldSchema) {
  return field.isOptional() ? toOption(fieldSchema) : fieldSchema;
}
@Override public Object field(Types.NestedField field, Supplier<Object> fieldResult) { // return null 5% of the time when the value is optional if (field.isOptional() && random.nextInt(20) == 1) { return null; } return fieldResult.get(); }
@Override public Object field(Types.NestedField field, Supplier<Object> fieldResult) { // return null 5% of the time when the value is optional if (field.isOptional() && random.nextInt(20) == 1) { return null; } return fieldResult.get(); }