@Override
public ValueWriter<?> record(Schema record, List<String> names, List<ValueWriter<?>> fields) {
  // Look up each Avro field's Iceberg type by field id and convert it to the
  // corresponding Spark DataType, then wrap the field writers in a struct writer.
  List<DataType> sparkTypes = Lists.newArrayListWithExpectedSize(record.getFields().size());
  for (Schema.Field avroField : record.getFields()) {
    Type icebergType = schema.findType(getFieldId(avroField));
    sparkTypes.add(convert(icebergType));
  }
  return SparkValueWriters.struct(fields, sparkTypes);
}
/**
 * Returns the Iceberg field id for an Avro field.
 *
 * <p>Uses the field's explicit {@code field-id} property when present; otherwise a
 * new id is allocated for it.
 */
private int getId(Schema.Field field) {
  Object idProp = field.getObjectProp(AvroSchemaUtil.FIELD_ID_PROP);
  return idProp != null ? AvroSchemaUtil.getFieldId(field) : allocateId();
}
@Override
public Schema record(Schema record, List<String> names, List<Schema> fields) {
  // Rebuilds this record schema keeping only the selected fields.
  // NOTE(review): an earlier comment said this "should access the record's fields by
  // name"; the implementation aligns the visitor results with the record's fields by
  // position (field.pos()) — confirm the positional alignment is intended.
  List<Schema.Field> filteredFields = Lists.newArrayListWithExpectedSize(fields.size());
  boolean hasChange = false;
  for (Schema.Field field : record.getFields()) {
    int fieldId = getFieldId(field);
    // the child visitor's result for this field, aligned by position
    Schema fieldSchema = fields.get(field.pos());
    // All primitives are selected by selecting the field, but map and list
    // types can be selected by projecting the keys, values, or elements.
    // This creates two conditions where the field should be selected: if the
    // id is selected or if the result of the field is non-null. The only
    // case where the converted field is non-null is when a map or list is
    // selected by lower IDs.
    if (selectedIds.contains(fieldId)) {
      // field is selected directly: keep it with its original schema
      filteredFields.add(copyField(field, field.schema()));
    } else if (fieldSchema != null) {
      // a nested projection changed this field's schema: keep the projected schema
      hasChange = true;
      filteredFields.add(copyField(field, fieldSchema));
    }
  }

  if (hasChange) {
    // at least one field's schema was rewritten by a nested projection
    return copyRecord(record, filteredFields);
  } else if (filteredFields.size() == record.getFields().size()) {
    // every field was kept unchanged: reuse the original record schema
    return record;
  } else if (!filteredFields.isEmpty()) {
    // a strict subset of fields was selected: copy the record with just those
    return copyRecord(record, filteredFields);
  }

  // nothing in this record was selected
  return null;
}
@Override
public Schema.Field field(Schema.Field field, Supplier<Schema> fieldResult) {
  // Projects one Avro field against the expected Iceberg struct tracked in `current`,
  // matching by field id and renaming/re-typing the field to the expected form.
  Types.StructType struct = current.asNestedType().asStructType();
  int fieldId = AvroSchemaUtil.getFieldId(field);
  Types.NestedField expectedField = struct.field(fieldId);
  // TODO: what if there are no ids?
  // if the field isn't present, it was not selected
  if (expectedField == null) {
    return null;
  }

  String expectedName = expectedField.name();

  // descend into the field's expected type before visiting the field's children
  this.current = expectedField.type();
  try {
    Schema schema = fieldResult.get();
    if (schema != field.schema() || !expectedName.equals(field.name())) {
      // add an alias for the field
      return copyField(field, schema, expectedName);
    } else {
      // always copy because fields can't be reused
      return copyField(field, field.schema(), field.name());
    }
  } finally {
    // restore the parent struct so sibling fields resolve against the right type
    this.current = struct;
  }
}
getFieldId(keyField), keyField.name(), keyField.schema(), getFieldId(valueField), valueField.name(), valueProjection.schema()); } else if (!(array.getLogicalType() instanceof LogicalMap)) { return AvroSchemaUtil.createProjectionMap(keyValueSchema.getFullName(), getFieldId(keyField), keyField.name(), keyField.schema(), getFieldId(valueField), valueField.name(), valueField.schema());
if (array.getLogicalType() instanceof LogicalMap) { Schema keyValue = array.getElementType(); int keyId = getFieldId(keyValue.getField("key")); int valueId = getFieldId(keyValue.getField("value"));
@Override
public ValueWriter<?> array(Schema array, ValueWriter<?> elementWriter) {
  // An Avro array whose logical type is "map" encodes a map as an array of
  // key/value records; emit a map writer for it instead of a plain array writer.
  LogicalType logicalType = array.getLogicalType();
  boolean isLogicalMap = logicalType != null && "map".equals(logicalType.getName());

  if (isLogicalMap) {
    Schema keyValueSchema = array.getElementType();
    Type keyType = schema.findType(getFieldId(keyValueSchema.getField("key")));
    Type valueType = schema.findType(getFieldId(keyValueSchema.getField("value")));
    // the element writer was built as a struct writer over (key, value)
    SparkValueWriters.StructWriter kvWriter = (SparkValueWriters.StructWriter) elementWriter;
    return SparkValueWriters.arrayMap(
        kvWriter.writers[0], convert(keyType), kvWriter.writers[1], convert(valueType));
  }

  Type elementType = schema.findType(AvroSchemaUtil.getElementId(array));
  return SparkValueWriters.array(elementWriter, convert(elementType));
}