/**
 * Creates a writer for the table located through {@code options}.
 *
 * <p>Only {@link SaveMode#Append} is accepted. The dataframe struct is converted to a schema
 * against the table schema and checked for write compatibility; every reported problem is
 * collected into a single exception message. The file format comes from the
 * {@code iceberg.write.format} option when present, otherwise from the table property
 * {@code DEFAULT_FILE_FORMAT} (falling back to {@code DEFAULT_FILE_FORMAT_DEFAULT}).
 *
 * @param jobId identifier of the write job (not used by this method)
 * @param dfStruct struct type of the dataframe being written
 * @param mode requested save mode; must be {@code SaveMode.Append}
 * @param options data source options used to find the table and the optional write format
 * @return an {@link Optional} that always contains the new writer
 * @throws IllegalArgumentException if the mode is not append, if the dataframe schema is not
 *     write-compatible with the table schema, or if the format name is not a {@code FileFormat}
 */
@Override
public Optional<DataSourceWriter> createWriter(String jobId, StructType dfStruct, SaveMode mode,
                                               DataSourceOptions options) {
  Preconditions.checkArgument(mode == SaveMode.Append, "Save mode %s is not supported", mode);

  Table table = findTable(options);

  Schema dfSchema = SparkSchemaUtil.convert(table.schema(), dfStruct);
  List<String> errors = CheckCompatibility.writeCompatibilityErrors(table.schema(), dfSchema);
  if (!errors.isEmpty()) {
    // errors is non-empty here, so the joined list always starts with one bullet
    throw new IllegalArgumentException(
        "Cannot write incompatible dataframe to table with schema:\n" + table.schema() +
        "\nProblems:\n* " + String.join("\n* ", errors));
  }

  // an explicit write option wins; the table property is only consulted when it is absent
  String formatName = options.get("iceberg.write.format")
      .orElseGet(() -> table.properties()
          .getOrDefault(DEFAULT_FILE_FORMAT, DEFAULT_FILE_FORMAT_DEFAULT));
  FileFormat format = FileFormat.valueOf(formatName.toUpperCase(Locale.ENGLISH));

  return Optional.of(new Writer(table, lazyConf(), format));
}
/**
 * Collects the compatibility problems found when writing data with {@code writeSchema}
 * into a dataset that is read with {@code readSchema}.
 *
 * <p>The read schema is visited with a {@code CheckCompatibility} visitor built from the write
 * schema and the flag {@code true}.
 *
 * @param readSchema a read schema
 * @param writeSchema a write schema
 * @return a list of error details, or an empty list if there are no compatibility problems
 */
public static List<String> writeCompatibilityErrors(Schema readSchema, Schema writeSchema) {
  // NOTE(review): the boolean presumably switches on write-only checks (e.g. ordering) — confirm
  CheckCompatibility writeVisitor = new CheckCompatibility(writeSchema, true);
  return TypeUtil.visit(readSchema, writeVisitor);
}
@Test public void testStructReadReordering() { // reads should allow reordering Schema read = new Schema(required(0, "nested", Types.StructType.of( required(1, "field_a", Types.IntegerType.get()), required(2, "field_b", Types.IntegerType.get()) ))); Schema write = new Schema(required(0, "nested", Types.StructType.of( required(2, "field_b", Types.IntegerType.get()), required(1, "field_a", Types.IntegerType.get()) ))); List<String> errors = CheckCompatibility.readCompatibilityErrors(read, write); Assert.assertEquals("Should produce no error messages", 0, errors.size()); } }
/**
 * Checks that writing an optional top-level column into a column the read schema declares
 * as required yields exactly one "should be required, but is optional" error.
 */
@Test
public void testRequiredSchemaField() {
  Schema writeSchema = new Schema(optional(1, "from_field", Types.IntegerType.get()));
  Schema readSchema = new Schema(required(1, "to_field", Types.IntegerType.get()));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that a required column is optional",
      firstProblem.contains("should be required, but is optional"));
}
/**
 * Collects the compatibility problems found when reading data written with
 * {@code writeSchema} using {@code readSchema}.
 *
 * <p>The read schema is visited with a {@code CheckCompatibility} visitor built from the write
 * schema and the flag {@code false}.
 *
 * @param readSchema a read schema
 * @param writeSchema a write schema
 * @return a list of error details, or an empty list if there are no compatibility problems
 */
public static List<String> readCompatibilityErrors(Schema readSchema, Schema writeSchema) {
  // NOTE(review): the boolean presumably switches off write-only checks (e.g. ordering) — confirm
  CheckCompatibility readVisitor = new CheckCompatibility(writeSchema, false);
  return TypeUtil.visit(readSchema, readVisitor);
}
/**
 * Checks that a required read column whose id (1) is absent from the write schema yields
 * exactly one "is required, but is missing" error.
 */
@Test
public void testMissingSchemaField() {
  Schema writeSchema = new Schema(required(0, "other_field", Types.IntegerType.get()));
  Schema readSchema = new Schema(required(1, "to_field", Types.IntegerType.get()));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that a required column is missing",
      firstProblem.contains("is required, but is missing"));
}
/**
 * Checks that writing a list into a column the read schema declares as a string yields
 * exactly one "list cannot be read as a string" error.
 */
@Test
public void testIncompatibleListAndPrimitive() {
  Types.ListType writtenList = Types.ListType.ofOptional(1, Types.IntegerType.get());
  Schema writeSchema = new Schema(required(0, "list_field", writtenList));
  Schema readSchema = new Schema(required(0, "list_field", Types.StringType.get()));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("list cannot be read as a string"));
}
/**
 * Checks that writing a struct into a column the read schema declares as a string yields
 * exactly one "struct cannot be read as a string" error.
 */
@Test
public void testIncompatibleStructAndPrimitive() {
  Types.StructType writtenStruct = Types.StructType.of(
      required(1, "from_field", Types.StringType.get()));
  Schema writeSchema = new Schema(required(0, "nested", writtenStruct));
  Schema readSchema = new Schema(required(0, "nested", Types.StringType.get()));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("struct cannot be read as a string"));
}
/**
 * Checks that a list of integers written into a list of strings yields exactly one
 * "cannot be promoted to string" error.
 */
@Test
public void testIncompatibleListElement() {
  Types.ListType writtenList = Types.ListType.ofOptional(1, Types.IntegerType.get());
  Types.ListType readList = Types.ListType.ofOptional(1, Types.StringType.get());

  Schema writeSchema = new Schema(required(0, "list_field", writtenList));
  Schema readSchema = new Schema(required(0, "list_field", readList));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("cannot be promoted to string"));
}
/**
 * Checks that writing a map into a column the read schema declares as a string yields
 * exactly one "map cannot be read as a string" error.
 */
@Test
public void testIncompatibleMapAndPrimitive() {
  Types.MapType writtenMap = Types.MapType.ofOptional(
      1, 2, Types.StringType.get(), Types.IntegerType.get());
  Schema writeSchema = new Schema(required(0, "map_field", writtenMap));
  Schema readSchema = new Schema(required(0, "map_field", Types.StringType.get()));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("map cannot be read as a string"));
}
/**
 * Checks that writing a list with optional elements into a list whose elements the read
 * schema declares as required yields exactly one
 * "elements should be required, but are optional" error.
 */
@Test
public void testRequiredListElement() {
  Types.ListType writtenList = Types.ListType.ofOptional(1, Types.IntegerType.get());
  Types.ListType readList = Types.ListType.ofRequired(1, Types.IntegerType.get());

  Schema writeSchema = new Schema(required(0, "list_field", writtenList));
  Schema readSchema = new Schema(required(0, "list_field", readList));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that elements are optional",
      firstProblem.contains("elements should be required, but are optional"));
}
@Test public void testMultipleErrors() { // required field is optional and cannot be promoted to the read type Schema write = new Schema(required(0, "nested", Types.StructType.of( optional(1, "from_field", Types.IntegerType.get()) ))); Schema read = new Schema(required(0, "nested", Types.StructType.of( required(1, "to_field", Types.FloatType.get()) ))); List<String> errors = CheckCompatibility.writeCompatibilityErrors(read, write); Assert.assertEquals("Should produce 1 error message", 2, errors.size()); Assert.assertTrue("Should complain that a required field is optional", errors.get(0).contains("should be required, but is optional")); Assert.assertTrue("Should complain about incompatible types", errors.get(1).contains("cannot be promoted to float")); }
Schema fromSchema = new Schema(required(1, "from_field", from)); for (Type.PrimitiveType to : PRIMITIVES) { List<String> errors = CheckCompatibility.writeCompatibilityErrors( new Schema(required(1, "to_field", to)), fromSchema); )); List<String> errors = CheckCompatibility.writeCompatibilityErrors(structSchema, fromSchema); Assert.assertEquals("Should produce 1 error message", 1, errors.size()); List<String> errors = CheckCompatibility.writeCompatibilityErrors(listSchema, fromSchema); Assert.assertEquals("Should produce 1 error message", 1, errors.size()); Types.MapType.ofRequired(2, 3, Types.StringType.get(), from))); List<String> errors = CheckCompatibility.writeCompatibilityErrors(mapSchema, fromSchema); Assert.assertEquals("Should produce 1 error message", 1, errors.size()); Types.MapType.ofRequired(2, 3, from, Types.StringType.get()))); List<String> errors = CheckCompatibility.writeCompatibilityErrors(mapSchema, fromSchema); Assert.assertEquals("Should produce 1 error message", 1, errors.size());
/**
 * Checks that writing a map with optional values into a map whose values the read schema
 * declares as required yields exactly one "values should be required, but are optional" error.
 */
@Test
public void testRequiredMapValue() {
  Types.MapType writtenMap = Types.MapType.ofOptional(
      1, 2, Types.StringType.get(), Types.IntegerType.get());
  Types.MapType readMap = Types.MapType.ofRequired(
      1, 2, Types.StringType.get(), Types.IntegerType.get());

  Schema writeSchema = new Schema(required(0, "map_field", writtenMap));
  Schema readSchema = new Schema(required(0, "map_field", readMap));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that values are optional",
      firstProblem.contains("values should be required, but are optional"));
}
/**
 * Checks that a map with integer keys written into a map with double keys yields exactly
 * one "cannot be promoted to double" error.
 */
@Test
public void testIncompatibleMapKey() {
  Types.MapType writtenMap = Types.MapType.ofOptional(
      1, 2, Types.IntegerType.get(), Types.StringType.get());
  Types.MapType readMap = Types.MapType.ofOptional(
      1, 2, Types.DoubleType.get(), Types.StringType.get());

  Schema writeSchema = new Schema(required(0, "map_field", writtenMap));
  Schema readSchema = new Schema(required(0, "map_field", readMap));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("cannot be promoted to double"));
}
/**
 * Checks that a required nested read field whose id (1) is absent from the written struct
 * yields exactly one "is required, but is missing" error.
 */
@Test
public void testMissingRequiredStructField() {
  Types.StructType writtenStruct = Types.StructType.of(
      optional(2, "from_field", Types.IntegerType.get()));
  Types.StructType readStruct = Types.StructType.of(
      required(1, "to_field", Types.IntegerType.get()));

  Schema writeSchema = new Schema(required(0, "nested", writtenStruct));
  Schema readSchema = new Schema(required(0, "nested", readStruct));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that a required field is missing",
      firstProblem.contains("is required, but is missing"));
}
/**
 * Checks that a map with integer values written into a map with double values yields
 * exactly one "cannot be promoted to double" error.
 */
@Test
public void testIncompatibleMapValue() {
  Types.MapType writtenMap = Types.MapType.ofOptional(
      1, 2, Types.StringType.get(), Types.IntegerType.get());
  Types.MapType readMap = Types.MapType.ofOptional(
      1, 2, Types.StringType.get(), Types.DoubleType.get());

  Schema writeSchema = new Schema(required(0, "map_field", writtenMap));
  Schema readSchema = new Schema(required(0, "map_field", readMap));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("cannot be promoted to double"));
}
/**
 * Checks that a nested integer field written into a nested float field yields exactly one
 * "cannot be promoted to float" error.
 */
@Test
public void testIncompatibleStructField() {
  Types.StructType writtenStruct = Types.StructType.of(
      required(1, "from_field", Types.IntegerType.get()));
  Types.StructType readStruct = Types.StructType.of(
      required(1, "to_field", Types.FloatType.get()));

  Schema writeSchema = new Schema(required(0, "nested", writtenStruct));
  Schema readSchema = new Schema(required(0, "nested", readStruct));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain about incompatible types",
      firstProblem.contains("cannot be promoted to float"));
}
/**
 * Checks that writing an optional nested field into a nested field the read schema declares
 * as required yields exactly one "should be required, but is optional" error.
 */
@Test
public void testRequiredStructField() {
  Types.StructType writtenStruct = Types.StructType.of(
      optional(1, "from_field", Types.IntegerType.get()));
  Types.StructType readStruct = Types.StructType.of(
      required(1, "to_field", Types.IntegerType.get()));

  Schema writeSchema = new Schema(required(0, "nested", writtenStruct));
  Schema readSchema = new Schema(required(0, "nested", readStruct));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(readSchema, writeSchema);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());
  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that a required field is optional",
      firstProblem.contains("should be required, but is optional"));
}
@Test public void testStructWriteReordering() { // writes should not reorder fields Schema read = new Schema(required(0, "nested", Types.StructType.of( required(1, "field_a", Types.IntegerType.get()), required(2, "field_b", Types.IntegerType.get()) ))); Schema write = new Schema(required(0, "nested", Types.StructType.of( required(2, "field_b", Types.IntegerType.get()), required(1, "field_a", Types.IntegerType.get()) ))); List<String> errors = CheckCompatibility.writeCompatibilityErrors(read, write); Assert.assertEquals("Should produce 1 error message", 1, errors.size()); System.err.println(errors); Assert.assertTrue("Should complain about field_b before field_a", errors.get(0).contains("field_b is out of order, before field_a")); }