@Test public void testListProjection() throws IOException { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(10, "values", Types.ListType.ofOptional(11, Types.LongType.get())) ); List<Long> values = ImmutableList.of(56L, 57L, 58L); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("values", values); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Assert.assertNull("Should not project values list", projected.get("values")); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); Assert.assertNull("Should not project id", projected.get("id")); Assert.assertEquals("Should project entire list", values, projected.get("values")); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); Assert.assertNull("Should not project id", projected.get("id")); Assert.assertEquals("Should project entire list", values, projected.get("values")); }
@Test public void testIdentityConversions() { List<Pair<Literal<?>, Type>> pairs = Arrays.asList( Pair.of(Literal.of(true), Types.BooleanType.get()), Pair.of(Literal.of(34), Types.IntegerType.get()), Pair.of(Literal.of(34L), Types.LongType.get()), Pair.of(Literal.of(34.11F), Types.FloatType.get()), Pair.of(Literal.of(34.55D), Types.DoubleType.get()), Pair.of(Literal.of("34.55"), Types.DecimalType.of(9, 2)), Pair.of(Literal.of("2017-08-18"), Types.DateType.get()), Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()), Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampType.withoutZone()), Pair.of(Literal.of("abc"), Types.StringType.get()), Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()), Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)), Pair.of(Literal.of(ByteBuffer.wrap(new byte[] {0, 1, 2})), Types.BinaryType.get()) ); for (Pair<Literal<?>, Type> pair : pairs) { Literal<?> lit = pair.first(); Type type = pair.second(); // first, convert the literal to the target type (date/times start as strings) Literal<?> expected = lit.to(type); // then check that converting again to the same type results in an identical literal Assert.assertSame("Converting twice should produce identical values", expected, expected.to(type)); } }
@Test public void testListProjection() throws IOException { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(10, "values", Types.ListType.ofOptional(11, Types.LongType.get())) ); List<Long> values = ImmutableList.of(56L, 57L, 58L); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("values", values); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Assert.assertNull("Should not project values list", projected.get("values")); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); Assert.assertNull("Should not project id", projected.get("id")); Assert.assertEquals("Should project entire list", values, projected.get("values")); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); Assert.assertNull("Should not project id", projected.get("id")); Assert.assertEquals("Should project entire list", values, projected.get("values")); }
@Test public void testBasicProjection() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = GenericRecord.create(writeSchema.asStruct()); record.setField("id", 34L); record.setField("data", "test"); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); Assert.assertNull("Should not project data", projected.getField("data")); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Schema dataOnly = new Schema( Types.NestedField.optional(1, "data", Types.StringType.get()) ); projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); Assert.assertNull("Should not project id", projected.getField("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.getField("data")); Assert.assertTrue("Should contain the correct data value", cmp == 0); }
@Test public void testListProjection() throws IOException { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(10, "values", Types.ListType.ofOptional(11, Types.LongType.get())) ); List<Long> values = ImmutableList.of(56L, 57L, 58L); Record record = GenericRecord.create(writeSchema.asStruct()); record.setField("id", 34L); record.setField("values", values); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); Assert.assertNull("Should not project values list", projected.getField("values")); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project entire list", values, projected.getField("values")); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); Assert.assertNull("Should not project id", projected.getField("id")); Assert.assertEquals("Should project entire list", values, projected.getField("values")); }
@Test public void testListProjection() throws IOException { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(10, "values", Types.ListType.ofOptional(11, Types.LongType.get())) ); List<Long> values = ImmutableList.of(56L, 57L, 58L); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("values", values); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("id_only", writeSchema, idOnly, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Assert.assertNull("Should not project values list", projected.get("values")); Schema elementOnly = writeSchema.select("values.element"); projected = writeAndRead("element_only", writeSchema, elementOnly, record); Assert.assertNull("Should not project id", projected.get("id")); Assert.assertEquals("Should project entire list", values, projected.get("values")); Schema listOnly = writeSchema.select("values"); projected = writeAndRead("list_only", writeSchema, listOnly, record); Assert.assertNull("Should not project id", projected.get("id")); Assert.assertEquals("Should project entire list", values, projected.get("values")); }
static StructType getType(StructType partitionType) { // IDs start at 100 to leave room for changes to ManifestEntry return StructType.of( required(100, "file_path", StringType.get()), required(101, "file_format", StringType.get()), required(102, "partition", partitionType), required(103, "record_count", LongType.get()), required(104, "file_size_in_bytes", LongType.get()), required(105, "block_size_in_bytes", LongType.get()), optional(106, "file_ordinal", IntegerType.get()), optional(107, "sort_columns", ListType.ofRequired(112, IntegerType.get())), optional(108, "column_sizes", MapType.ofRequired(117, 118, IntegerType.get(), LongType.get())), optional(109, "value_counts", MapType.ofRequired(119, 120, IntegerType.get(), LongType.get())), optional(110, "null_value_counts", MapType.ofRequired(121, 122, IntegerType.get(), LongType.get())), optional(125, "lower_bounds", MapType.ofRequired(126, 127, IntegerType.get(), BinaryType.get())), optional(128, "upper_bounds", MapType.ofRequired(129, 130, IntegerType.get(), BinaryType.get())) // NEXT ID TO ASSIGN: 131 ); }
@Test public void testBasicProjection() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("data", "test"); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); Assert.assertNull("Should not project data", projected.get("data")); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Schema dataOnly = new Schema( Types.NestedField.optional(1, "data", Types.StringType.get()) ); projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); Assert.assertNull("Should not project id", projected.get("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("data")); Assert.assertTrue("Should contain the correct data value", cmp == 0); }
@Test public void testBasicProjection() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("data", "test"); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); Assert.assertNull("Should not project data", projected.get("data")); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Schema dataOnly = new Schema( Types.NestedField.optional(1, "data", Types.StringType.get()) ); projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); Assert.assertNull("Should not project id", projected.get("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("data")); Assert.assertEquals("Should contain the correct data value", 0, cmp); }
@Test public void testIdentityTypes() throws Exception { // these types make a strong guarantee than equality, instances are identical Type[] identityPrimitives = new Type[] { Types.BooleanType.get(), Types.IntegerType.get(), Types.LongType.get(), Types.FloatType.get(), Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get() }; for (Type type : identityPrimitives) { Assert.assertSame("Serialization result should be identical to starting type", type, TestHelpers.roundTripSerialize(type)); } }
public static boolean isPromotionAllowed(Type from, Type.PrimitiveType to) { // Warning! Before changing this function, make sure that the type change doesn't introduce // compatibility problems in partitioning. if (from.equals(to)) { return true; } switch (from.typeId()) { case INTEGER: return to == Types.LongType.get(); case FLOAT: return to == Types.DoubleType.get(); case DECIMAL: Types.DecimalType fromDecimal = (Types.DecimalType) from; if (to.typeId() != Type.TypeID.DECIMAL) { return false; } Types.DecimalType toDecimal = (Types.DecimalType) to; return (fromDecimal.scale() == toDecimal.scale() && fromDecimal.precision() <= toDecimal.precision()); } return false; }
@Test public void testBasicProjection() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("data", "test"); Schema idOnly = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record); Assert.assertNull("Should not project data", projected.get("data")); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); Schema dataOnly = new Schema( Types.NestedField.optional(1, "data", Types.StringType.get()) ); projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record); Assert.assertNull("Should not project id", projected.get("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("data")); Assert.assertTrue("Should contain the correct data value", cmp == 0); }
@Test public void testReorderedProjection() throws Exception { // Assume.assumeTrue( // "Spark's Parquet read support does not support reordered columns", // !format.equalsIgnoreCase("parquet")); Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(schema, "table")); record.put("id", 34L); record.put("data", "test"); Schema reordered = new Schema( Types.NestedField.optional(2, "missing_1", Types.StringType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()), Types.NestedField.optional(3, "missing_2", Types.LongType.get()) ); Record projected = writeAndRead("reordered_projection", schema, reordered, record); Assert.assertNull("Should contain the correct 0 value", projected.get(0)); Assert.assertEquals("Should contain the correct 1 value", "test", projected.get(1).toString()); Assert.assertNull("Should contain the correct 2 value", projected.get(2)); }
@Test public void testReorderedFullProjection() throws Exception { // Assume.assumeTrue( // "Spark's Parquet read support does not support reordered columns", // !format.equalsIgnoreCase("parquet")); Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(schema, "table")); record.put("id", 34L); record.put("data", "test"); Schema reordered = new Schema( Types.NestedField.optional(1, "data", Types.StringType.get()), Types.NestedField.required(0, "id", Types.LongType.get()) ); Record projected = writeAndRead("reordered_full_projection", schema, reordered, record); Assert.assertEquals("Should contain the correct 0 value", "test", projected.get(0).toString()); Assert.assertEquals("Should contain the correct 1 value", 34L, projected.get(1)); }
@Test public void testRename() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = GenericRecord.create(writeSchema.asStruct()); record.setField("id", 34L); record.setField("data", "test"); Schema readSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "renamed", Types.StringType.get()) ); Record projected = writeAndRead("project_and_rename", writeSchema, readSchema, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.getField("renamed")); Assert.assertTrue("Should contain the correct data/renamed value", cmp == 0); }
@Test public void testReorderedProjection() throws Exception { Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = GenericRecord.create(schema.asStruct()); record.setField("id", 34L); record.setField("data", "test"); Schema reordered = new Schema( Types.NestedField.optional(2, "missing_1", Types.StringType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()), Types.NestedField.optional(3, "missing_2", Types.LongType.get()) ); Record projected = writeAndRead("full_projection", schema, reordered, record); Assert.assertNull("Should contain the correct 0 value", projected.get(0)); Assert.assertEquals("Should contain the correct 1 value", "test", projected.get(1).toString()); Assert.assertNull("Should contain the correct 2 value", projected.get(2)); }
@Test public void testRename() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("data", "test"); Schema readSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "renamed", Types.StringType.get()) ); Record projected = writeAndRead("project_and_rename", writeSchema, readSchema, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("renamed")); Assert.assertEquals("Should contain the correct data/renamed value", 0, cmp); }
@Test public void testRename() throws Exception { Schema writeSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); record.put("id", 34L); record.put("data", "test"); Schema readSchema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "renamed", Types.StringType.get()) ); Record projected = writeAndRead("project_and_rename", writeSchema, readSchema, record); Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.get("id")); int cmp = Comparators.charSequences() .compare("test", (CharSequence) projected.get("renamed")); Assert.assertTrue("Should contain the correct data/renamed value", cmp == 0); }
@Test public void testReorderedProjection() throws Exception { Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(schema, "table")); record.put("id", 34L); record.put("data", "test"); Schema reordered = new Schema( Types.NestedField.optional(2, "missing_1", Types.StringType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()), Types.NestedField.optional(3, "missing_2", Types.LongType.get()) ); Record projected = writeAndRead("full_projection", schema, reordered, record); Assert.assertNull("Should contain the correct 0 value", projected.get(0)); Assert.assertEquals("Should contain the correct 1 value", "test", projected.get(1).toString()); Assert.assertNull("Should contain the correct 2 value", projected.get(2)); }
@Test public void testReorderedProjection() throws Exception { Schema schema = new Schema( Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()) ); Record record = new Record(AvroSchemaUtil.convert(schema, "table")); record.put("id", 34L); record.put("data", "test"); Schema reordered = new Schema( Types.NestedField.optional(2, "missing_1", Types.StringType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()), Types.NestedField.optional(3, "missing_2", Types.LongType.get()) ); Record projected = writeAndRead("full_projection", schema, reordered, record); Assert.assertNull("Should contain the correct 0 value", projected.get(0)); Assert.assertEquals("Should contain the correct 1 value", "test", projected.get(1).toString()); Assert.assertNull("Should contain the correct 2 value", projected.get(2)); }