/**
 * Expects deserialization to be rejected when the JSON input leaves out {@code f_string}, a
 * field the schema declares through {@code addStringField}.
 */
@Test
public void testThrowsForMissingNotNullableField() throws Exception {
  Schema byteAndStringSchema =
      Schema.builder().addByteField("f_byte").addStringField("f_string").build();

  // The payload carries only f_byte; f_string is left out on purpose.
  String jsonWithoutString = "{\n" + "\"f_byte\" : 12\n" + "}";

  RowJsonDeserializer rowDeserializer = RowJsonDeserializer.forSchema(byteAndStringSchema);

  // Register the expectation only after the deserializer exists, so that only readValue
  // is allowed to raise the exception.
  thrown.expect(UnsupportedRowJsonException.class);
  thrown.expectMessage("'f_string' is not present");

  newObjectMapperWith(rowDeserializer).readValue(jsonWithoutString, Row.class);
}
@Test public void testThrowsForMismatchedRowField() throws Exception { Schema nestedRowSchema = Schema.builder().addInt32Field("f_nestedInt32").addStringField("f_nestedString").build(); Schema schema = Schema.builder().addInt32Field("f_int32").addRowField("f_row", nestedRowSchema).build(); String rowString = "{\n" + "\"f_int32\" : 32,\n" + "\"f_row\" : []\n" // expect object, get array + "}"; RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(schema); thrown.expect(UnsupportedRowJsonException.class); thrown.expectMessage("Expected JSON object"); newObjectMapperWith(deserializer).readValue(rowString, Row.class); }
/**
 * Checks that a JSON object nested under {@code f_row} is deserialized into a nested
 * {@code Row} built against the nested schema.
 */
@Test
public void testParsesRowField() throws Exception {
  Schema innerSchema =
      Schema.builder().addInt32Field("f_nestedInt32").addStringField("f_nestedString").build();
  Schema outerSchema =
      Schema.builder().addInt32Field("f_int32").addRowField("f_row", innerSchema).build();

  String json =
      "{\n"
          + "\"f_int32\" : 32,\n"
          + "\"f_row\" : {\n"
          + " \"f_nestedInt32\" : 54,\n"
          + " \"f_nestedString\" : \"foo\"\n"
          + " }\n"
          + "}";

  RowJsonDeserializer rowDeserializer = RowJsonDeserializer.forSchema(outerSchema);
  Row actual = newObjectMapperWith(rowDeserializer).readValue(json, Row.class);

  // Build the expectation inside-out: the nested row first, then the enclosing row.
  Row expectedInner = Row.withSchema(innerSchema).addValues(54, "foo").build();
  Row expected = Row.withSchema(outerSchema).addValues(32, expectedInner).build();

  assertEquals(expected, actual);
}
/**
 * Runs {@code SELECT name, ageYears} over a collection of two {@code PersonBean} elements and
 * expects one (name, ageYears) row per bean.
 */
@Test
public void testSelect() {
  PCollection<PersonBean> people =
      PBegin.in(pipeline)
          .apply("input", Create.of(new PersonBean("Foo", 5), new PersonBean("Bar", 53)));

  String query = "SELECT name, ageYears FROM PCOLLECTION";
  PCollection<Row> selected = people.apply("sql", SqlTransform.query(query));

  Schema expectedSchema =
      Schema.builder().addStringField("name").addInt32Field("ageYears").build();

  PAssert.that(selected)
      .containsInAnyOrder(
          TestUtils.rowsBuilderOf(expectedSchema).addRows("Foo", 5, "Bar", 53).getRows());

  pipeline.run();
}
/**
 * Selects {@code RowField.string_field} and {@code RowFieldTwo.long_field} from the
 * {@code col} column of {@code nestedRowTestTable} and expects the single row
 * ("inner_str_one", 3L).
 */
@Test
public void testFieldAccessToNestedRow() {
  BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);

  String query =
      "SELECT nestedRowTestTable.col.RowField.string_field, nestedRowTestTable.col.RowFieldTwo.long_field FROM nestedRowTestTable";
  PCollection<Row> output = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(query));

  Schema expectedSchema =
      Schema.builder().addStringField("field1").addInt64Field("field2").build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues("inner_str_one", 3L).build();

  PAssert.that(output).containsInAnyOrder(expectedRow);

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
@Test @Category(NeedsRunner.class) public void testOutputCoders() { Schema keySchema = Schema.builder().addStringField("field1").build(); PCollection<KV<Row, Iterable<POJO>>> grouped = pipeline .apply(Create.of(new POJO("key1", 1, "value1"))) .apply(Group.byFieldNames("field1")); // Make sure that the key has the right schema. PCollection<Row> keys = grouped.apply(Keys.create()); assertTrue(keys.getSchema().equivalent(keySchema)); // Make sure that the value has the right schema. PCollection<POJO> values = grouped.apply(Values.create()).apply(Flatten.iterables()); assertTrue(values.getSchema().equivalent(POJO_SCHEMA)); pipeline.run(); }
/**
 * Checks that two single-field schemas are neither equal nor equivalent when they differ in
 * field type, in field name, or in nullability.
 */
@Test
public void testPrimitiveNotEquivalent() {
  // Same name, different type.
  Schema int64Foo = Schema.builder().addInt64Field("foo").build();
  Schema stringFoo = Schema.builder().addStringField("foo").build();
  assertNotEquals(int64Foo, stringFoo);
  assertFalse(int64Foo.equivalent(stringFoo));

  // Same type, different name.
  Schema int64FooAgain = Schema.builder().addInt64Field("foo").build();
  Schema int64Bar = Schema.builder().addInt64Field("bar").build();
  assertNotEquals(int64FooAgain, int64Bar);
  assertFalse(int64FooAgain.equivalent(int64Bar));

  // Same name and type, but one side is declared through addNullableField.
  Schema plainInt64Foo = Schema.builder().addInt64Field("foo").build();
  Schema nullableInt64Foo = Schema.builder().addNullableField("foo", FieldType.INT64).build();
  assertNotEquals(plainInt64Foo, nullableInt64Foo);
  assertFalse(plainInt64Foo.equivalent(nullableInt64Foo));
}
/**
 * Evaluates a query built from nested {@code ROW(...)} constructors and expects a single
 * result row with six flat fields: three int32 values followed by three strings.
 */
@Test
public void testRowConstructor() {
  BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);

  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(
          pipeline, env.parseQuery("SELECT ROW(1, ROW(2, 3), 'str', ROW('str2', 'str3'))"));

  Schema expectedSchema =
      Schema.builder()
          .addInt32Field("field1")
          .addInt32Field("field2")
          .addInt32Field("field3")
          .addStringField("field4")
          .addStringField("field5")
          .addStringField("field6")
          .build();
  Row expectedRow =
      Row.withSchema(expectedSchema).addValues(1, 2, 3, "str", "str2", "str3").build();

  PAssert.that(output).containsInAnyOrder(expectedRow);

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
}
/**
 * Expects an {@code IllegalArgumentException} containing "Cannot find field" when a dot
 * expression is evaluated with a field name that the row's schema does not declare.
 */
@Test
public void testThrowsForNonExistentField() {
  Schema rowSchema = Schema.builder().addStringField("f_string").addInt32Field("f_int").build();

  // Operands of the dot expression: the row itself, then the name of the field to read.
  Row sourceRow = Row.withSchema(rowSchema).addValues("aaa", 14).build();
  List<BeamSqlExpression> operands =
      ImmutableList.of(
          BeamSqlPrimitive.of(SqlTypeName.ROW, sourceRow),
          BeamSqlPrimitive.of(SqlTypeName.VARCHAR, "f_nonExistent"));

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Cannot find field");

  new BeamSqlDotExpression(operands, SqlTypeName.VARCHAR)
      .evaluate(NULL_ROW, NULL_WINDOW, BeamSqlExpressionEnvironments.empty());
}
}
/**
 * Round-trips a row whose only field is an array of nested rows through {@code RowCoder} and
 * checks that decoding the encoded form yields an equal row.
 */
@Test
public void testArrayOfRow() throws Exception {
  Schema elementSchema =
      Schema.builder().addInt32Field("f1_int").addStringField("f1_str").build();
  FieldType elementType = FieldType.row(elementSchema);
  Schema arraySchema = Schema.builder().addArrayField("f_array", elementType).build();

  Row first = Row.withSchema(elementSchema).addValues(1, "one").build();
  Row second = Row.withSchema(elementSchema).addValues(2, "two").build();
  Row third = Row.withSchema(elementSchema).addValues(3, "three").build();

  Row rowWithArray = Row.withSchema(arraySchema).addArray(first, second, third).build();

  CoderProperties.coderDecodeEncodeEqual(RowCoder.of(arraySchema), rowWithArray);
}
/**
 * Expects {@code buildBeamSqlTable} to fail with a message containing both "Unsupported" and
 * "'event_timestamp'" for a schema that carries {@code someField} alongside
 * {@code event_timestamp}, {@code attributes} and {@code payload}.
 */
@Test
public void testThrowsIfExtraFieldsExist() {
  PubsubJsonTableProvider tableProvider = new PubsubJsonTableProvider();

  Schema emptyPayloadSchema = Schema.builder().build();
  Schema schemaWithExtraField =
      Schema.builder()
          .addDateTimeField("event_timestamp")
          .addMapField("attributes", VARCHAR, VARCHAR)
          .addStringField("someField")
          .addRowField("payload", emptyPayloadSchema)
          .build();

  Table table = tableDefinition().schema(schemaWithExtraField).build();

  // Both fragments must appear in the failure message; no exception type is pinned here.
  thrown.expectMessage("Unsupported");
  thrown.expectMessage("'event_timestamp'");

  tableProvider.buildBeamSqlTable(table);
}
/**
 * Selects the nested {@code RowField} of {@code nestedRowTestTable.col} as a whole and expects
 * the single row ("inner_str_one", 1L). Currently disabled; see the issue linked in
 * {@code @Ignore}.
 */
@Ignore("https://issues.apache.org/jira/browse/BEAM-5189")
@Test
public void testSelectInnerRowOfNestedRow() {
  BeamSqlEnv env = BeamSqlEnv.inMemory(readOnlyTableProvider);

  String query = "SELECT nestedRowTestTable.col.RowField FROM nestedRowTestTable";
  PCollection<Row> output = BeamSqlRelUtils.toPCollection(pipeline, env.parseQuery(query));

  Schema expectedSchema =
      Schema.builder().addStringField("field1").addInt64Field("field2").build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues("inner_str_one", 1L).build();

  PAssert.that(output).containsInAnyOrder(expectedRow);

  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
/**
 * Applies {@code TO_HEX(f_bytes)} to the rows of {@code boundedInputBytes} whose
 * {@code f_func} equals 'TO_HEX' and expects four hex strings, in any order.
 */
@Test
public void testToHex() throws Exception {
  Schema hexSchema = Schema.builder().addStringField("field").build();

  Row expectedFirst = Row.withSchema(hexSchema).addValue("666f6f626172").build();
  Row expectedSecond = Row.withSchema(hexSchema).addValue("20").build();
  Row expectedThird = Row.withSchema(hexSchema).addValue("616263414243").build();
  Row expectedFourth =
      Row.withSchema(hexSchema).addValue("616263414243d0b6d189d184d096d0a9d0a4").build();

  String query = "SELECT TO_HEX(f_bytes) FROM PCOLLECTION WHERE f_func = 'TO_HEX'";
  PCollection<Row> hexRows = boundedInputBytes.apply("testUdf", SqlTransform.query(query));

  PAssert.that(hexRows)
      .containsInAnyOrder(expectedFirst, expectedSecond, expectedThird, expectedFourth);

  pipeline.run().waitUntilFinish();
}
/**
 * Evaluates a dot expression that reads {@code f_string} from a row holding ("aaa", 14) and
 * expects the value "aaa".
 */
@Test
public void testReturnsFieldValue() {
  Schema rowSchema = Schema.builder().addStringField("f_string").addInt32Field("f_int").build();

  // Operands of the dot expression: the row itself, then the name of the field to read.
  Row sourceRow = Row.withSchema(rowSchema).addValues("aaa", 14).build();
  List<BeamSqlExpression> operands =
      ImmutableList.of(
          BeamSqlPrimitive.of(SqlTypeName.ROW, sourceRow),
          BeamSqlPrimitive.of(SqlTypeName.VARCHAR, "f_string"));

  BeamSqlDotExpression dotExpression = new BeamSqlDotExpression(operands, SqlTypeName.VARCHAR);

  Object fieldValue =
      dotExpression
          .evaluate(NULL_ROW, NULL_WINDOW, BeamSqlExpressionEnvironments.empty())
          .getValue();

  assertEquals("aaa", fieldValue);
}
/**
 * Runs {@code SELECT name} over a collection of two {@code PersonBean} elements and expects one
 * single-column row per bean.
 */
@Test
public void testProject() {
  PCollection<PersonBean> people =
      PBegin.in(pipeline)
          .apply("input", Create.of(new PersonBean("Foo", 5), new PersonBean("Bar", 53)));

  String query = "SELECT name FROM PCOLLECTION";
  PCollection<Row> projected = people.apply("sql", SqlTransform.query(query));

  Schema nameOnlySchema = Schema.builder().addStringField("name").build();

  PAssert.that(projected)
      .containsInAnyOrder(TestUtils.rowsBuilderOf(nameOnlySchema).addRows("Foo", "Bar").getRows());

  pipeline.run();
}
/**
 * Round-trips a row containing an int32 field and a nested row field through {@code RowCoder}
 * and checks that decoding the encoded form yields an equal row.
 */
@Test
public void testNestedTypes() throws Exception {
  Schema innerSchema = Schema.builder().addInt32Field("f1_int").addStringField("f1_str").build();
  Schema outerSchema =
      Schema.builder().addInt32Field("f_int").addRowField("nested", innerSchema).build();

  Row innerRow = Row.withSchema(innerSchema).addValues(18, "foobar").build();
  Row outerRow = Row.withSchema(outerSchema).addValues(42, innerRow).build();

  CoderProperties.coderDecodeEncodeEqual(RowCoder.of(outerSchema), outerRow);
}
/**
 * Selects {@code f_stringArr[1]} from the two-element input collection and expects the values
 * "111" and "33", in any order.
 */
@Test
public void testAccessArrayElement() {
  PCollection<Row> twoRows = pCollectionOf2Elements();

  Schema elementSchema = Schema.builder().addStringField("f_arrElem").build();

  String query = "SELECT f_stringArr[1] FROM PCOLLECTION";
  PCollection<Row> selectedElements = twoRows.apply("sqlQuery", SqlTransform.query(query));

  Row expectedFromFirst = Row.withSchema(elementSchema).addValues("111").build();
  Row expectedFromSecond = Row.withSchema(elementSchema).addValues("33").build();

  PAssert.that(selectedElements).containsInAnyOrder(expectedFromFirst, expectedFromSecond);

  pipeline.run();
}
/**
 * Checks that two schemas with a map field of the same name are neither equal nor equivalent
 * when the row schemas used as map value types differ in their field type.
 */
@Test
public void testNestedMapsNotEquivalent() {
  Schema int64Inner = Schema.builder().addInt64Field("foo").build();
  Schema stringInner = Schema.builder().addStringField("foo").build();

  Schema mapOfInt64Rows =
      Schema.builder().addMapField("foo", FieldType.STRING, FieldType.row(int64Inner)).build();
  Schema mapOfStringRows =
      Schema.builder().addMapField("foo", FieldType.STRING, FieldType.row(stringInner)).build();

  assertNotEquals(mapOfInt64Rows, mapOfStringRows);
  assertFalse(mapOfInt64Rows.equivalent(mapOfStringRows));
}
}
/**
 * Checks that two schemas with a row field of the same name are neither equal nor equivalent
 * when the nested row schemas differ in their field type.
 */
@Test
public void testNestedNotEquivalent() {
  Schema int64Inner = Schema.builder().addInt64Field("foo").build();
  Schema stringInner = Schema.builder().addStringField("foo").build();

  Schema outerWithInt64Row = Schema.builder().addRowField("foo", int64Inner).build();
  Schema outerWithStringRow = Schema.builder().addRowField("foo", stringInner).build();

  assertNotEquals(outerWithInt64Row, outerWithStringRow);
  assertFalse(outerWithInt64Row.equivalent(outerWithStringRow));
}
/**
 * Checks that two schemas with an array field of the same name are neither equal nor
 * equivalent when the row schemas used as element types differ in their field type.
 */
@Test
public void testNestedArraysNotEquivalent() {
  Schema int64Element = Schema.builder().addInt64Field("foo").build();
  Schema stringElement = Schema.builder().addStringField("foo").build();

  Schema arrayOfInt64Rows =
      Schema.builder().addArrayField("foo", FieldType.row(int64Element)).build();
  Schema arrayOfStringRows =
      Schema.builder().addArrayField("foo", FieldType.row(stringElement)).build();

  assertNotEquals(arrayOfInt64Rows, arrayOfStringRows);
  assertFalse(arrayOfInt64Rows.equivalent(arrayOfStringRows));
}