/** Projects two fields out of rows supplied through a named table tag and checks all four rows. */
private void runPartialFieldsInRows(PCollection<Row> input) throws Exception {
  String query = "SELECT f_int, f_long FROM TABLE_A";

  PCollection<Row> projected =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testPartialFieldsInRows", SqlTransform.query(query));

  Schema projectedSchema =
      Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  List<Row> expected =
      IntStream.range(0, 4)
          .mapToObj(idx -> rowAtIndex(projectedSchema, idx))
          .collect(toList());

  PAssert.that(projected).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/** Same projection as {@code runPartialFieldsInRows}, exercised under a distinct step name. */
private void runPartialFieldsInMultipleRow(PCollection<Row> input) throws Exception {
  String query = "SELECT f_int, f_long FROM TABLE_A";

  PCollection<Row> projected =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testPartialFieldsInMultipleRow", SqlTransform.query(query));

  Schema projectedSchema =
      Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  List<Row> expected =
      IntStream.range(0, 4)
          .mapToObj(idx -> rowAtIndex(projectedSchema, idx))
          .collect(toList());

  PAssert.that(projected).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/** Deserializing a JSON array field should yield the equivalent Row list value. */
@Test
public void testParsesArrayField() throws Exception {
  Schema schema =
      Schema.builder()
          .addInt32Field("f_int32")
          .addArrayField("f_intArray", FieldType.INT32)
          .build();

  String rowString =
      "{\n" + "\"f_int32\" : 32,\n" + "\"f_intArray\" : [ 1, 2, 3, 4, 5]\n" + "}";

  RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(schema);
  Row parsedRow = newObjectMapperWith(deserializer).readValue(rowString, Row.class);

  Row expectedRow =
      Row.withSchema(schema).addValues(32, Arrays.asList(1, 2, 3, 4, 5)).build();
  assertEquals(expectedRow, parsedRow);
}
/** Builds the bounded input: two nullable int columns plus one non-null int column. */
@Before
public void setUp() {
  Schema inputSchema =
      Schema.builder()
          .addNullableField("f_int1", Schema.FieldType.INT32)
          .addNullableField("f_int2", Schema.FieldType.INT32)
          .addInt32Field("f_int3")
          .build();

  // Row order matters only for readability; nulls exercise the nullable columns.
  List<Row> testRows =
      TestUtils.RowsBuilder.of(inputSchema)
          .addRows(1, 5, 1)
          .addRows(null, 1, 1)
          .addRows(2, 1, 1)
          .addRows(null, 1, 1)
          .addRows(null, null, 1)
          .addRows(null, null, 1)
          .addRows(3, 2, 1)
          .getRows();

  boundedInput =
      PBegin.in(pipeline)
          .apply(Create.of(testRows).withSchema(inputSchema, identity(), identity()));
}
/** GROUP-BY with UDAF, registered both on a bare PCollection and via a PCollectionTuple. */
@Test
public void testUdaf() throws Exception {
  Schema resultType =
      Schema.builder().addInt32Field("f_int2").addInt32Field("squaresum").build();
  Row expected = Row.withSchema(resultType).addValues(0, 30).build();

  // Apply the UDAF directly on the input PCollection.
  String sql1 =
      "SELECT f_int2, squaresum1(f_int) AS `squaresum`" + " FROM PCOLLECTION GROUP BY f_int2";
  PCollection<Row> viaPCollection =
      boundedInput1.apply(
          "testUdaf1", SqlTransform.query(sql1).registerUdaf("squaresum1", new SquareSum()));
  PAssert.that(viaPCollection).containsInAnyOrder(expected);

  // Same aggregation, but with the input bound through a tagged tuple.
  String sql2 =
      "SELECT f_int2, squaresum2(f_int) AS `squaresum`" + " FROM PCOLLECTION GROUP BY f_int2";
  PCollection<Row> viaTuple =
      PCollectionTuple.of(new TupleTag<>("PCOLLECTION"), boundedInput1)
          .apply(
              "testUdaf2", SqlTransform.query(sql2).registerUdaf("squaresum2", new SquareSum()));
  PAssert.that(viaTuple).containsInAnyOrder(expected);

  pipeline.run().waitUntilFinish();
}
@Test public void testThrowsForMismatchedRowField() throws Exception { Schema nestedRowSchema = Schema.builder().addInt32Field("f_nestedInt32").addStringField("f_nestedString").build(); Schema schema = Schema.builder().addInt32Field("f_int32").addRowField("f_row", nestedRowSchema).build(); String rowString = "{\n" + "\"f_int32\" : 32,\n" + "\"f_row\" : []\n" // expect object, get array + "}"; RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(schema); thrown.expect(UnsupportedRowJsonException.class); thrown.expectMessage("Expected JSON object"); newObjectMapperWith(deserializer).readValue(rowString, Row.class); }
/** Builds the bounded fixture rows for TABLE_B: (f_int, f_double, f_int2) triples. */
@Before
public void setUp() {
  Schema tableSchema =
      Schema.builder()
          .addInt32Field("f_int")
          .addDoubleField("f_double")
          .addInt32Field("f_int2")
          .build();

  List<Row> rows =
      TestUtils.RowsBuilder.of(tableSchema)
          .addRows(
              1, 1.0, 0,
              4, 4.0, 0,
              7, 7.0, 0,
              13, 13.0, 0,
              5, 5.0, 0,
              10, 10.0, 0,
              17, 17.0, 0)
          .getRows();

  boundedInput =
      pipeline.apply(
          Create.of(rows)
              .withSchema(
                  tableSchema,
                  SerializableFunctions.identity(),
                  SerializableFunctions.identity()));
}
/**
 * Test that correct exception is thrown when subclass of {@link CombineFn} is not parameterized.
 * BEAM-3777
 */
@Test
public void testRawCombineFnSubclass() {
  exceptions.expect(ParseException.class);
  exceptions.expectCause(hasMessage(containsString("CombineFn must be parameterized")));
  // The failure happens while expanding the transform, so the pipeline is never run.
  pipeline.enableAbandonedNodeEnforcement(false);

  String sql1 =
      "SELECT f_int2, squaresum(f_int) AS `squaresum`" + " FROM PCOLLECTION GROUP BY f_int2";
  // Registering a raw (non-parameterized) CombineFn must throw during query expansion.
  // The expected-result Schema/Row and the returned PCollection were unused (no PAssert,
  // no run) and have been removed.
  boundedInput1.apply(
      "testUdaf", SqlTransform.query(sql1).registerUdaf("squaresum", new RawCombineFn()));
}
/** DOT expression on a ROW primitive should return the value of the named field. */
@Test
public void testReturnsFieldValue() {
  Schema schema =
      Schema.builder().addStringField("f_string").addInt32Field("f_int").build();
  Row rowValue = Row.withSchema(schema).addValues("aaa", 14).build();

  // Operands: the row itself, then the field name to dereference.
  List<BeamSqlExpression> operands =
      ImmutableList.of(
          BeamSqlPrimitive.of(SqlTypeName.ROW, rowValue),
          BeamSqlPrimitive.of(SqlTypeName.VARCHAR, "f_string"));

  BeamSqlDotExpression dotExpression = new BeamSqlDotExpression(operands, SqlTypeName.VARCHAR);
  Object actual =
      dotExpression
          .evaluate(NULL_ROW, NULL_WINDOW, BeamSqlExpressionEnvironments.empty())
          .getValue();

  assertEquals("aaa", actual);
}
/** Referencing a field that does not exist in the row schema must fail fast. */
@Test
public void testThrowsForNonExistentField() {
  Schema schema =
      Schema.builder().addStringField("f_string").addInt32Field("f_int").build();
  Row rowValue = Row.withSchema(schema).addValues("aaa", 14).build();

  List<BeamSqlExpression> operands =
      ImmutableList.of(
          BeamSqlPrimitive.of(SqlTypeName.ROW, rowValue),
          BeamSqlPrimitive.of(SqlTypeName.VARCHAR, "f_nonExistent"));

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Cannot find field");

  new BeamSqlDotExpression(operands, SqlTypeName.VARCHAR)
      .evaluate(NULL_ROW, NULL_WINDOW, BeamSqlExpressionEnvironments.empty());
}
}
/** Round-trips a row whose single field is an array of nested rows through RowCoder. */
@Test
public void testArrayOfRow() throws Exception {
  Schema elementSchema =
      Schema.builder().addInt32Field("f1_int").addStringField("f1_str").build();
  FieldType collectionElementType = FieldType.row(elementSchema);
  Schema schema = Schema.builder().addArrayField("f_array", collectionElementType).build();

  Row first = Row.withSchema(elementSchema).addValues(1, "one").build();
  Row second = Row.withSchema(elementSchema).addValues(2, "two").build();
  Row third = Row.withSchema(elementSchema).addValues(3, "three").build();
  Row row = Row.withSchema(schema).addArray(first, second, third).build();

  CoderProperties.coderDecodeEncodeEqual(RowCoder.of(schema), row);
}
/** Test that an indirect subclass of a {@link CombineFn} works as a UDAF. BEAM-3777 */
@Test
public void testUdafMultiLevelDescendent() {
  Schema resultType =
      Schema.builder().addInt32Field("f_int2").addInt32Field("squaresum").build();
  Row expected = Row.withSchema(resultType).addValues(0, 354).build();

  String sql1 =
      "SELECT f_int2, double_square_sum(f_int) AS `squaresum`"
          + " FROM PCOLLECTION GROUP BY f_int2";

  // SquareSquareSum extends SquareSum, which itself extends CombineFn — two levels deep.
  PCollection<Row> result =
      boundedInput1.apply(
          "testUdaf",
          SqlTransform.query(sql1).registerUdaf("double_square_sum", new SquareSquareSum()));
  PAssert.that(result).containsInAnyOrder(expected);

  pipeline.run().waitUntilFinish();
}
/** CARDINALITY on an array column should return each array's element count. */
@Test
public void testCardinality() {
  PCollection<Row> input = pCollectionOf2Elements();

  Schema resultType = Schema.builder().addInt32Field("f_size").build();

  PCollection<Row> result =
      input.apply(
          "sqlQuery", SqlTransform.query("SELECT CARDINALITY(f_stringArr) FROM PCOLLECTION"));

  PAssert.that(result)
      .containsInAnyOrder(
          Row.withSchema(resultType).addValues(2).build(),
          Row.withSchema(resultType).addValues(3).build());

  // Block until completion so PAssert failures surface inside this test method,
  // matching every other test in this file (was a bare pipeline.run()).
  pipeline.run().waitUntilFinish();
}
/** SELECT DISTINCT over two columns: duplicates collapse to the four unique pairs. */
private void runDistinct(PCollection<Row> input) throws Exception {
  String query = "SELECT distinct f_int, f_long FROM PCOLLECTION ";

  PCollection<Row> distinctRows = input.apply("testDistinct", SqlTransform.query(query));

  Schema distinctSchema =
      Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  List<Row> expected =
      TestUtils.RowsBuilder.of(distinctSchema)
          .addRows(
              1, 1000L,
              2, 2000L,
              3, 3000L,
              4, 4000L)
          .getRows();

  PAssert.that(distinctRows).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/** Round-trips a row containing a nested row field through RowCoder. */
@Test
public void testNestedTypes() throws Exception {
  Schema innerSchema =
      Schema.builder().addInt32Field("f1_int").addStringField("f1_str").build();
  Schema outerSchema =
      Schema.builder().addInt32Field("f_int").addRowField("nested", innerSchema).build();

  Row inner = Row.withSchema(innerSchema).addValues(18, "foobar").build();
  Row outer = Row.withSchema(outerSchema).addValues(42, inner).build();

  CoderProperties.coderDecodeEncodeEqual(RowCoder.of(outerSchema), outer);
}
/** test auto-provider UDF/UDAF. */
@Test
public void testAutoUdfUdaf() throws Exception {
  Schema resultType =
      Schema.builder().addInt32Field("f_int2").addInt32Field("autoload_squarecubicsum").build();
  Row expected = Row.withSchema(resultType).addValues(0, 4890).build();

  // Both functions are discovered via withAutoUdfUdafLoad rather than explicit registration.
  String sql =
      "SELECT f_int2, autoload_squaresum(autoload_cubic(f_int)) AS `autoload_squarecubicsum`"
          + " FROM PCOLLECTION GROUP BY f_int2";

  PCollection<Row> result =
      boundedInput1.apply("testUdaf", SqlTransform.query(sql).withAutoUdfUdafLoad(true));
  PAssert.that(result).containsInAnyOrder(expected);

  pipeline.run().waitUntilFinish();
}
/** Builds one-column INT32 rows, one row per value, under the given field name. */
private List<Row> rowsWithSingleIntField(String fieldName, List<Integer> values) {
  Schema singleFieldSchema = Schema.builder().addInt32Field(fieldName).build();
  return TestUtils.rowsBuilderOf(singleFieldSchema).addRows(values).getRows();
}
/** Projects two fields from a tagged single-row input and checks the first expected row. */
private void runPartialFields(PCollection<Row> input) throws Exception {
  String query = "SELECT f_int, f_long FROM TABLE_A";

  PCollection<Row> projected =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testPartialFields", SqlTransform.query(query));

  Schema projectedSchema =
      Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  Row expected = rowAtIndex(projectedSchema, 0);

  PAssert.that(projected).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/** Global (non-windowed) GROUP BY: COUNT(*) per f_int2 over the whole bounded input. */
private void runAggregationWithoutWindow(PCollection<Row> input) throws Exception {
  String sql = "SELECT f_int2, COUNT(*) AS `getFieldCount` FROM PCOLLECTION GROUP BY f_int2";

  PCollection<Row> result = input.apply("testAggregationWithoutWindow", SqlTransform.query(sql));

  // Field name must match the SQL alias `getFieldCount` (was "size", which disagreed
  // with the query's output schema).
  Schema resultType =
      Schema.builder().addInt32Field("f_int2").addInt64Field("getFieldCount").build();

  Row row = Row.withSchema(resultType).addValues(0, 4L).build();

  PAssert.that(result).containsInAnyOrder(row);

  pipeline.run().waitUntilFinish();
}
/** Selecting a constant literal should yield a single-column row with that constant. */
public void runLiteralField(PCollection<Row> input) throws Exception {
  String query = "SELECT 1 as literal_field FROM TABLE_A";

  PCollection<Row> result =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testLiteralField", SqlTransform.query(query));

  Schema literalSchema = Schema.builder().addInt32Field("literal_field").build();
  Row expected = Row.withSchema(literalSchema).addValues(1).build();

  PAssert.that(result).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}