/** Verifies that projecting (f_int, f_long) out of TABLE_A yields the first four rows. */
private void runPartialFieldsInRows(PCollection<Row> input) throws Exception {
  String query = "SELECT f_int, f_long FROM TABLE_A";
  PCollection<Row> projected =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testPartialFieldsInRows", SqlTransform.query(query));
  // Only the two selected columns survive the projection.
  Schema projectedSchema = Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  List<Row> expected =
      IntStream.rangeClosed(0, 3)
          .mapToObj(idx -> rowAtIndex(projectedSchema, idx))
          .collect(toList());
  PAssert.that(projected).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
/** Same projection check as {@code runPartialFieldsInRows}, exercised against a multi-row input. */
private void runPartialFieldsInMultipleRow(PCollection<Row> input) throws Exception {
  String query = "SELECT f_int, f_long FROM TABLE_A";
  PCollection<Row> projected =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testPartialFieldsInMultipleRow", SqlTransform.query(query));
  Schema projectedSchema = Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  List<Row> expected =
      IntStream.rangeClosed(0, 3)
          .mapToObj(idx -> rowAtIndex(projectedSchema, idx))
          .collect(toList());
  PAssert.that(projected).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
@Test
public void testPrimitiveTypes() throws Exception {
  // Round-trips a row containing every primitive field type through RowCoder
  // (decode(encode(row)) must equal row).
  Schema schema =
      Schema.builder()
          .addByteField("f_byte")
          .addInt16Field("f_int16")
          .addInt32Field("f_int32")
          .addInt64Field("f_int64")
          .addDecimalField("f_decimal")
          .addFloatField("f_float")
          .addDoubleField("f_double")
          .addStringField("f_string")
          .addDateTimeField("f_datetime")
          .addBooleanField("f_boolean")
          .build();
  DateTime dateTime =
      // Month written as plain decimal 3 (the previous "03" parsed as an octal literal —
      // same value, but misleading).
      new DateTime().withDate(1979, 3, 14).withTime(1, 2, 3, 4).withZone(DateTimeZone.UTC);
  Row row =
      Row.withSchema(schema)
          .addValues(
              (byte) 0,
              (short) 1,
              2,
              3L,
              // String constructor: new BigDecimal(2.3) would embed the inexact binary
              // representation of the double 2.3 rather than the exact decimal 2.3.
              new BigDecimal("2.3"),
              1.2f,
              3.0d,
              "str",
              dateTime,
              false)
          .build();
  CoderProperties.coderDecodeEncodeEqual(RowCoder.of(schema), row);
}
@Test
public void testEquivalent() {
  // Two schemas whose fields (including nested ones) appear in a different order
  // are not equal, but must be equivalent.
  final Schema nestedA = Schema.builder().addStringField("yard1").addInt64Field("yard2").build();
  final Schema schemaA =
      Schema.builder()
          .addStringField("field1")
          .addInt64Field("field2")
          .addRowField("field3", nestedA)
          .addArrayField("field4", FieldType.row(nestedA))
          .addMapField("field5", FieldType.STRING, FieldType.row(nestedA))
          .build();
  // Same fields as above, declared in reverse order at both nesting levels.
  final Schema nestedB = Schema.builder().addInt64Field("yard2").addStringField("yard1").build();
  final Schema schemaB =
      Schema.builder()
          .addMapField("field5", FieldType.STRING, FieldType.row(nestedB))
          .addArrayField("field4", FieldType.row(nestedB))
          .addRowField("field3", nestedB)
          .addInt64Field("field2")
          .addStringField("field1")
          .build();
  assertNotEquals(schemaA, schemaB);
  assertTrue(schemaA.equivalent(schemaB));
}
@Test
@Category(NeedsRunner.class)
public void testAggregateByMultipleFields() {
  // Four pojos whose field1 values sum to 10 and field2 values sum to 6; the combined
  // aggregate over both fields is expected to be 16.
  Collection<AggregatePojos> pojos =
      ImmutableList.of(
          new AggregatePojos(1, 1, 2),
          new AggregatePojos(2, 1, 3),
          new AggregatePojos(3, 2, 4),
          new AggregatePojos(4, 2, 5));
  List<String> aggregatedFields = Lists.newArrayList("field1", "field2");
  PCollection<Row> aggregate =
      pipeline
          .apply(Create.of(pojos))
          .apply(
              Group.<AggregatePojos>globally()
                  .aggregateFields(aggregatedFields, new MultipleFieldCombineFn(), "field1+field2"));
  Schema resultSchema = Schema.builder().addInt64Field("field1+field2").build();
  PAssert.that(aggregate)
      .containsInAnyOrder(Row.withSchema(resultSchema).addValues(16L).build());
  pipeline.run();
}
@Test public void testCountGroupByNullable() { String sql = "SELECT COUNT(f_int1) as c, f_int2 FROM PCOLLECTION GROUP BY f_int2"; PCollection<Row> out = boundedInput.apply(SqlTransform.query(sql)); Schema schema = out.getSchema(); PAssert.that(out) .containsInAnyOrder( Row.withSchema(schema).addValues(0L, null).build(), Row.withSchema(schema).addValues(1L, 1).build(), Row.withSchema(schema).addValues(1L, 5).build(), Row.withSchema(schema).addValues(1L, 2).build()); assertEquals( Schema.builder() // COUNT() is never nullable, and calcite knows it .addInt64Field("c") .addNullableField("f_int2", Schema.FieldType.INT32) .build(), schema); pipeline.run(); }
@Test
public void testToEnumerable_collectMultiple() {
  // A two-column values relation should be enumerated as a single Object[] row.
  Schema rowSchema = Schema.builder().addInt64Field("id").addInt64Field("otherid").build();
  RelDataType relType = CalciteUtils.toCalciteRowType(rowSchema, TYPE_FACTORY);
  ImmutableList<ImmutableList<RexLiteral>> literals =
      ImmutableList.of(
          ImmutableList.of(
              rexBuilder.makeBigintLiteral(BigDecimal.ZERO),
              rexBuilder.makeBigintLiteral(BigDecimal.ONE)));
  BeamRelNode valuesRel = new BeamValuesRel(cluster, relType, literals, null);
  Enumerable<Object> enumerable = BeamEnumerableConverter.toEnumerable(options, valuesRel);
  Enumerator<Object> enumerator = enumerable.enumerator();
  assertTrue(enumerator.moveNext());
  Object[] values = (Object[]) enumerator.current();
  assertEquals(2, values.length);
  assertEquals(0L, values[0]);
  assertEquals(1L, values[1]);
  // Exactly one row was produced.
  assertFalse(enumerator.moveNext());
  enumerator.close();
}
@Test
public void testFieldAccessToNestedRow() {
  // Dotted access into two different nested row fields of the same column.
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(
          pipeline,
          sqlEnv.parseQuery(
              "SELECT nestedRowTestTable.col.RowField.string_field, nestedRowTestTable.col.RowFieldTwo.long_field FROM nestedRowTestTable"));
  Schema expectedSchema =
      Schema.builder().addStringField("field1").addInt64Field("field2").build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues("inner_str_one", 3L).build();
  PAssert.that(output).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
@Test
public void testNestedArray() {
  // Indexes into an array-of-arrays column (SQL array indices are 1-based).
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(
          pipeline,
          sqlEnv.parseQuery(
              "SELECT nestedArrayTestTable.col[1][3], nestedArrayTestTable.col[2][1] FROM nestedArrayTestTable"));
  Schema expectedSchema =
      Schema.builder().addInt64Field("field1").addInt64Field("field2").build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues(3L, 4L).build();
  PAssert.that(output).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
@Test
public void testPrimitiveNotEquivalent() {
  // Same name, different type: neither equal nor equivalent.
  Schema left = Schema.builder().addInt64Field("foo").build();
  Schema right = Schema.builder().addStringField("foo").build();
  assertNotEquals(left, right);
  assertFalse(left.equivalent(right));

  // Same type, different name: neither equal nor equivalent.
  left = Schema.builder().addInt64Field("foo").build();
  right = Schema.builder().addInt64Field("bar").build();
  assertNotEquals(left, right);
  assertFalse(left.equivalent(right));

  // Same name and type, different nullability: neither equal nor equivalent.
  left = Schema.builder().addInt64Field("foo").build();
  right = Schema.builder().addNullableField("foo", FieldType.INT64).build();
  assertNotEquals(left, right);
  assertFalse(left.equivalent(right));
}
@Ignore("https://issues.apache.org/jira/browse/BEAM-5189")
@Test
public void testSelectInnerRowOfNestedRow() {
  // Selects a whole nested row (not a leaf field) out of a row-typed column.
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(
          pipeline,
          sqlEnv.parseQuery("SELECT nestedRowTestTable.col.RowField FROM nestedRowTestTable"));
  Schema expectedSchema =
      Schema.builder().addStringField("field1").addInt64Field("field2").build();
  Row expectedRow = Row.withSchema(expectedSchema).addValues("inner_str_one", 1L).build();
  PAssert.that(output).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
@Test
public void testRowWithArray() {
  // Indexes into an array field nested inside a row-typed column.
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(readOnlyTableProvider);
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(
          pipeline,
          sqlEnv.parseQuery(
              "SELECT rowWithArrayTestTable.col.field3[2] FROM rowWithArrayTestTable"));
  Row expectedRow =
      Row.withSchema(Schema.builder().addInt64Field("int64").build()).addValue(6L).build();
  PAssert.that(output).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish(Duration.standardMinutes(2));
}
/** Verifies that projecting (f_int, f_long) out of a single-row TABLE_A yields that one row. */
private void runPartialFields(PCollection<Row> input) throws Exception {
  String query = "SELECT f_int, f_long FROM TABLE_A";
  PCollection<Row> projected =
      PCollectionTuple.of(new TupleTag<>("TABLE_A"), input)
          .apply("testPartialFields", SqlTransform.query(query));
  Schema projectedSchema = Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  Row expectedRow = rowAtIndex(projectedSchema, 0);
  PAssert.that(projected).containsInAnyOrder(expectedRow);
  pipeline.run().waitUntilFinish();
}
/**
 * Runs a non-windowed grouped COUNT(*) over the input and checks the single expected group.
 *
 * <p>The count column must be aliased {@code size} so that the output Row's schema matches the
 * expected schema built below — the previous alias {@code getFieldCount} produced a schema whose
 * field name disagreed with the expected field {@code size}, so the expected and actual Rows could
 * never compare equal.
 */
private void runAggregationWithoutWindow(PCollection<Row> input) throws Exception {
  String sql = "SELECT f_int2, COUNT(*) AS `size` FROM PCOLLECTION GROUP BY f_int2";
  PCollection<Row> result = input.apply("testAggregationWithoutWindow", SqlTransform.query(sql));
  Schema resultType = Schema.builder().addInt32Field("f_int2").addInt64Field("size").build();
  // All four input rows share f_int2 == 0, so there is exactly one group with count 4.
  Row row = Row.withSchema(resultType).addValues(0, 4L).build();
  PAssert.that(result).containsInAnyOrder(row);
  pipeline.run().waitUntilFinish();
}
/** Verifies that SELECT DISTINCT de-duplicates the (f_int, f_long) pairs of the input. */
private void runDistinct(PCollection<Row> input) throws Exception {
  String sql = "SELECT distinct f_int, f_long FROM PCOLLECTION ";
  PCollection<Row> distinctRows = input.apply("testDistinct", SqlTransform.query(sql));
  Schema outputSchema = Schema.builder().addInt32Field("f_int").addInt64Field("f_long").build();
  // Each distinct pair appears exactly once in the output.
  List<Row> expected =
      TestUtils.RowsBuilder.of(outputSchema)
          .addRows(
              1, 1000L,
              2, 2000L,
              3, 3000L,
              4, 4000L)
          .getRows();
  PAssert.that(distinctRows).containsInAnyOrder(expected);
  pipeline.run().waitUntilFinish();
}
@Test
public void testLength() throws Exception {
  // Applies LENGTH to a bytes column and checks the three expected byte lengths.
  Schema resultType = Schema.builder().addInt64Field("field").build();
  Row resultRow = Row.withSchema(resultType).addValues(10L).build();
  Row resultRow2 = Row.withSchema(resultType).addValues(0L).build();
  Row resultRow3 = Row.withSchema(resultType).addValues(2L).build();
  String sql = "SELECT LENGTH(f_bytes) FROM PCOLLECTION WHERE f_func = 'LENGTH'";
  // Transform label matches the test name ("testUdf" was a copy-paste leftover from
  // another test and did not describe this step).
  PCollection<Row> result = boundedInputBytes.apply("testLength", SqlTransform.query(sql));
  PAssert.that(result).containsInAnyOrder(resultRow, resultRow2, resultRow3);
  pipeline.run().waitUntilFinish();
}
@Test
public void testNestedMapsNotEquivalent() {
  // Maps whose value rows differ in field type are neither equal nor equivalent.
  Schema innerLeft = Schema.builder().addInt64Field("foo").build();
  Schema innerRight = Schema.builder().addStringField("foo").build();
  Schema left =
      Schema.builder().addMapField("foo", FieldType.STRING, FieldType.row(innerLeft)).build();
  Schema right =
      Schema.builder().addMapField("foo", FieldType.STRING, FieldType.row(innerRight)).build();
  assertNotEquals(left, right);
  assertFalse(left.equivalent(right));
}
}
@Test
public void testNestedNotEquivalent() {
  // Row fields whose nested schemas differ in field type make the outer schemas
  // neither equal nor equivalent.
  Schema innerLeft = Schema.builder().addInt64Field("foo").build();
  Schema innerRight = Schema.builder().addStringField("foo").build();
  Schema left = Schema.builder().addRowField("foo", innerLeft).build();
  Schema right = Schema.builder().addRowField("foo", innerRight).build();
  assertNotEquals(left, right);
  assertFalse(left.equivalent(right));
}
@Test
public void testToEnumerable_collectSingle() {
  // A single-column values relation is enumerated as the bare value, not an Object[].
  Schema rowSchema = Schema.builder().addInt64Field("id").build();
  RelDataType relType = CalciteUtils.toCalciteRowType(rowSchema, TYPE_FACTORY);
  ImmutableList<ImmutableList<RexLiteral>> literals =
      ImmutableList.of(ImmutableList.of(rexBuilder.makeBigintLiteral(BigDecimal.ZERO)));
  BeamRelNode valuesRel = new BeamValuesRel(cluster, relType, literals, null);
  Enumerable<Object> enumerable = BeamEnumerableConverter.toEnumerable(options, valuesRel);
  Enumerator<Object> enumerator = enumerable.enumerator();
  assertTrue(enumerator.moveNext());
  assertEquals(0L, enumerator.current());
  // Exactly one row was produced.
  assertFalse(enumerator.moveNext());
  enumerator.close();
}
@Test
public void testNestedArraysNotEquivalent() {
  // Arrays whose element rows differ in field type are neither equal nor equivalent.
  Schema innerLeft = Schema.builder().addInt64Field("foo").build();
  Schema innerRight = Schema.builder().addStringField("foo").build();
  Schema left = Schema.builder().addArrayField("foo", FieldType.row(innerLeft)).build();
  Schema right = Schema.builder().addArrayField("foo", FieldType.row(innerRight)).build();
  assertNotEquals(left, right);
  assertFalse(left.equivalent(right));
}