/**
 * Checks that a JSON array whose elements are themselves integer arrays is deserialized into the
 * corresponding array-of-arrays row field.
 */
@Test
public void testParsesArrayOfArrays() throws Exception {
  FieldType innerArrayType = FieldType.array(FieldType.INT32);
  Schema nestedArraySchema =
      Schema.builder().addArrayField("f_arrayOfIntArrays", innerArrayType).build();

  // The expectation is independent of the parsing step, so it is prepared up front.
  Row expected =
      Row.withSchema(nestedArraySchema)
          .addArray(Arrays.asList(1, 2), Arrays.asList(3, 4), Arrays.asList(5))
          .build();

  String json = "{\n" + "\"f_arrayOfIntArrays\" : [ [1, 2], [3, 4], [5]]\n" + "}";
  Row actual =
      newObjectMapperWith(RowJsonDeserializer.forSchema(nestedArraySchema))
          .readValue(json, Row.class);

  assertEquals(expected, actual);
}
/**
 * Checks that a JSON object holding an integer and an integer array is deserialized into a row
 * with an INT32 field followed by an INT32 array field.
 */
@Test
public void testParsesArrayField() throws Exception {
  Schema intAndArraySchema =
      Schema.builder()
          .addInt32Field("f_int32")
          .addArrayField("f_intArray", FieldType.INT32)
          .build();

  // Expected row: the scalar value followed by the list backing the array field.
  Row expected =
      Row.withSchema(intAndArraySchema).addValues(32, Arrays.asList(1, 2, 3, 4, 5)).build();

  String json = "{\n" + "\"f_int32\" : 32,\n" + "\"f_intArray\" : [ 1, 2, 3, 4, 5]\n" + "}";
  Row actual =
      newObjectMapperWith(RowJsonDeserializer.forSchema(intAndArraySchema))
          .readValue(json, Row.class);

  assertEquals(expected, actual);
}
@Test public void testThrowsForMismatchedArrayField() throws Exception { Schema schema = Schema.builder() .addArrayField("f_arrayOfIntArrays", FieldType.array(FieldType.INT32)) .build(); String rowString = "{\n" + "\"f_arrayOfIntArrays\" : { }\n" // expect array, get object + "}"; RowJsonDeserializer deserializer = RowJsonDeserializer.forSchema(schema); thrown.expect(UnsupportedRowJsonException.class); thrown.expectMessage("Expected JSON array"); newObjectMapperWith(deserializer).readValue(rowString, Row.class); }
Schema.builder().addArrayField("f_nestedArray", Schema.FieldType.STRING).build(); .addStringField("f_nestedString") .addInt32Field("f_nestedIntPlusOne") .addArrayField("f_nestedArray", Schema.FieldType.STRING) .build();
.addInt64Field("field1_sum") .addInt32Field("field3_sum") .addArrayField("field1_top", FieldType.INT64) .build();
Schema.builder() .addInt32Field("f_int") .addArrayField("f_arrayOfRows", Schema.FieldType.row(elementSchema)) .build();
.addStringField("f_nestedString") .addInt32Field("f_nestedIntPlusOne") .addArrayField("f_nestedArray", Schema.FieldType.STRING) .build();
.addInt64Field("field1_sum") .addInt32Field("field3_sum") .addArrayField("field1_top", FieldType.INT64) .build();
.addInt64Field("field1_sum") .addInt32Field("field3_sum") .addArrayField("field1_top", FieldType.INT64) .build(); Row expectedRow = Row.withSchema(aggregateSchema).addValues(10L, 14).addArray(4L).build();
Schema.builder() .addInt32Field("f_int") .addArrayField("f_arrayOfRows", Schema.FieldType.row(elementSchema)) .build();
/**
 * Builds two schemas with the same named fields declared in opposite orders (including the nested
 * row schema used by the row, array and map fields) and checks that they are not equal but are
 * reported as equivalent.
 */
@Test
public void testEquivalent() {
  // First variant: nested fields and top-level fields in "forward" order.
  Schema forwardNested = Schema.builder().addStringField("yard1").addInt64Field("yard2").build();
  FieldType forwardNestedRow = FieldType.row(forwardNested);
  Schema forwardSchema =
      Schema.builder()
          .addStringField("field1")
          .addInt64Field("field2")
          .addRowField("field3", forwardNested)
          .addArrayField("field4", forwardNestedRow)
          .addMapField("field5", FieldType.STRING, forwardNestedRow)
          .build();

  // Second variant: the same fields, declared in reverse order at both levels.
  Schema reversedNested = Schema.builder().addInt64Field("yard2").addStringField("yard1").build();
  FieldType reversedNestedRow = FieldType.row(reversedNested);
  Schema reversedSchema =
      Schema.builder()
          .addMapField("field5", FieldType.STRING, reversedNestedRow)
          .addArrayField("field4", reversedNestedRow)
          .addRowField("field3", reversedNested)
          .addInt64Field("field2")
          .addStringField("field1")
          .build();

  assertNotEquals(forwardSchema, reversedSchema);
  assertTrue(forwardSchema.equivalent(reversedSchema));
}
/**
 * Applies a global grouping over four {@code AggregatePojos} that sums {@code field1}, sums
 * {@code field3} and keeps the single largest {@code field1}, then asserts that the pipeline
 * yields exactly the one expected aggregate row.
 */
@Test
@Category(NeedsRunner.class)
public void testGloballyWithSchemaAggregateFn() {
  Collection<AggregatePojos> pojos =
      ImmutableList.of(
          new AggregatePojos(1, 1, 2),
          new AggregatePojos(2, 1, 3),
          new AggregatePojos(3, 2, 4),
          new AggregatePojos(4, 2, 5));

  PCollection<AggregatePojos> input = pipeline.apply(Create.of(pojos));
  PCollection<Row> aggregated =
      input.apply(
          Group.<AggregatePojos>globally()
              .aggregateField("field1", Sum.ofLongs(), "field1_sum")
              .aggregateField("field3", Sum.ofIntegers(), "field3_sum")
              .aggregateField("field1", Top.largestLongsFn(1), "field1_top"));

  // Output schema: one field per aggregation, named after the aggregation's output name.
  Schema outputSchema =
      Schema.builder()
          .addInt64Field("field1_sum")
          .addInt32Field("field3_sum")
          .addArrayField("field1_top", FieldType.INT64)
          .build();
  Row expected = Row.withSchema(outputSchema).addValues(10L, 14).addArray(4L).build();

  PAssert.that(aggregated).containsInAnyOrder(expected);
  pipeline.run();
}
/**
 * Runs a SQL query that selects a constant integer and a constant string array for every input
 * element, and asserts that each of the two input elements produces the same output row.
 */
@Test
public void testSelectArrayValue() {
  Schema outputSchema =
      Schema.builder()
          .addInt32Field("f_int")
          .addArrayField("f_arr", Schema.FieldType.STRING)
          .build();

  // Both outputs are identical because the query projects only literals.
  Row firstExpected =
      Row.withSchema(outputSchema).addValues(42, Arrays.asList("aa", "bb")).build();
  Row secondExpected =
      Row.withSchema(outputSchema).addValues(42, Arrays.asList("aa", "bb")).build();

  PCollection<Row> source = pCollectionOf2Elements();
  PCollection<Row> queried =
      source.apply(
          "sqlQuery",
          SqlTransform.query("SELECT 42, ARRAY ['aa', 'bb'] as `f_arr` FROM PCOLLECTION"));

  PAssert.that(queried).containsInAnyOrder(firstExpected, secondExpected);

  pipeline.run();
}
/**
 * Runs a SQL query that projects the integer field and the string-array field of the input and
 * asserts that both input elements come through with their array contents.
 */
@Test
public void testProjectArrayField() {
  Schema outputSchema =
      Schema.builder()
          .addInt32Field("f_int")
          .addArrayField("f_stringArr", Schema.FieldType.STRING)
          .build();

  // One expected row per element of the two-element input collection.
  Row expectedFirst =
      Row.withSchema(outputSchema).addValues(1).addArray(Arrays.asList("111", "222")).build();
  Row expectedSecond =
      Row.withSchema(outputSchema)
          .addValues(2)
          .addArray(Arrays.asList("33", "44", "55"))
          .build();

  PCollection<Row> source = pCollectionOf2Elements();
  PCollection<Row> projected =
      source.apply("sqlQuery", SqlTransform.query("SELECT f_int, f_stringArr FROM PCOLLECTION"));

  PAssert.that(projected).containsInAnyOrder(expectedFirst, expectedSecond);

  pipeline.run();
}
/**
 * Round-trips a row whose single field is an array of nested rows through {@link RowCoder} and
 * checks that the decoded value equals the original.
 */
@Test
public void testArrayOfRow() throws Exception {
  Schema elementSchema = Schema.builder().addInt32Field("f1_int").addStringField("f1_str").build();
  Schema outerSchema =
      Schema.builder().addArrayField("f_array", FieldType.row(elementSchema)).build();

  // The three nested rows that make up the array value.
  Row one = Row.withSchema(elementSchema).addValues(1, "one").build();
  Row two = Row.withSchema(elementSchema).addValues(2, "two").build();
  Row three = Row.withSchema(elementSchema).addValues(3, "three").build();

  Row outerRow = Row.withSchema(outerSchema).addArray(one, two, three).build();

  CoderProperties.coderDecodeEncodeEqual(RowCoder.of(outerSchema), outerRow);
}
/**
 * Round-trips a row with a single string-array field through {@link RowCoder} and checks that the
 * decoded value equals the original.
 */
@Test
public void testArrays() throws Exception {
  Schema stringArraySchema =
      Schema.builder().addArrayField("f_array", FieldType.STRING).build();
  RowCoder coder = RowCoder.of(stringArraySchema);

  Row sample =
      Row.withSchema(stringArraySchema).addArray("one", "two", "three", "four").build();

  CoderProperties.coderDecodeEncodeEqual(coder, sample);
}
/**
 * Checks that creating a deserializer fails with {@link UnsupportedRowJsonException} when a
 * nested row schema contains a DATETIME array field.
 */
@Test
public void testThrowsForUnsupportedNestedFieldType() throws Exception {
  Schema innerSchema =
      Schema.builder().addArrayField("f_dateTimeArray", FieldType.DATETIME).build();
  Schema outerSchema = Schema.builder().addRowField("f_nestedRow", innerSchema).build();

  // Register the expected failure before the factory call that should raise it.
  thrown.expect(UnsupportedRowJsonException.class);
  thrown.expectMessage("DATETIME is not supported");

  RowJsonDeserializer.forSchema(outerSchema);
}
/**
 * Checks that two schemas whose same-named array fields have different element types are neither
 * equal nor equivalent.
 */
@Test
public void testArrayNotEquivalent() {
  Schema booleanArraySchema = Schema.builder().addArrayField("foo", FieldType.BOOLEAN).build();
  Schema dateTimeArraySchema = Schema.builder().addArrayField("foo", FieldType.DATETIME).build();

  assertNotEquals(booleanArraySchema, dateTimeArraySchema);
  assertFalse(booleanArraySchema.equivalent(dateTimeArraySchema));
}
/**
 * Checks that creating a deserializer fails with {@link UnsupportedRowJsonException} when the
 * schema has an array field with DATETIME elements.
 */
@Test
public void testThrowsForUnsupportedArrayElementType() throws Exception {
  Schema dateTimeArraySchema =
      Schema.builder().addArrayField("f_dateTimeArray", FieldType.DATETIME).build();

  // Register the expected failure before the factory call that should raise it.
  thrown.expect(UnsupportedRowJsonException.class);
  thrown.expectMessage("DATETIME is not supported");

  RowJsonDeserializer.forSchema(dateTimeArraySchema);
}
/**
 * Checks that two schemas with same-named arrays of rows are neither equal nor equivalent when
 * the element row schemas give their same-named field different types.
 */
@Test
public void testNestedArraysNotEquivalent() {
  // Element schemas share the field name "foo" but differ in its type.
  Schema int64Element = Schema.builder().addInt64Field("foo").build();
  Schema stringElement = Schema.builder().addStringField("foo").build();

  FieldType int64RowType = FieldType.row(int64Element);
  FieldType stringRowType = FieldType.row(stringElement);

  Schema arrayOfInt64Rows = Schema.builder().addArrayField("foo", int64RowType).build();
  Schema arrayOfStringRows = Schema.builder().addArrayField("foo", stringRowType).build();

  assertNotEquals(arrayOfInt64Rows, arrayOfStringRows);
  assertFalse(arrayOfInt64Rows.equivalent(arrayOfStringRows));
}