/**
 * Resolves a nested descriptor against the schema obtained for the given field.
 *
 * @param field the field whose schema (as returned by {@code getFieldSchema}) is the resolution
 *     target
 * @param subDescriptor the descriptor to resolve against that schema
 * @return the result of {@code subDescriptor.resolve(...)} on the field's schema
 */
private FieldAccessDescriptor resolvedNestedFieldsHelper(
    Field field, FieldAccessDescriptor subDescriptor) {
  final FieldAccessDescriptor resolvedSubDescriptor =
      subDescriptor.resolve(getFieldSchema(field));
  return resolvedSubDescriptor;
}
/**
 * Creates a field aggregation.
 *
 * <p>When {@code inputSchema} is supplied, the descriptor is resolved against it and the derived
 * sub-schema, its unnested form, and the {@code needsUnnesting} flag are computed. When it is
 * {@code null}, the descriptor is stored as passed in, both derived schemas are {@code null}, and
 * {@code needsUnnesting} is {@code false}.
 *
 * @param fieldsToAggregate descriptor of the fields fed to the combine function
 * @param outputField the field stored as this aggregation's output field
 * @param fn the combine function
 * @param combineTag the tuple tag stored for this aggregation
 * @param aggregationSchema the schema stored as the aggregation schema
 * @param inputSchema the input schema, or {@code null} if it is not available
 */
FieldAggregation(
    FieldAccessDescriptor fieldsToAggregate,
    Field outputField,
    CombineFn<FieldT, AccumT, OutputT> fn,
    TupleTag<Object> combineTag,
    Schema aggregationSchema,
    @Nullable Schema inputSchema) {
  // Values that do not depend on the input schema.
  this.aggregationSchema = aggregationSchema;
  this.combineTag = combineTag;
  this.fn = fn;
  this.outputField = outputField;

  if (inputSchema == null) {
    // No schema to resolve against: keep the descriptor unresolved.
    this.fieldsToAggregate = fieldsToAggregate;
    this.inputSubSchema = null;
    this.unnestedInputSubSchema = null;
    this.needsUnnesting = false;
  } else {
    this.fieldsToAggregate = fieldsToAggregate.resolve(inputSchema);
    this.inputSubSchema = Select.getOutputSchema(inputSchema, this.fieldsToAggregate);
    this.unnestedInputSubSchema = Unnest.getUnnestedSchema(inputSubSchema);
    // NOTE(review): this compares the full input schema (not inputSubSchema) with the unnested
    // sub-schema -- confirm that is the intended comparison.
    this.needsUnnesting = !inputSchema.equals(unnestedInputSubSchema);
  }
}
fieldAccessDescriptor.resolve(((SchemaCoder<?>) inputCoder).getSchema());
fieldAccessDescriptor.withOrderByFieldInsertionOrder().resolve(schema); Schema currentKeySchema = Select.getOutputSchema(schema, resolved); if (keySchema == null) {
@Override public PCollection<Row> expand(PCollection<T> input) { Schema inputSchema = input.getSchema(); FieldAccessDescriptor resolved = fieldAccessDescriptor.resolve(inputSchema); Schema outputSchema = getOutputSchema(inputSchema, resolved); PCollection<Row> selected = input .apply( ParDo.of( new DoFn<T, Row>() { // TODO: This should be the same as resolved so that Beam knows which fields // are being accessed. Currently Beam only supports wildcard descriptors. // Once BEAM-4457 is fixed, fix this. @FieldAccess("filterFields") final FieldAccessDescriptor fieldAccessDescriptor = FieldAccessDescriptor.withAllFields(); @ProcessElement public void process( @FieldAccess("filterFields") Row row, OutputReceiver<Row> r) { r.output(selectRow(row, resolved, inputSchema, outputSchema)); } })) .setRowSchema(outputSchema); return selected; }
/** A wildcard descriptor must still report {@code allFields()} after being resolved. */
@Test
public void testAllFields() {
  FieldAccessDescriptor resolved = FieldAccessDescriptor.withAllFields().resolve(SIMPLE_SCHEMA);

  assertTrue(resolved.allFields());
}
/**
 * Resolves a two-level descriptor against {@code NESTED_ARRAY_SCHEMA} and checks that the access
 * is recorded as a single nested descriptor (under id 1) that reads field id 2.
 */
@Test
public void testArrayNestedField() {
  FieldAccessDescriptor innerAccess = FieldAccessDescriptor.withFieldNames("field2");
  FieldAccessDescriptor outerAccess =
      FieldAccessDescriptor.withFieldNames("field1").withNestedField("field1", innerAccess);

  FieldAccessDescriptor topLevel = outerAccess.resolve(NESTED_ARRAY_SCHEMA);
  // Nothing is expected to be accessed directly at the top level.
  assertTrue(topLevel.fieldIdsAccessed().isEmpty());
  assertEquals(1, topLevel.nestedFields().size());

  FieldAccessDescriptor nestedLevel = topLevel.nestedFields().get(1);
  assertEquals(Sets.newHashSet(2), nestedLevel.fieldIdsAccessed());
}
/**
 * Attaches a wildcard nested descriptor by field id and checks that, after resolving against
 * {@code NESTED_SCHEMA2}, the only recorded access is that nested wildcard under id 1.
 */
@Test
public void testNestedFieldById() {
  FieldAccessDescriptor unresolved =
      FieldAccessDescriptor.withFieldNames("field1")
          .withNestedField(1, FieldAccessDescriptor.withAllFields());

  FieldAccessDescriptor resolved = unresolved.resolve(NESTED_SCHEMA2);
  assertTrue(resolved.fieldIdsAccessed().isEmpty());
  assertEquals(1, resolved.nestedFields().size());

  FieldAccessDescriptor nested = resolved.nestedFields().get(1);
  assertTrue(nested.allFields());
}
/** Field names {@code field0} and {@code field2} are expected to resolve to ids 0 and 2. */
@Test
public void testFieldNames() {
  FieldAccessDescriptor byName = FieldAccessDescriptor.withFieldNames("field0", "field2");

  FieldAccessDescriptor resolved = byName.resolve(SIMPLE_SCHEMA);

  assertEquals(Sets.newHashSet(0, 2), resolved.fieldIdsAccessed());
}
/** Field ids 1 and 3 are expected to be reported unchanged after resolution. */
@Test
public void testFieldIds() {
  FieldAccessDescriptor byId = FieldAccessDescriptor.withFieldIds(1, 3);

  FieldAccessDescriptor resolved = byId.resolve(SIMPLE_SCHEMA);

  assertEquals(Sets.newHashSet(1, 3), resolved.fieldIdsAccessed());
}
/**
 * Attaches a wildcard nested descriptor by field name and checks that, after resolving against
 * {@code NESTED_SCHEMA2}, the only recorded access is that nested wildcard under id 1.
 */
@Test
public void testNestedFieldByName() {
  FieldAccessDescriptor unresolved =
      FieldAccessDescriptor.withFieldNames("field1")
          .withNestedField("field1", FieldAccessDescriptor.withAllFields());

  FieldAccessDescriptor resolved = unresolved.resolve(NESTED_SCHEMA2);
  assertTrue(resolved.fieldIdsAccessed().isEmpty());
  assertEquals(1, resolved.nestedFields().size());

  FieldAccessDescriptor nested = resolved.nestedFields().get(1);
  assertTrue(nested.allFields());
}
// Replace the descriptor with the result of resolving it against the schema reported by schemaCoder.
fieldAccessDescriptor = fieldAccessDescriptor.resolve(schemaCoder.getSchema());
/**
 * Resolves a two-level descriptor against {@code NESTED_MAP_SCHEMA} and checks that the access is
 * recorded as a single nested descriptor (under id 1) that reads field id 2.
 */
@Test
public void testMapNestedField() {
  FieldAccessDescriptor innerAccess = FieldAccessDescriptor.withFieldNames("field2");
  FieldAccessDescriptor outerAccess =
      FieldAccessDescriptor.withFieldNames("field1").withNestedField("field1", innerAccess);

  FieldAccessDescriptor topLevel = outerAccess.resolve(NESTED_MAP_SCHEMA);
  // Nothing is expected to be accessed directly at the top level.
  assertTrue(topLevel.fieldIdsAccessed().isEmpty());
  assertEquals(1, topLevel.nestedFields().size());

  FieldAccessDescriptor nestedLevel = topLevel.nestedFields().get(1);
  assertEquals(Sets.newHashSet(2), nestedLevel.fieldIdsAccessed());
}
}
/**
 * Groups the input elements by the fields named in this transform's descriptor.
 *
 * <p>The descriptor is resolved against the input schema and the key schema is derived from it
 * (and stored in {@code keySchema}). Each element is then paired with a key row built by {@code
 * Select.selectRow}, and the keyed collection is passed through {@code GroupByKey}.
 *
 * @param input the collection to group
 * @return the elements grouped under their key rows
 */
@Override
public PCollection<KV<Row, Iterable<InputT>>> expand(PCollection<InputT> input) {
  Schema schema = input.getSchema();
  FieldAccessDescriptor resolved = fieldAccessDescriptor.resolve(schema);
  keySchema = Select.getOutputSchema(schema, resolved);

  DoFn<InputT, KV<Row, InputT>> keyByFieldsFn =
      new DoFn<InputT, KV<Row, InputT>>() {
        @ProcessElement
        public void process(
            @Element InputT element, @Element Row row, OutputReceiver<KV<Row, InputT>> o) {
          Row key = Select.selectRow(row, resolved, schema, keySchema);
          o.output(KV.of(key, element));
        }
      };

  PCollection<KV<Row, InputT>> keyed =
      input
          .apply("Group by fields", ParDo.of(keyByFieldsFn))
          .setCoder(KvCoder.of(SchemaCoder.of(keySchema), input.getCoder()));

  return keyed.apply(GroupByKey.create());
}
}
/**
 * Resolves a three-level descriptor against {@code NESTED_SCHEMA2}. The two outer levels are
 * each expected to record no direct field access and exactly one nested descriptor (under id 1);
 * the innermost level is expected to read field id 2.
 */
@Test
public void testPartialAccessNestedField() {
  FieldAccessDescriptor innermost = FieldAccessDescriptor.withFieldNames("field2");
  FieldAccessDescriptor middle =
      FieldAccessDescriptor.withFieldNames("field1").withNestedField("field1", innermost);
  FieldAccessDescriptor outermost =
      FieldAccessDescriptor.withFieldNames("field1").withNestedField("field1", middle);

  FieldAccessDescriptor firstLevel = outermost.resolve(NESTED_SCHEMA2);
  assertTrue(firstLevel.fieldIdsAccessed().isEmpty());
  assertEquals(1, firstLevel.nestedFields().size());

  FieldAccessDescriptor secondLevel = firstLevel.nestedFields().get(1);
  assertTrue(secondLevel.fieldIdsAccessed().isEmpty());
  assertEquals(1, secondLevel.nestedFields().size());

  FieldAccessDescriptor thirdLevel = secondLevel.nestedFields().get(1);
  assertEquals(Sets.newHashSet(2), thirdLevel.fieldIdsAccessed());
}