public static List<FieldValueGetter> getGetters(Class<?> clazz, Schema schema) { // Return the getters ordered by their position in the schema. return CACHED_GETTERS.computeIfAbsent( new ClassWithSchema(clazz, schema), c -> { Map<String, FieldValueGetter> getterMap = ReflectUtils.getFields(clazz) .stream() .map(POJOUtils::createGetter) .collect(Collectors.toMap(FieldValueGetter::name, Function.identity())); return schema .getFields() .stream() .map(f -> getterMap.get(f.getName())) .collect(Collectors.toList()); }); }
public static List<FieldValueSetter> getSetters(Class<?> clazz, Schema schema) { // Return the setters, ordered by their position in the schema. return CACHED_SETTERS.computeIfAbsent( new ClassWithSchema(clazz, schema), c -> { Map<String, FieldValueSetter> setterMap = ReflectUtils.getFields(clazz) .stream() .map(POJOUtils::createSetter) .collect(Collectors.toMap(FieldValueSetter::name, Function.identity())); return schema .getFields() .stream() .map(f -> setterMap.get(f.getName())) .collect(Collectors.toList()); }); }
/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 */
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqFields = bqSchema.getFields();

  // BigQuery column name -> position of that column in the BQ row.
  Map<String, Integer> bqFieldIndices =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(i -> bqFields.get(i).getName(), i -> i));

  // For each Beam field, locate the matching BQ cell by name and convert its raw JSON value.
  return rowSchema
      .getFields()
      .stream()
      .map(
          field -> {
            int bqIndex = bqFieldIndices.get(field.getName());
            Object rawJsonValue = jsonBqRow.getF().get(bqIndex).getV();
            return toBeamValue(field.getType(), rawJsonValue);
          })
      .collect(toRow(rowSchema));
}
/**
 * Computes the schema produced by applying {@code fieldAccessDescriptor} to {@code inputSchema}:
 * top-level selected fields are copied verbatim, nested selections recurse into the row type.
 */
static Schema getOutputSchema(Schema inputSchema, FieldAccessDescriptor fieldAccessDescriptor) {
  // Selecting all fields leaves the schema untouched.
  if (fieldAccessDescriptor.allFields()) {
    return inputSchema;
  }

  Schema.Builder outputBuilder = new Schema.Builder();

  // Directly-accessed fields are carried over as-is.
  for (int fieldId : fieldAccessDescriptor.fieldIdsAccessed()) {
    outputBuilder.addField(inputSchema.getField(fieldId));
  }

  // Nested accesses recurse, producing a pruned row type; original nullability is preserved.
  fieldAccessDescriptor
      .nestedFields()
      .forEach(
          (fieldId, nestedDescriptor) -> {
            Field field = inputSchema.getField(fieldId);
            FieldType prunedType =
                FieldType.row(getOutputSchema(field.getType().getRowSchema(), nestedDescriptor));
            if (field.getNullable()) {
              outputBuilder.addNullableField(field.getName(), prunedType);
            } else {
              outputBuilder.addField(field.getName(), prunedType);
            }
          });

  return outputBuilder.build();
}
/**
 * Validates {@code values} against {@code schema}: sizes must match, null is only allowed for
 * nullable fields, and each non-null value is checked against its field type.
 *
 * @return the verified values, in schema order
 * @throws IllegalArgumentException on a size mismatch or a null in a non-nullable field
 */
private List<Object> verify(Schema schema, List<Object> values) {
  if (schema.getFieldCount() != values.size()) {
    throw new IllegalArgumentException(
        String.format(
            "Field count in Schema (%s) and values (%s) must match",
            schema.getFieldNames(), values));
  }
  List<Object> verifiedValues = Lists.newArrayListWithCapacity(values.size());
  for (int idx = 0; idx < values.size(); ++idx) {
    Schema.Field field = schema.getField(idx);
    Object rawValue = values.get(idx);
    if (rawValue != null) {
      verifiedValues.add(verify(rawValue, field.getType(), field.getName()));
    } else if (field.getNullable()) {
      verifiedValues.add(null);
    } else {
      throw new IllegalArgumentException(
          String.format("Field %s is not nullable", field.getName()));
    }
  }
  return verifiedValues;
}
/**
 * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during
 * conversion.
 *
 * @param record the source Avro record
 * @param schema the target Beam schema; its field names are looked up in the record
 * @return a Beam {@link Row} with each field converted via {@code convertAvroFieldStrict}
 */
public static Row toRowStrict(@Nonnull GenericRecord record, @Nonnull Schema schema) {
  Row.Builder builder = Row.withSchema(schema);
  org.apache.avro.Schema avroSchema = record.getSchema();
  for (Schema.Field field : schema.getFields()) {
    Object value = record.get(field.getName());
    if (value == null) {
      builder.addValue(null);
    } else {
      // Only resolve the Avro field schema when there is a value to convert. The original code
      // dereferenced avroSchema.getField(name) unconditionally, which NPEs for a null value
      // whose name is absent from the Avro schema (GenericData.Record.get returns null for
      // unknown names).
      org.apache.avro.Schema fieldAvroSchema = avroSchema.getField(field.getName()).schema();
      builder.addValue(convertAvroFieldStrict(value, fieldAvroSchema, field.getType()));
    }
  }
  return builder.build();
}
/**
 * Casts {@code input} to {@code outputSchema}, matching fields by NAME (not position) and
 * converting each value with {@code castValue}. Returns null for a null input row.
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }
  Row.Builder output = Row.withSchema(outputSchema);
  int outputFieldCount = outputSchema.getFieldCount();
  for (int outIdx = 0; outIdx < outputFieldCount; outIdx++) {
    Schema.Field outputField = outputSchema.getField(outIdx);
    // Locate the source field by name; positions may differ between the two schemas.
    int inIdx = inputSchema.indexOf(outputField.getName());
    Object castedValue =
        castValue(
            input.getValue(inIdx), inputSchema.getField(inIdx).getType(), outputField.getType());
    output.addValue(castedValue);
  }
  return output.build();
}
/**
 * Resolves the value for one top-level field of the Pubsub message schema. Only the fixed
 * 'timestamp', 'attributes', and 'payload' fields are supported.
 *
 * @throws IllegalArgumentException for any other field name
 */
private Object getValueForField(
    Schema.Field field, Instant timestamp, PubsubMessage pubsubMessage) {
  String fieldName = field.getName();
  if (fieldName.equals(TIMESTAMP_FIELD)) {
    return timestamp;
  }
  if (fieldName.equals(ATTRIBUTES_FIELD)) {
    return pubsubMessage.getAttributeMap();
  }
  if (fieldName.equals(PAYLOAD_FIELD)) {
    return parsePayloadJsonRow(pubsubMessage);
  }
  throw new IllegalArgumentException(
      "Unexpected field '"
          + fieldName
          + "' in top level schema"
          + " for Pubsub message. Top level schema should only contain "
          + "'timestamp', 'attributes', and 'payload' fields");
}
@Test
public void testCollector() {
  // Collect a stream of fields into a Schema and verify names/types survive in order.
  Schema schema =
      Stream.of(
              Schema.Field.of("f_int", FieldType.INT32),
              Schema.Field.of("f_string", FieldType.STRING))
          .collect(toSchema());

  assertEquals(2, schema.getFieldCount());

  Schema.Field first = schema.getField(0);
  assertEquals("f_int", first.getName());
  assertEquals(FieldType.INT32, first.getType());

  Schema.Field second = schema.getField(1);
  assertEquals("f_string", second.getName());
  assertEquals(FieldType.STRING, second.getType());
}
@Override
public int hashCode() {
  // Must stay consistent with equals(): both are defined over name, description, type, and
  // nullability.
  return Objects.hash(getName(), getDescription(), getType(), getNullable());
}
}
/**
 * Builds a Schema from an ordered list of fields, recording each field's position by name.
 *
 * @throws IllegalArgumentException if two fields share a name
 */
public Schema(List<Field> fields) {
  this.fields = fields;
  int position = 0;
  for (Field field : fields) {
    String name = field.getName();
    // Field names must be unique within a schema.
    if (fieldIndices.get(name) != null) {
      throw new IllegalArgumentException("Duplicate field " + name + " added to schema");
    }
    fieldIndices.put(name, position);
    position++;
  }
  // Precompute the hash since both members are now fixed.
  this.hashCode = Objects.hash(fieldIndices, fields);
}
@Test
public void testArrayOfRowSchema() {
  // An array whose elements are rows of a nested schema.
  Schema elementSchema = Schema.of(Field.of("f1_str", FieldType.STRING));
  FieldType arrayOfRows = FieldType.array(FieldType.row(elementSchema));
  Schema schema = Schema.of(Field.of("f_array", arrayOfRows));

  Field retrieved = schema.getField("f_array");
  assertEquals("f_array", retrieved.getName());
  assertEquals(arrayOfRows, retrieved.getType());
}
@Test
public void testNestedSchema() {
  // A row-typed field whose schema contains a single string field.
  Schema innerSchema = Schema.of(Field.of("f1_str", FieldType.STRING));
  Schema outerSchema = Schema.of(Field.of("nested", FieldType.row(innerSchema)));

  Field innerField =
      outerSchema.getField("nested").getType().getRowSchema().getField("f1_str");
  assertEquals("f1_str", innerField.getName());
  assertEquals(FieldType.STRING, innerField.getType());
}
@Override
public boolean equals(Object o) {
  if (!(o instanceof Field)) {
    return false;
  }
  Field that = (Field) o;
  // Equality is defined over name, description, type, and nullability.
  if (!Objects.equals(getName(), that.getName())) {
    return false;
  }
  if (!Objects.equals(getDescription(), that.getDescription())) {
    return false;
  }
  if (!Objects.equals(getType(), that.getType())) {
    return false;
  }
  return Objects.equals(getNullable(), that.getNullable());
}
@Test
public void testNestedArraySchema() {
  // An array-of-arrays of strings.
  FieldType nestedArrayType = FieldType.array(FieldType.array(FieldType.STRING));
  Schema schema = Schema.of(Field.of("f_array", nestedArrayType));

  Field retrieved = schema.getField("f_array");
  assertEquals("f_array", retrieved.getName());
  assertEquals(nestedArrayType, retrieved.getType());
}
@Test
public void testArraySchema() {
  // A simple array-of-strings field.
  FieldType stringArrayType = FieldType.array(FieldType.STRING);
  Schema schema = Schema.of(Field.of("f_array", stringArrayType));

  Field retrieved = schema.getField("f_array");
  assertEquals("f_array", retrieved.getName());
  assertEquals(stringArrayType, retrieved.getType());
}
/**
 * Writes one column definition: name, SQL type, an optional NOT NULL constraint, and an
 * optional COMMENT clause.
 */
private void unparseColumn(SqlWriter writer, Schema.Field column) {
  writer.sep(",");
  writer.identifier(column.getName());
  writer.identifier(CalciteUtils.toSqlTypeName(column.getType()).name());
  // Nullability is three-valued here; only an explicit FALSE emits NOT NULL.
  if (Boolean.FALSE.equals(column.getNullable())) {
    writer.keyword("NOT NULL");
  }
  String description = column.getDescription();
  if (description != null) {
    writer.keyword("COMMENT");
    writer.literal(description);
  }
}
/** Create an instance of {@code RelDataType} so it can be used to create a table. */
public static RelDataType toCalciteRowType(Schema schema, RelDataTypeFactory dataTypeFactory) {
  RelDataTypeFactory.Builder builder = new RelDataTypeFactory.Builder(dataTypeFactory);
  // Add one Calcite column per Beam schema field, preserving order.
  for (int idx = 0; idx < schema.getFieldCount(); idx++) {
    builder.add(schema.getField(idx).getName(), toRelDataType(dataTypeFactory, schema, idx));
  }
  return builder.build();
}
/** Renders a row type as {@code ROW<name1 type1,name2 type2,...>}. */
private String unparseRow(FieldType fieldType) {
  StringBuilder rendered = new StringBuilder("ROW<");
  String separator = "";
  for (Schema.Field field : fieldType.getRowSchema().getFields()) {
    rendered.append(separator).append(field.getName()).append(' ').append(unparse(field.getType()));
    separator = ",";
  }
  return rendered.append('>').toString();
}
}
/**
 * Checks whether this field is equivalent to {@code otherField}: names must match, types must be
 * equivalent, and nullability is compared according to {@code nullablePolicy}.
 */
private boolean equivalent(Field otherField, EquivalenceNullablePolicy nullablePolicy) {
  if (nullablePolicy == EquivalenceNullablePolicy.SAME) {
    // SAME requires identical nullability on both sides.
    if (!otherField.getNullable().equals(getNullable())) {
      return false;
    }
  } else if (nullablePolicy == EquivalenceNullablePolicy.WEAKEN) {
    // WEAKEN rejects only the case where this field is nullable but the other is not.
    if (getNullable() && !otherField.getNullable()) {
      return false;
    }
  }
  return otherField.getName().equals(getName()) && getType().equivalent(otherField.getType());
}