/**
 * Returns the number of fields in this schema.
 *
 * @return the size of the list returned by {@link #getFields()}
 */
public int getFieldCount() {
  return getFields().size();
}
}
/**
 * Returns the field at the given position in this schema.
 *
 * @param index zero-based position of the field
 */
public Field getField(int index) {
  List<Field> allFields = getFields();
  return allFields.get(index);
}
/**
 * Returns the names of all fields, in the order they appear in this schema.
 *
 * @return a list with one name per field
 */
public List<String> getFieldNames() {
  return getFields()
      .stream()
      .map(field -> field.getName())
      .collect(Collectors.toList());
}
/** Returns true if two Schemas have the same fields in the same order. */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }
  Schema other = (Schema) o;
  // If both schemas have a UUID set, we can simply compare the UUIDs.
  // NOTE(review): this shortcut assumes equal UUIDs imply structurally equal schemas, and
  // that hashCode is consistent with it — confirm against the hashCode implementation.
  if (uuid != null && other.uuid != null) {
    return Objects.equals(uuid, other.uuid);
  }
  // Otherwise fall back to a structural comparison: the name-to-index map plus the field
  // list itself (which covers field names, types, and their order).
  return Objects.equals(fieldIndices, other.fieldIndices)
      && Objects.equals(getFields(), other.getFields());
}
/**
 * Creates a deserializer for a {@link Row} {@link Schema}.
 *
 * <p>Every field type is validated up front so unsupported schemas fail at construction
 * time rather than during deserialization.
 */
public static RowJsonDeserializer forSchema(Schema schema) {
  for (Schema.Field field : schema.getFields()) {
    RowJsonValidation.verifyFieldTypeSupported(field);
  }
  return new RowJsonDeserializer(schema);
}
/** Returns the field with the given name, resolved through the name-to-index lookup. */
public Field getField(String name) {
  int position = indexOf(name);
  return getFields().get(position);
}
/**
 * Verifies that every field of the schema has a deterministic coder, delegating to
 * {@link Coder#verifyDeterministic} for the aggregate check.
 *
 * @throws org.apache.beam.sdk.coders.Coder.NonDeterministicException if any field coder is
 *     non-deterministic
 */
private void verifyDeterministic(Schema schema)
    throws org.apache.beam.sdk.coders.Coder.NonDeterministicException {
  // Build one component coder per schema field, in field order.
  List<Coder<?>> componentCoders =
      schema
          .getFields()
          .stream()
          .map(field -> RowCoder.coderForFieldType(field.getType()))
          .collect(Collectors.toList());
  Coder.verifyDeterministic(this, "All fields must have deterministic encoding", componentCoders);
}
public static List<FieldValueGetter> getGetters(Class<?> clazz, Schema schema) { // Return the getters ordered by their position in the schema. return CACHED_GETTERS.computeIfAbsent( new ClassWithSchema(clazz, schema), c -> { Map<String, FieldValueGetter> getterMap = ReflectUtils.getFields(clazz) .stream() .map(POJOUtils::createGetter) .collect(Collectors.toMap(FieldValueGetter::name, Function.identity())); return schema .getFields() .stream() .map(f -> getterMap.get(f.getName())) .collect(Collectors.toList()); }); }
/**
 * Converts a {@link SchemaAndRecord} into a Beam {@link Row} using the configured schema.
 *
 * @throws IllegalStateException if the Beam schema and the Avro record schema have a
 *     different number of fields
 */
@Override
public Row apply(SchemaAndRecord input) {
  GenericRecord record = input.getRecord();
  int expectedFieldCount = schema.getFields().size();
  int actualFieldCount = record.getSchema().getFields().size();
  // Fail fast with the actual sizes so a mismatch is diagnosable from the message alone.
  checkState(
      expectedFieldCount == actualFieldCount,
      "Schema sizes are different. Expected %s fields, but the record has %s fields.",
      expectedFieldCount,
      actualFieldCount);
  return toBeamRow(record, schema);
}
}
public static List<FieldValueSetter> getSetters(Class<?> clazz, Schema schema) { // Return the setters, ordered by their position in the schema. return CACHED_SETTERS.computeIfAbsent( new ClassWithSchema(clazz, schema), c -> { Map<String, FieldValueSetter> setterMap = ReflectUtils.getFields(clazz) .stream() .map(POJOUtils::createSetter) .collect(Collectors.toMap(FieldValueSetter::name, Function.identity())); return schema .getFields() .stream() .map(f -> setterMap.get(f.getName())) .collect(Collectors.toList()); }); }
/**
 * Converts a Beam {@link Row} into an Avatica column array, converting each value
 * according to its schema field type.
 */
private static Object[] rowToAvatica(Row row) {
  Schema schema = row.getSchema();
  Object[] avaticaColumns = new Object[schema.getFields().size()];
  int column = 0;
  for (Schema.Field field : schema.getFields()) {
    avaticaColumns[column] = fieldToAvatica(field.getType(), row.getValue(column));
    column++;
  }
  return avaticaColumns;
}
/** Returns a copy of the given schema in which every field is marked nullable. */
private Schema buildNullSchema(Schema schema) {
  Schema.Builder nullableBuilder = Schema.builder();
  for (Schema.Field field : schema.getFields()) {
    nullableBuilder.addField(field.withNullable(true));
  }
  return nullableBuilder.build();
}
/**
 * Get values for fields in the same order they're specified in schema, including timestamp,
 * payload, and attributes.
 */
private List<Object> getFieldValues(ProcessContext context) {
  return messageSchema()
      .getFields()
      .stream()
      .map(
          schemaField ->
              getValueForField(schemaField, context.timestamp(), context.element()))
      .collect(toList());
}
/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 */
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  List<TableFieldSchema> bqFields = bqSchema.getFields();
  // Map each BigQuery field name to its position so Beam fields can be matched by name.
  Map<String, Integer> bqFieldIndices =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(i -> bqFields.get(i).getName(), i -> i));
  // Pull the raw JSON value for every Beam schema field, in Beam schema order.
  // NOTE(review): if a Beam field name is absent from the BigQuery schema,
  // bqFieldIndices.get(...) yields null and the .get(index) below throws — confirm
  // callers guarantee the two schemas line up.
  List<Object> rawJsonValues =
      rowSchema
          .getFields()
          .stream()
          .map(field -> bqFieldIndices.get(field.getName()))
          .map(index -> jsonBqRow.getF().get(index).getV())
          .collect(toList());
  // Convert each raw value to the Beam type declared at the same index, then build the Row.
  return IntStream.range(0, rowSchema.getFieldCount())
      .boxed()
      .map(index -> toBeamValue(rowSchema.getField(index).getType(), rawJsonValues.get(index)))
      .collect(toRow(rowSchema));
}
/**
 * Renders a row field type as SQL text of the form {@code ROW<name1 type1,name2 type2,...>},
 * recursing into {@code unparse} for each nested field type.
 */
private String unparseRow(FieldType fieldType) {
  String fieldList =
      fieldType
          .getRowSchema()
          .getFields()
          .stream()
          .map(schemaField -> schemaField.getName() + " " + unparse(schemaField.getType()))
          .collect(joining(","));
  return "ROW<" + fieldList + ">";
}
}
/**
 * Strict conversion from AVRO to Beam, strict because it doesn't do widening or narrowing during
 * conversion.
 *
 * @param record the Avro record to convert
 * @param schema the Beam schema describing the expected fields
 * @return a Beam {@link Row} with one value per Beam schema field, in schema order
 */
public static Row toRowStrict(@Nonnull GenericRecord record, @Nonnull Schema schema) {
  Row.Builder builder = Row.withSchema(schema);
  org.apache.avro.Schema avroSchema = record.getSchema();
  // Iterate the Beam schema (not the Avro one) so values are added in Beam field order.
  for (Schema.Field field : schema.getFields()) {
    Object value = record.get(field.getName());
    // NOTE(review): avroSchema.getField(...) returns null when the Avro schema lacks this
    // Beam field, which would NPE here — confirm schemas are pre-validated to match.
    org.apache.avro.Schema fieldAvroSchema = avroSchema.getField(field.getName()).schema();
    if (value == null) {
      // Nulls pass through unconverted.
      builder.addValue(null);
    } else {
      builder.addValue(convertAvroFieldStrict(value, fieldAvroSchema, field.getType()));
    }
  }
  return builder.build();
}
// Recursively flattens nested row fields into a single-level schema. nameComponents acts as a
// mutable stack holding the field-name path from the root down to the current field; fn maps a
// full path to the flattened field name.
private static Schema getUnnestedSchema(
    Schema schema, List<String> nameComponents, SerializableFunction<List<String>, String> fn) {
  Schema.Builder builder = Schema.builder();
  for (Field field : schema.getFields()) {
    // Push this field's name onto the path before descending or naming.
    nameComponents.add(field.getName());
    if (field.getType().getTypeName().isCompositeType()) {
      // Composite (row) field: recurse, then splice its flattened fields into this level.
      Schema nestedSchema = getUnnestedSchema(field.getType().getRowSchema(), nameComponents, fn);
      for (Field nestedField : nestedSchema.getFields()) {
        builder.addField(nestedField);
      }
    } else {
      // Leaf field: derive the flattened name from the full path.
      String name = fn.apply(nameComponents);
      Field newField = field.toBuilder().setName(name).build();
      builder.addField(newField);
    }
    // Pop the current component so sibling fields see the correct path.
    nameComponents.remove(nameComponents.size() - 1);
  }
  return builder.build();
}
/** Unnest a row. */
// Converts a JSON object node into a Beam Row matching the field's row schema. Any non-object
// JSON node (array, scalar, null) is rejected up front.
private static Row jsonObjectToRow(FieldValue rowFieldValue) {
  if (!rowFieldValue.isJsonObject()) {
    throw new UnsupportedRowJsonException(
        "Expected JSON object for field '"
            + rowFieldValue.name()
            + "'. "
            + "Unable to convert '"
            + rowFieldValue.jsonValue().asText()
            + "'"
            + " to Beam Row, it is not a JSON object. Currently only JSON objects "
            + "can be parsed to Beam Rows");
  }
  // For each schema field, look up the JSON property of the same name, convert it to a Beam
  // value, and assemble the results into a Row with the same schema.
  return rowFieldValue
      .rowSchema()
      .getFields()
      .stream()
      .map(
          schemaField ->
              extractJsonNodeValue(
                  FieldValue.of(
                      schemaField.getName(),
                      schemaField.getType(),
                      rowFieldValue.jsonFieldValue(schemaField.getName()))))
      .collect(toRow(rowFieldValue.rowSchema()));
}
// Wires the generated Coder subclass's methods using ByteBuddy: the schema and a precomputed
// "has nullable fields" flag are baked in as fixed return values, while encode/decode are
// delegated to the bytecode-generating instruction implementations.
private static DynamicType.Builder<Coder> implementMethods(
    Schema schema, DynamicType.Builder<Coder> builder) {
  // Computed once here so the generated coder need not re-scan the schema at runtime.
  boolean hasNullableFields = schema.getFields().stream().anyMatch(Field::getNullable);
  return builder
      .defineMethod("getSchema", Schema.class, Visibility.PRIVATE, Ownership.STATIC)
      .intercept(FixedValue.reference(schema))
      .defineMethod("hasNullableFields", boolean.class, Visibility.PRIVATE, Ownership.STATIC)
      .intercept(FixedValue.reference(hasNullableFields))
      .method(ElementMatchers.named("encode"))
      .intercept(new EncodeInstruction())
      .method(ElementMatchers.named("decode"))
      .intercept(new DecodeInstruction());
}
/**
 * Verifies that the given field type can be converted from JSON to a Beam value, recursing
 * into row and collection element types.
 *
 * @throws RowJsonDeserializer.UnsupportedRowJsonException if the type (or any nested type)
 *     is not supported
 */
static void verifyFieldTypeSupported(Schema.FieldType fieldType) {
  Schema.TypeName typeName = fieldType.getTypeName();
  if (typeName.isCompositeType()) {
    // Row type: every nested field must itself be supported.
    fieldType.getRowSchema().getFields().forEach(RowJsonValidation::verifyFieldTypeSupported);
  } else if (typeName.isCollectionType()) {
    // Collection type: supported iff its element type is supported.
    verifyFieldTypeSupported(fieldType.getCollectionElementType());
  } else if (!SUPPORTED_TYPES.contains(typeName)) {
    throw new RowJsonDeserializer.UnsupportedRowJsonException(
        typeName.name()
            + " is not supported when converting JSON objects to Rows. "
            + "Supported types are: "
            + SUPPORTED_TYPES.toString());
  }
}
}