/**
 * Checks that {@code fieldId} is a valid index into the schema's field list.
 *
 * @return the validated field id, unchanged
 * @throws IllegalArgumentException if {@code fieldId} is negative or past the last field
 */
private static int validateFieldId(Schema schema, int fieldId) {
  boolean outOfRange = fieldId < 0 || fieldId >= schema.getFieldCount();
  if (outOfRange) {
    throw new IllegalArgumentException("Invalid field id " + fieldId + " for schema " + schema);
  }
  return fieldId;
}
}
/**
 * Returns whether this schema and {@code other} are equivalent under the given nullability
 * policy, matching fields by name rather than by declaration order.
 */
private boolean equivalent(Schema other, EquivalenceNullablePolicy nullablePolicy) {
  if (other.getFieldCount() != getFieldCount()) {
    return false;
  }
  // Sort both field lists by name so positionally-different but same-named fields line up.
  Comparator<Field> byName = Comparator.comparing(Field::getName);
  List<Field> theirFields = new ArrayList<>(other.getFields());
  List<Field> ourFields = new ArrayList<>(getFields());
  theirFields.sort(byName);
  ourFields.sort(byName);
  for (int i = 0; i < theirFields.size(); ++i) {
    if (!theirFields.get(i).equivalent(ourFields.get(i), nullablePolicy)) {
      return false;
    }
  }
  return true;
}
/** Creates a {@link Row} from the list of values and {@link #getSchema()}. */
public static <T> Collector<T, List<Object>, Row> toRow(Schema schema) {
  // Accumulate values into a list pre-sized to the schema width, then build the Row once.
  return Collector.of(
      () -> new ArrayList<>(schema.getFieldCount()),
      List::add,
      (a, b) -> {
        a.addAll(b);
        return a;
      },
      accumulated -> Row.withSchema(schema).addValues(accumulated).build());
}
/**
 * Convenient way to build a {@code BeamSqlRow}s.
 *
 * <p>e.g.
 *
 * <pre>{@code
 * buildRows(
 *     schema,
 *     1, 1, 1, // the first row
 *     2, 2, 2, // the second row
 *     ...
 * )
 * }</pre>
 */
public static List<Row> buildRows(Schema type, List<?> rowsValues) {
  // Chunk the flat value list into row-sized slices, one slice per output Row.
  List<Row> rows = new ArrayList<>();
  for (List<?> singleRowValues : Lists.partition(rowsValues, type.getFieldCount())) {
    rows.add(singleRowValues.stream().collect(toRow(type)));
  }
  return rows;
}
}
/** Returns true if two schemas are equal ignoring field names and descriptions. */
public boolean typesEqual(Schema other) {
  // Two schemas tagged with the same non-null UUID are known identical; skip the field scan.
  if (uuid != null && other.uuid != null && Objects.equals(uuid, other.uuid)) {
    return true;
  }
  if (getFieldCount() != other.getFieldCount()) {
    return false;
  }
  // NOTE(review): this assumes fieldIndices.values() has value-based equals (e.g. a BiMap's
  // Set view). A plain HashMap values() view compares by identity and would always differ —
  // confirm the declared type of fieldIndices.
  if (!Objects.equals(fieldIndices.values(), other.fieldIndices.values())) {
    return false;
  }
  // Pairwise type comparison; Field.typesEqual ignores names/descriptions per the contract.
  for (int i = 0; i < getFieldCount(); ++i) {
    if (!getField(i).typesEqual(other.getField(i))) {
      return false;
    }
  }
  return true;
}
/** Creates a new record filled with nulls. */
public static Row nullRow(Schema schema) {
  // One null per schema field, in field order.
  Row.Builder builder = Row.withSchema(schema);
  return builder.addValues(Collections.nCopies(schema.getFieldCount(), null)).build();
}
}
/**
 * Return the estimated serialized size of a given row object.
 *
 * <p>The estimate is the null-bitmap size (one bit per field, rounded up to whole 64-bit
 * words, in bytes) plus the sum of the per-field value size estimates.
 */
public static long estimatedSizeBytes(Row row) {
  Schema schema = row.getSchema();
  int fieldCount = schema.getFieldCount();
  // One bit per field, rounded up to 8-byte words. For fieldCount == 0 this is 0.
  int bitmapSize = (((fieldCount - 1) >> 6) + 1) * 8;
  // Accumulate in a long: the previous version cast each per-field estimate to int and
  // summed into an int, which could truncate/overflow for very large values.
  long fieldsSize = 0;
  for (int i = 0; i < fieldCount; ++i) {
    fieldsSize += estimatedSizeBytes(schema.getField(i).getType(), row.getValue(i));
  }
  return bitmapSize + fieldsSize;
}
/**
 * Tries to parse the JSON {@link TableRow} from BigQuery.
 *
 * <p>Only supports basic types and arrays. Doesn't support date types.
 */
public static Row toBeamRow(Schema rowSchema, TableSchema bqSchema, TableRow jsonBqRow) {
  // Map each BigQuery column name to its position within the BigQuery schema.
  List<TableFieldSchema> bqFields = bqSchema.getFields();
  Map<String, Integer> bqFieldIndices =
      IntStream.range(0, bqFields.size())
          .boxed()
          .collect(toMap(i -> bqFields.get(i).getName(), i -> i));
  // Pull the raw JSON cell values in Beam-schema field order by looking up each Beam
  // field's position in the BigQuery row.
  // NOTE(review): if a Beam field name is absent from the BQ schema, bqFieldIndices.get()
  // returns null and getF().get(index) throws NPE on unboxing — confirm callers guarantee
  // the BQ schema covers every Beam field.
  List<Object> rawJsonValues =
      rowSchema
          .getFields()
          .stream()
          .map(field -> bqFieldIndices.get(field.getName()))
          .map(index -> jsonBqRow.getF().get(index).getV())
          .collect(toList());
  // Convert each raw value to its Beam field type and assemble the Row.
  return IntStream.range(0, rowSchema.getFieldCount())
      .boxed()
      .map(index -> toBeamValue(rowSchema.getField(index).getType(), rawJsonValues.get(index)))
      .collect(toRow(rowSchema));
}
/**
 * Validates {@code values} against {@code schema}: the counts must match and a null value
 * is only accepted for a nullable field. Non-null values are recursively verified against
 * their field type.
 *
 * @return the verified values, in field order
 * @throws IllegalArgumentException on a count mismatch or a null in a non-nullable field
 */
private List<Object> verify(Schema schema, List<Object> values) {
  if (schema.getFieldCount() != values.size()) {
    throw new IllegalArgumentException(
        String.format(
            "Field count in Schema (%s) and values (%s) must match",
            schema.getFieldNames(), values));
  }
  List<Object> verifiedValues = Lists.newArrayListWithCapacity(values.size());
  for (int i = 0; i < values.size(); ++i) {
    Schema.Field field = schema.getField(i);
    Object rawValue = values.get(i);
    if (rawValue != null) {
      verifiedValues.add(verify(rawValue, field.getType(), field.getName()));
    } else if (field.getNullable()) {
      verifiedValues.add(null);
    } else {
      throw new IllegalArgumentException(
          String.format("Field %s is not nullable", field.getName()));
    }
  }
  return verifiedValues;
}
@ProcessElement
public void processElement(ProcessContext c) {
  Row inputRow = c.element();
  // Evaluate the compiled SQL expressions against the input row.
  // NOTE(review): a null result presumably means the row produces no output (e.g. filtered
  // out) — confirm against the executor's contract.
  @Nullable
  List<Object> rawResultValues =
      executor.execute(inputRow, null, BeamSqlExpressionEnvironments.forRow(inputRow, null));
  if (rawResultValues != null) {
    // Cast each raw result to the declared output field type before emitting the Row.
    List<Object> castResultValues =
        IntStream.range(0, outputSchema.getFieldCount())
            .mapToObj(i -> castField(rawResultValues, i))
            .collect(toList());
    c.output(Row.withSchema(outputSchema).addValues(castResultValues).build());
  }
}
List<Row> rows = new ArrayList<>(); for (CSVRecord rawRecord : parser.getRecords()) { if (rawRecord.size() != schema.getFieldCount()) { throw new IllegalArgumentException( String.format( "Expect %d fields, but actually %d", schema.getFieldCount(), rawRecord.size())); IntStream.range(0, schema.getFieldCount()) .mapToObj(idx -> autoCastField(schema.getField(idx), rawRecord.get(idx))) .collect(toRow(schema)));
/** Create an instance of {@code RelDataType} so it can be used to create a table. */
public static RelDataType toCalciteRowType(Schema schema, RelDataTypeFactory dataTypeFactory) {
  RelDataTypeFactory.Builder builder = new RelDataTypeFactory.Builder(dataTypeFactory);
  // Add one Calcite column per Beam schema field, preserving field order.
  for (int idx = 0; idx < schema.getFieldCount(); ++idx) {
    builder.add(schema.getField(idx).getName(), toRelDataType(dataTypeFactory, schema, idx));
  }
  return builder.build();
}
/**
 * Rejects any table schema that is not exactly the three-field Pubsub message shape:
 * event_timestamp TIMESTAMP, attributes MAP&lt;VARCHAR, VARCHAR&gt;, payload ROW&lt;...&gt;.
 *
 * @throws IllegalArgumentException if the schema does not match
 */
private void validatePubsubMessageSchema(Table tableDefinition) {
  Schema schema = tableDefinition.getSchema();
  // Same checks as before, phrased positively; short-circuit order is unchanged.
  boolean valid =
      schema.getFieldCount() == 3
          && fieldPresent(schema, TIMESTAMP_FIELD, TIMESTAMP)
          && fieldPresent(schema, ATTRIBUTES_FIELD, Schema.FieldType.map(VARCHAR, VARCHAR))
          && schema.hasField(PAYLOAD_FIELD)
          && ROW.equals(schema.getField(PAYLOAD_FIELD).getType().getTypeName());
  if (!valid) {
    throw new IllegalArgumentException(
        "Unsupported schema specified for Pubsub source in CREATE TABLE. "
            + "CREATE TABLE for Pubsub topic should define exactly the following fields: "
            + "'event_timestamp' field of type 'TIMESTAMP', 'attributes' field of type "
            + "MAP<VARCHAR, VARCHAR>, and 'payload' field of type 'ROW<...>' which matches the "
            + "payload JSON format.");
  }
}
/**
 * Implements the join as a per-element lookup: the right-hand side must be a seekable IO
 * source and each fact-stream row probes it via JoinAsLookup.
 */
private PCollection<Row> joinAsLookup(
    BeamRelNode leftRelNode,
    BeamRelNode rightRelNode,
    PCollection<Row> factStream,
    Schema outputSchema) {
  // Both casts fail with ClassCastException if the right side is not a seekable IO source;
  // presumably the planner only routes such rels here — confirm at the call sites.
  BeamIOSourceRel srcRel = (BeamIOSourceRel) rightRelNode;
  BeamSqlSeekableTable seekableTable = (BeamSqlSeekableTable) srcRel.getBeamSqlTable();
  return factStream.apply(
      "join_as_lookup",
      new BeamJoinTransforms.JoinAsLookup(
          condition,
          seekableTable,
          CalciteUtils.toSchema(rightRelNode.getRowType()),
          outputSchema,
          // Field offset of the right side = number of fields contributed by the left side.
          CalciteUtils.toSchema(leftRelNode.getRowType()).getFieldCount()));
}
/**
 * Depth-first flattens {@code input} into {@code output}: composite (nested row) fields are
 * recursed into, leaf values are appended in encounter order.
 */
private static void unnestRow(Row input, Row.Builder output) {
  Schema schema = input.getSchema();
  for (int i = 0; i < schema.getFieldCount(); ++i) {
    if (schema.getField(i).getType().getTypeName().isCompositeType()) {
      unnestRow(input.getRow(i), output);
    } else {
      output.addValue(input.getValue(i));
    }
  }
}

/** A {@link PTransform} that unnests nested row. */
/**
 * Asserts that {@code actual} contains exactly the expected joined KVs, in any order.
 * Each expected value's fields must be ARRAY-typed and are matched ignoring element order.
 */
private static Void containsJoinedFields(
    List<KV<Row, Row>> expected, Iterable<KV<Row, Row>> actual) {
  List<Matcher<? super KV<Row, Row>>> rowMatchers = Lists.newArrayList();
  for (KV<Row, Row> expectedKv : expected) {
    Row value = expectedKv.getValue();
    Schema valueSchema = value.getSchema();
    List<Matcher> perFieldMatchers = Lists.newArrayList();
    for (int i = 0; i < valueSchema.getFieldCount(); ++i) {
      // Joined values are expected to have been aggregated into arrays.
      assertEquals(TypeName.ARRAY, valueSchema.getField(i).getType().getTypeName());
      perFieldMatchers.add(new ArrayFieldMatchesAnyOrder(i, value.getArray(i)));
    }
    rowMatchers.add(
        KvMatcher.isKv(
            equalTo(expectedKv.getKey()), allOf(perFieldMatchers.toArray(new Matcher[0]))));
  }
  assertThat(actual, containsInAnyOrder(rowMatchers.toArray(new Matcher[0])));
  return null;
}
private static DynamicType.Builder<Coder> createComponentCoders( Schema schema, DynamicType.Builder<Coder> builder) { List<StackManipulation> componentCoders = Lists.newArrayListWithCapacity(schema.getFieldCount()); for (int i = 0; i < schema.getFieldCount(); i++) { componentCoders.add(getCoder(schema.getField(i).getType()));
/**
 * Projects {@code input} onto {@code outputSchema}, matching fields by name and casting each
 * value from its input field type to the corresponding output field type.
 *
 * @return the cast row, or null when {@code input} is null
 */
public static Row castRow(Row input, Schema inputSchema, Schema outputSchema) {
  if (input == null) {
    return null;
  }
  Row.Builder builder = Row.withSchema(outputSchema);
  for (int idx = 0; idx < outputSchema.getFieldCount(); idx++) {
    Schema.Field targetField = outputSchema.getField(idx);
    // Locate the source field by name; field order may differ between the schemas.
    int sourceIdx = inputSchema.indexOf(targetField.getName());
    Schema.Field sourceField = inputSchema.getField(sourceIdx);
    builder.addValue(
        castValue(input.getValue(sourceIdx), sourceField.getType(), targetField.getType()));
  }
  return builder.build();
}
/**
 * Converts a Beam {@link Schema} to the equivalent list of BigQuery {@link TableFieldSchema},
 * recursing into nested ROW fields and unwrapping ARRAY fields into REPEATED mode.
 */
private static List<TableFieldSchema> toTableFieldSchema(Schema schema) {
  List<TableFieldSchema> converted = new ArrayList<>(schema.getFieldCount());
  for (Field beamField : schema.getFields()) {
    TableFieldSchema bqField = new TableFieldSchema().setName(beamField.getName());
    String description = beamField.getDescription();
    if (description != null && !"".equals(description)) {
      bqField.setDescription(description);
    }
    if (!beamField.getNullable()) {
      bqField.setMode(Mode.REQUIRED.toString());
    }
    FieldType type = beamField.getType();
    if (TypeName.ARRAY == type.getTypeName()) {
      // BigQuery models arrays as a REPEATED field of the element type.
      type = type.getCollectionElementType();
      bqField.setMode(Mode.REPEATED.toString());
    }
    if (TypeName.ROW == type.getTypeName()) {
      // Nested rows become BigQuery STRUCT fields with recursively converted children.
      bqField.setFields(toTableFieldSchema(type.getRowSchema()));
    }
    bqField.setType(toStandardSQLTypeName(type).toString());
    converted.add(bqField);
  }
  return converted;
}
/** Verifies that toSchema() collects a stream of fields into a schema in encounter order. */
@Test
public void testCollector() {
  Schema.Field intField = Schema.Field.of("f_int", FieldType.INT32);
  Schema.Field stringField = Schema.Field.of("f_string", FieldType.STRING);
  Schema collected = Stream.of(intField, stringField).collect(toSchema());

  assertEquals(2, collected.getFieldCount());
  assertEquals("f_int", collected.getField(0).getName());
  assertEquals(FieldType.INT32, collected.getField(0).getType());
  assertEquals("f_string", collected.getField(1).getName());
  assertEquals(FieldType.STRING, collected.getField(1).getType());
}