/**
 * Produces the converted schema by flattening <code>inputSchema</code>.
 * The flattener is invoked with its second argument set to <code>false</code>;
 * <code>workUnit</code> is not consulted.
 * {@inheritDoc}
 * @see gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter#convertSchema(org.apache.avro.Schema, gobblin.configuration.WorkUnitState)
 */
@Override
public Schema convertSchema(Schema inputSchema, WorkUnitState workUnit) {
  // Named flag so the meaning of the boolean argument is visible at the call site.
  final boolean flattenComplexTypes = false;
  final Schema flattenedSchema = AVRO_FLATTENER.flatten(inputSchema, flattenComplexTypes);
  return flattenedSchema;
}
/**
 * Copies the JSON properties of one Avro schema onto another.
 * Both schemas are null-checked, then the source schema's property map is handed to the
 * map-based <code>copyProperties</code> overload together with the target schema.
 *
 * @param oldSchema Avro schema whose properties are read; must not be null
 * @param newSchema Avro schema that receives the properties; must not be null
 */
private static void copyProperties(Schema oldSchema, Schema newSchema) {
  // Validate both arguments before touching either schema.
  Preconditions.checkNotNull(oldSchema);
  Preconditions.checkNotNull(newSchema);

  // The actual copying is done by the overload that accepts the property map.
  copyProperties(oldSchema.getJsonProps(), newSchema);
}
flattenedSchema = Schema.createArray(flatten(schema.getElementType(), false)); } else { flattenedSchema = Schema.createArray(schema.getElementType()); case MAP: if (flattenComplexTypes) { flattenedSchema = Schema.createMap(flatten(schema.getValueType(), false)); } else { flattenedSchema = Schema.createMap(schema.getValueType()); break; case RECORD: flattenedSchema = flattenRecord(schema, shouldPopulateLineage, flattenComplexTypes); break; case STRING: break; case UNION: flattenedSchema = flattenUnion(schema, shouldPopulateLineage, flattenComplexTypes); break; default: copyProperties(schema, flattenedSchema);
if (null != f.schema().getFields() && f.schema().getFields().size() > 0) { for (Schema.Field field : f.schema().getFields()) { flattenedFields.addAll(flattenField(field, lineage, true, flattenComplexTypes, Optional.<Schema>absent())); Optional<Schema> optionalRecord = isOfOptionType(f.schema()); if (optionalRecord.isPresent()) { Schema record = optionalRecord.get(); if (record.getFields().size() > 0) { for (Schema.Field field : record.getFields()) { flattenedFields.addAll(flattenField(field, lineage, true, flattenComplexTypes, Optional.of(f.schema()))); Schema flattenedFieldSchema = flatten(f.schema(), shouldPopulateLineage, flattenComplexTypes); if (shouldWrapInOption.isPresent()) { boolean isNullFirstMember = Schema.Type.NULL.equals(shouldWrapInOption.get().getTypes().get(0).getType());
/**
 * Builds a new Record schema whose fields are the flattened forms of the input record's fields.
 *
 * <p>Every field of <code>schema</code> is passed to <code>flattenField</code> with an empty lineage
 * list and no wrapping option schema. The fields that come back are gathered in iteration order and
 * set on a fresh record created with the original name, doc, namespace and error flag.
 *
 * @param schema Record schema to flatten; must be non-null and of type RECORD
 * @param shouldPopulateLineage If lineage information should be tagged in the field, this is true
 *                              when we are un-nesting fields
 * @param flattenComplexTypes Flatten complex types recursively other than Record and Option
 * @return the newly created, flattened Record schema
 */
private Schema flattenRecord(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) {
  Preconditions.checkNotNull(schema);
  Preconditions.checkArgument(Schema.Type.RECORD.equals(schema.getType()));

  List<Schema.Field> collected = new ArrayList<>();
  for (Schema.Field sourceField : schema.getFields()) {
    // Top-level fields start with an empty lineage and are not wrapped in an option.
    List<Schema.Field> expanded = flattenField(sourceField, ImmutableList.<String>of(),
        shouldPopulateLineage, flattenComplexTypes, Optional.<Schema>absent());
    if (expanded != null && !expanded.isEmpty()) {
      collected.addAll(expanded);
    }
  }

  // Re-create the record shell from the original's identity, then attach the flattened fields.
  Schema result =
      Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
  result.setFields(collected);
  return result;
}
/*** * Flatten the Schema to un-nest recursive Records (to make it optimal for ORC) * @param schema Avro Schema to flatten * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Avro Schema */ public Schema flatten(Schema schema, boolean flattenComplexTypes) { Preconditions.checkNotNull(schema); // To help make it configurable later this.flattenedNameJoiner = FLATTENED_NAME_JOINER; this.flattenedSourceJoiner = FLATTENED_SOURCE_JOINER; Schema flattenedSchema = flatten(schema, false, flattenComplexTypes); LOG.debug("Original Schema : " + schema); LOG.debug("Flattened Schema: " + flattenedSchema); return flattenedSchema; }
/*** * Flatten Union Schema * @param schema Union Schema to flatten * @param shouldPopulateLineage If lineage information should be tagged in the field, this is true when we are * un-nesting fields * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Union Schema */ private Schema flattenUnion(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) { Preconditions.checkNotNull(schema); Preconditions.checkArgument(Schema.Type.UNION.equals(schema.getType())); Schema flattenedSchema; List<Schema> flattenedUnionMembers = new ArrayList<>(); if (null != schema.getTypes() && schema.getTypes().size() > 0) { for (Schema oldUnionMember : schema.getTypes()) { if (flattenComplexTypes) { // It's member might still recursively contain records flattenedUnionMembers.add(flatten(oldUnionMember, shouldPopulateLineage, flattenComplexTypes)); } else { flattenedUnionMembers.add(oldUnionMember); } } } flattenedSchema = Schema.createUnion(flattenedUnionMembers); return flattenedSchema; }