/**
 * Strips map, array and enum fields — and any union field containing one of
 * those types — from an Avro schema. Schemas with such fields cannot be used
 * as the Mapper key of a MapReduce job, so they must be filtered out first.
 *
 * @param schema the Avro schema to filter
 * @return the filtered schema, or absent if the schema itself is uncomparable
 */
public static Optional<Schema> removeUncomparableFields(Schema schema) {
  Set<Schema> visited = Sets.<Schema>newHashSet();
  return removeUncomparableFields(schema, visited);
}
private static Optional<Schema> removeUncomparableFieldsFromUnion(Schema union, Set<Schema> processed) { Preconditions.checkArgument(union.getType() == Schema.Type.UNION); if (processed.contains(union)) { return Optional.absent(); } processed.add(union); List<Schema> newUnion = Lists.newArrayList(); for (Schema unionType : union.getTypes()) { Optional<Schema> newType = removeUncomparableFields(unionType, processed); if (newType.isPresent()) { newUnion.add(newType.get()); } } // Discard the union field if one or more types are removed from the union. if (newUnion.size() != union.getTypes().size()) { return Optional.absent(); } return Optional.of(Schema.createUnion(newUnion)); }
/**
 * Rebuilds a record schema with its uncomparable fields (map, array, enum,
 * and unions containing them) removed.
 *
 * @param record    the schema to filter; must have type {@code RECORD}
 * @param processed schemas already visited, used to break recursion cycles
 * @return a copy of the record containing only comparable fields, or absent
 *         if the record was already visited
 */
private static Optional<Schema> removeUncomparableFieldsFromRecord(Schema record, Set<Schema> processed) {
  Preconditions.checkArgument(record.getType() == Schema.Type.RECORD);

  // Break cycles: a record reachable from itself is reported absent on the
  // second encounter.
  if (processed.contains(record)) {
    return Optional.absent();
  }
  processed.add(record);

  List<Field> fields = Lists.newArrayList();
  for (Field field : record.getFields()) {
    Optional<Schema> newFieldSchema = removeUncomparableFields(field.schema(), processed);
    if (newFieldSchema.isPresent()) {
      // Avro Field instances cannot be reused across schemas, so clone each
      // field that survives the filter.
      fields.add(new Field(field.name(), newFieldSchema.get(), field.doc(), field.defaultValue()));
    }
  }

  // Propagate isError() instead of hard-coding false so that Avro error
  // records keep their error flag in the filtered copy (previously lost).
  Schema newSchema = Schema.createRecord(record.getName(), record.getDoc(), record.getNamespace(), record.isError());
  newSchema.setFields(fields);
  return Optional.of(newSchema);
}
// NOTE(review): this fragment appears corrupted — the error is logged before
// any parse is attempted, and the same keySchema assignment repeats three
// times; confirm against the original MRCompactorAvroKeyDedupJobRunner
// source before relying on this control flow. The enclosing method and the
// closing braces of this branch are outside the visible chunk.
if (dedupKeyOption == MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.ALL) {
  // "Arrar" is a typo for "Array" inside the runtime log text (not changed here).
  log.info("Using all attributes in the schema (except Map, Arrar and Enum fields) for compaction");
  keySchema = AvroUtils.removeUncomparableFields(topicSchema).get();
} else if (dedupKeyOption == MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.KEY) {
  log.info("Using key attributes in the schema for compaction");
  keySchema = AvroUtils.removeUncomparableFields(MRCompactorAvroKeyDedupJobRunner.getKeySchema(topicSchema)).get();
} else if (keySchemaFileSpecified) {
  // Path to a user-supplied key schema file, read from job state.
  Path keySchemaFile = new Path(state.getProp(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_KEY_SCHEMA_LOC));
  // NOTE(review): error is logged unconditionally — presumably a try/catch
  // around schema parsing was lost in this copy; verify upstream.
  log.error("Failed to parse avro schema from " + keySchemaFile + ", using key attributes in the schema for compaction");
  keySchema = AvroUtils.removeUncomparableFields(MRCompactorAvroKeyDedupJobRunner.getKeySchema(topicSchema)).get();
  // NOTE(review): warning references keySchema/topicSchema compatibility, but
  // no compatibility check is visible here — likely lost alongside the catch.
  log.warn(String.format("Key schema %s is not compatible with record schema %s.", keySchema, topicSchema)
      + "Using key attributes in the schema for compaction");
  keySchema = AvroUtils.removeUncomparableFields(MRCompactorAvroKeyDedupJobRunner.getKeySchema(topicSchema)).get();
  // NOTE(review): duplicate of the assignment above — likely merge residue.
  keySchema = AvroUtils.removeUncomparableFields(MRCompactorAvroKeyDedupJobRunner.getKeySchema(topicSchema)).get();
// NOTE(review): this fragment appears corrupted — the error is logged before
// any parse is attempted, and the same keySchema assignment repeats three
// times; confirm against the original source before relying on this control
// flow. The enclosing method and the closing braces of this branch are
// outside the visible chunk.
if (dedupKeyOption == DedupKeyOption.ALL) {
  // "Arrar" is a typo for "Array" inside the runtime log text (not changed here).
  LOG.info("Using all attributes in the schema (except Map, Arrar and Enum fields) for compaction");
  keySchema = AvroUtils.removeUncomparableFields(topicSchema).get();
} else if (dedupKeyOption == DedupKeyOption.KEY) {
  LOG.info("Using key attributes in the schema for compaction");
  keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
} else if (keySchemaFileSpecified()) {
  // Path to a user-supplied key schema file.
  Path keySchemaFile = getKeySchemaFile();
  // NOTE(review): error is logged unconditionally — presumably a try/catch
  // around schema parsing was lost in this copy; verify upstream.
  LOG.error("Failed to parse avro schema from " + keySchemaFile + ", using key attributes in the schema for compaction");
  keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
  // NOTE(review): the two assignments below duplicate the one above — likely
  // merge residue; only one should survive in the repaired method.
  keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();
  keySchema = AvroUtils.removeUncomparableFields(getKeySchema(topicSchema)).get();