/**
 * Looks up a schema by its registry-assigned id, consulting the local cache first.
 *
 * @param id the schema id to resolve
 * @return the schema associated with {@code id}
 * @throws IOException if the delegate registry call fails
 * @throws SchemaRegistryException if the delegate registry reports an error
 */
@Override
synchronized public S getById(K id) throws IOException, SchemaRegistryException {
  // Fast path: a previously fetched or registered schema is served from the cache.
  if (_idBasedCache.containsKey(id)) {
    return _idBasedCache.get(id);
  }
  // Cache miss: delegate to the wrapped registry and remember the answer.
  S fetched = _kafkaSchemaRegistry.getById(id);
  _idBasedCache.put(id, fetched);
  return fetched;
}
@Override synchronized public K register(String name, S schema) throws IOException, SchemaRegistryException { Map<S, K> schemaIdMap; if (_namedSchemaCache.containsKey(name)) { schemaIdMap = _namedSchemaCache.get(name); } else { // we really care about reference equality to de-dup using cache // when it comes to registering schemas, so use an IdentityHashMap here schemaIdMap = new IdentityHashMap<>(); _namedSchemaCache.put(name, schemaIdMap); } if (schemaIdMap.containsKey(schema)) { return schemaIdMap.get(schema); } else { // check if schemaIdMap is getting too full Preconditions.checkState(schemaIdMap.size() < _maxSchemaReferences, "Too many schema objects for " + name +". Cache is overfull."); } K id = _kafkaSchemaRegistry.register(name, schema); schemaIdMap.put(schema, id); _idBasedCache.put(id, schema); return id; }
/**
 * Fetches the latest schema for {@code name} directly from the wrapped registry.
 * This call is deliberately not cached because we never want to miss out on the
 * latest schema.
 * {@inheritDoc}
 */
@Override
public S getLatestSchema(String name) throws IOException, SchemaRegistryException {
  return _kafkaSchemaRegistry.getLatestSchema(name);
}
@Test public void testRegisterShouldCacheIds() throws IOException, SchemaRegistryException { KafkaSchemaRegistry<Integer, String> baseRegistry = mock(KafkaSchemaRegistry.class); CachingKafkaSchemaRegistry<Integer, String> cachingReg = new CachingKafkaSchemaRegistry<>(baseRegistry, 2); String name = "test"; String schema1 = new String("schema"); Integer id1 = 1; // first register name, schema1, get back id1 when(baseRegistry.register(name, schema1)).thenReturn(id1); Assert.assertEquals(cachingReg.register(name, schema1), id1); // getById should hit the cache and return id1 when(baseRegistry.getById(id1)).thenReturn(new String("schema2")); Assert.assertEquals(cachingReg.getById(id1), schema1); verify(baseRegistry, times(0)).getById(anyInt()); }
/**
 * Create a caching schema registry.
 *
 * @param kafkaSchemaRegistry a schema registry that needs caching; must be non-null
 *                            and must not already have an internal cache
 * @param maxSchemaReferences the maximum number of unique schema references that may
 *                            be cached per schema name; must be positive
 */
public CachingKafkaSchemaRegistry(KafkaSchemaRegistry kafkaSchemaRegistry, int maxSchemaReferences) {
  Preconditions.checkArgument(kafkaSchemaRegistry != null, "KafkaSchemaRegistry cannot be null");
  Preconditions.checkArgument(!kafkaSchemaRegistry.hasInternalCache(), "SchemaRegistry already has a cache.");
  // Fail fast: a non-positive cap would make every register() call fail later with a
  // confusing "Cache is overfull" error before anything was ever cached.
  Preconditions.checkArgument(maxSchemaReferences > 0, "maxSchemaReferences must be positive.");
  _kafkaSchemaRegistry = kafkaSchemaRegistry;
  _namedSchemaCache = new HashMap<>();
  _idBasedCache = new HashMap<>();
  _maxSchemaReferences = maxSchemaReferences;
}
/**
 * Instantiates the {@link KafkaSchemaRegistry} implementation named by the
 * KAFKA_SCHEMA_REGISTRY_CLASS property, wrapping it in a
 * {@link CachingKafkaSchemaRegistry} when caching is enabled and the instance
 * does not already cache internally.
 *
 * @param props configuration; must contain KAFKA_SCHEMA_REGISTRY_CLASS
 * @return the (possibly caching) schema registry instance
 */
@SuppressWarnings("unchecked")
public static KafkaSchemaRegistry getSchemaRegistry(Properties props) {
  Preconditions.checkArgument(props.containsKey(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS),
      "Missing required property " + KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS);

  boolean tryCache = Boolean.parseBoolean(
      props.getProperty(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CACHE, DEFAULT_TRY_CACHING));

  try {
    Class<?> registryClass =
        Class.forName(props.getProperty(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS));
    KafkaSchemaRegistry registry = (KafkaSchemaRegistry) ConstructorUtils.invokeConstructor(registryClass, props);
    if (tryCache && !registry.hasInternalCache()) {
      registry = new CachingKafkaSchemaRegistry(registry);
    }
    return registry;
  } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InvocationTargetException
      | InstantiationException e) {
    log.error("Failed to instantiate " + KafkaSchemaRegistry.class, e);
    throw Throwables.propagate(e);
  }
}
}
public byte[] serialize(String topic, GenericRecord data) throws SerializationException { Schema schema = data.getSchema(); MD5Digest schemaId = null; try { schemaId = schemaRegistry.register(topic, schema); ByteArrayOutputStream out = new ByteArrayOutputStream(); // MAGIC_BYTE | schemaId-bytes | avro_payload out.write(LiAvroSerDeHelper.MAGIC_BYTE); out.write(schemaId.asBytes()); BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null); DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema); writer.write(data, encoder); encoder.flush(); byte[] bytes = out.toByteArray(); out.close(); return bytes; } catch (IOException | SchemaRegistryException e) { throw new SerializationException(e); } }
/**
 * Verifies that getById() consults the delegate exactly once per id: the first
 * lookup populates the cache and every later lookup is served locally.
 */
@Test
public void testIdSchemaCaching() throws IOException, SchemaRegistryException {
  KafkaSchemaRegistry<Integer, String> delegate = mock(KafkaSchemaRegistry.class);
  String schema1 = new String("schema");
  Integer id1 = 1;
  CachingKafkaSchemaRegistry<Integer, String> caching = new CachingKafkaSchemaRegistry<>(delegate, 2);
  // First lookup goes through to the delegate.
  when(delegate.getById(id1)).thenReturn(schema1);
  String firstResult = caching.getById(id1);
  Assert.assertEquals(firstResult, schema1, "Schema returned by id should be the same");
  verify(delegate, times(1)).getById(anyInt());
  // Second lookup must come from the cache: the delegate's new answer is ignored
  // and the invocation count stays at one.
  when(delegate.getById(id1)).thenReturn(new String("schema2"));
  Assert.assertEquals(caching.getById(id1), firstResult);
  verify(delegate, times(1)).getById(anyInt());
}
/**
 * Resolves the output schema by fetching the latest registered schema for the
 * topic named in the work unit; the incoming schema is not consulted.
 *
 * @param schemaIn the input schema (unused)
 * @param workUnit work unit state; must contain KafkaSource.TOPIC_NAME
 * @return the latest schema registered for the topic
 * @throws SchemaConversionException if the registry lookup fails
 */
@Override
public Schema convertSchema(S schemaIn, WorkUnitState workUnit) throws SchemaConversionException {
  Preconditions.checkArgument(workUnit.contains(KafkaSource.TOPIC_NAME), "Must specify topic name.");
  try {
    return (Schema) this.schemaRegistry.getLatestSchema(workUnit.getProp(KafkaSource.TOPIC_NAME));
  } catch (IOException | SchemaRegistryException e) {
    throw new SchemaConversionException(e);
  }
}
/**
 * Create a caching schema registry.
 *
 * @param kafkaSchemaRegistry a schema registry that needs caching; must be non-null
 *                            and must not already have an internal cache
 * @param maxSchemaReferences the maximum number of unique schema references that may
 *                            be cached per schema name; must be positive
 */
public CachingKafkaSchemaRegistry(KafkaSchemaRegistry kafkaSchemaRegistry, int maxSchemaReferences) {
  Preconditions.checkArgument(kafkaSchemaRegistry != null, "KafkaSchemaRegistry cannot be null");
  Preconditions.checkArgument(!kafkaSchemaRegistry.hasInternalCache(), "SchemaRegistry already has a cache.");
  // Fail fast: a non-positive cap would make every register() call fail later with a
  // confusing "Cache is overfull" error before anything was ever cached.
  Preconditions.checkArgument(maxSchemaReferences > 0, "maxSchemaReferences must be positive.");
  _kafkaSchemaRegistry = kafkaSchemaRegistry;
  _namedSchemaCache = new HashMap<>();
  _idBasedCache = new HashMap<>();
  _maxSchemaReferences = maxSchemaReferences;
}
@Test public void testRegisterSchemaCaching() throws IOException, SchemaRegistryException { KafkaSchemaRegistry<Integer, String> baseRegistry = mock(KafkaSchemaRegistry.class); String name = "test"; String schema1 = new String("schema"); Integer id1 = 1; CachingKafkaSchemaRegistry<Integer, String> cachingReg = new CachingKafkaSchemaRegistry<>(baseRegistry, 2); when(baseRegistry.register(name, schema1)).thenReturn(id1); Assert.assertEquals(cachingReg.register(name, schema1), id1); Integer id2 = 2; when(baseRegistry.register(name, schema1)).thenReturn(id2); // Test that we get back the original id Assert.assertEquals(cachingReg.register(name, schema1), id1); // Ensure that we only called baseRegistry.register once. verify(baseRegistry, times(1)).register(anyString(), anyString()); }
Schema schema = _schemaRegistry.getById(schemaId); Decoder decoder = DecoderFactory.get().binaryDecoder(data, 1 + MD5Digest.MD5_BYTES_LENGTH, data.length - MD5Digest.MD5_BYTES_LENGTH - 1, null);
/**
 * Delegates straight to the wrapped registry. This call is intentionally not
 * cached because we never want to miss out on the latest schema.
 * {@inheritDoc}
 */
@Override
public S getLatestSchema(String name) throws IOException, SchemaRegistryException {
  return _kafkaSchemaRegistry.getLatestSchema(name);
}
/**
 * Builds the configured {@link KafkaSchemaRegistry}. The implementation class is
 * taken from the KAFKA_SCHEMA_REGISTRY_CLASS property; when caching is requested
 * and the instance has no internal cache, it is wrapped in a
 * {@link CachingKafkaSchemaRegistry}.
 *
 * @param props configuration; must contain KAFKA_SCHEMA_REGISTRY_CLASS
 * @return the (possibly caching) schema registry instance
 */
@SuppressWarnings("unchecked")
public static KafkaSchemaRegistry getSchemaRegistry(Properties props) {
  Preconditions.checkArgument(props.containsKey(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS),
      "Missing required property " + KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS);

  boolean tryCache = Boolean.parseBoolean(
      props.getProperty(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CACHE, DEFAULT_TRY_CACHING));

  try {
    Class<?> implClass =
        Class.forName(props.getProperty(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_CLASS));
    KafkaSchemaRegistry schemaRegistry = (KafkaSchemaRegistry) ConstructorUtils.invokeConstructor(implClass, props);
    if (tryCache && !schemaRegistry.hasInternalCache()) {
      schemaRegistry = new CachingKafkaSchemaRegistry(schemaRegistry);
    }
    return schemaRegistry;
  } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | InvocationTargetException
      | InstantiationException e) {
    log.error("Failed to instantiate " + KafkaSchemaRegistry.class, e);
    throw Throwables.propagate(e);
  }
}
}
/**
 * Verifies that a third distinct schema object for the same name overflows a
 * cache capped at two references and is rejected.
 */
@Test
public void testMaxReferences() throws IOException, SchemaRegistryException {
  KafkaSchemaRegistry<Integer, String> baseRegistry = mock(KafkaSchemaRegistry.class);
  String name = "test";
  // Three distinct String instances on purpose: the cache de-dupes by reference
  // identity, not equals(), so each counts as a separate schema reference.
  String schema1 = new String("schema");
  String schema2 = new String("schema");
  String schema3 = new String("schema");
  Integer id1 = 1;
  Integer id2 = 2;
  Integer id3 = 3;
  CachingKafkaSchemaRegistry<Integer, String> cachingReg = new CachingKafkaSchemaRegistry<>(baseRegistry, 2);
  when(baseRegistry.register(name, schema1)).thenReturn(id1);
  Assert.assertEquals(cachingReg.register(name, schema1), id1);
  when(baseRegistry.register(name, schema2)).thenReturn(id2);
  Assert.assertEquals(cachingReg.register(name, schema2), id2);
  when(baseRegistry.register(name, schema3)).thenReturn(id3);
  try {
    cachingReg.register(name, schema3);
    Assert.fail("Should have thrown an exception");
  } catch (IllegalStateException e) {
    // Narrowed from catch (Exception): Preconditions.checkState throws
    // IllegalStateException, and a broader catch could mask unrelated failures.
    log.info(e.getMessage());
  }
}
/**
 * Returns the schema for the given id, fetching from the wrapped registry and
 * caching the result on the first request.
 *
 * @param id the schema id to resolve
 * @return the schema associated with {@code id}
 * @throws IOException if the delegate registry call fails
 * @throws SchemaRegistryException if the delegate registry reports an error
 */
@Override
synchronized public S getById(K id) throws IOException, SchemaRegistryException {
  // Serve from the cache when possible; otherwise fetch, cache, and return.
  if (!_idBasedCache.containsKey(id)) {
    _idBasedCache.put(id, _kafkaSchemaRegistry.getById(id));
  }
  return _idBasedCache.get(id);
}
/**
 * Produces the output schema by querying the registry for the latest schema of
 * the topic named in the work unit. The input schema argument is ignored.
 *
 * @param schemaIn the input schema (unused)
 * @param workUnit work unit state; must contain KafkaSource.TOPIC_NAME
 * @return the latest schema registered for the topic
 * @throws SchemaConversionException if the registry lookup fails
 */
@Override
public Schema convertSchema(S schemaIn, WorkUnitState workUnit) throws SchemaConversionException {
  Preconditions.checkArgument(workUnit.contains(KafkaSource.TOPIC_NAME), "Must specify topic name.");
  String topicName = workUnit.getProp(KafkaSource.TOPIC_NAME);
  try {
    return (Schema) this.schemaRegistry.getLatestSchema(topicName);
  } catch (IOException | SchemaRegistryException e) {
    throw new SchemaConversionException(e);
  }
}
@Override synchronized public K register(String name, S schema) throws IOException, SchemaRegistryException { Map<S, K> schemaIdMap; if (_namedSchemaCache.containsKey(name)) { schemaIdMap = _namedSchemaCache.get(name); } else { // we really care about reference equality to de-dup using cache // when it comes to registering schemas, so use an IdentityHashMap here schemaIdMap = new IdentityHashMap<>(); _namedSchemaCache.put(name, schemaIdMap); } if (schemaIdMap.containsKey(schema)) { return schemaIdMap.get(schema); } else { // check if schemaIdMap is getting too full Preconditions.checkState(schemaIdMap.size() < _maxSchemaReferences, "Too many schema objects for " + name +". Cache is overfull."); } K id = _kafkaSchemaRegistry.register(name, schema); schemaIdMap.put(schema, id); _idBasedCache.put(id, schema); return id; }
Schema schema = _schemaRegistry.getById(schemaId); Decoder decoder = DecoderFactory.get().binaryDecoder(data, 1 + MD5Digest.MD5_BYTES_LENGTH, data.length - MD5Digest.MD5_BYTES_LENGTH - 1, null);
public byte[] serialize(String topic, GenericRecord data) throws SerializationException { Schema schema = data.getSchema(); MD5Digest schemaId = null; try { schemaId = schemaRegistry.register(topic, schema); ByteArrayOutputStream out = new ByteArrayOutputStream(); // MAGIC_BYTE | schemaId-bytes | avro_payload out.write(LiAvroSerDeHelper.MAGIC_BYTE); out.write(schemaId.asBytes()); BinaryEncoder encoder = encoderFactory.directBinaryEncoder(out, null); DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema); writer.write(data, encoder); encoder.flush(); byte[] bytes = out.toByteArray(); out.close(); return bytes; } catch (IOException | SchemaRegistryException e) { throw new SerializationException(e); } }