Skip to content

Commit 75ee139

Browse files
charlescdrlecomte
authored and committed
Implement logical types conversion for serializer/deserializer
1 parent c5b16ea commit 75ee139

File tree

12 files changed

+134
-18
lines changed

12 files changed

+134
-18
lines changed

common/src/main/java/com/amazonaws/services/schemaregistry/common/configs/GlueSchemaRegistryConfiguration.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ public class GlueSchemaRegistryConfiguration {
6868
private List<SerializationFeature> jacksonSerializationFeatures;
6969
private List<DeserializationFeature> jacksonDeserializationFeatures;
7070

71+
private boolean logicalTypesConversionEnabled;
72+
7173
public GlueSchemaRegistryConfiguration(String region) {
7274
Map<String, Object> config = new HashMap<>();
7375
config.put(AWSSchemaRegistryConstants.AWS_REGION, region);
@@ -104,6 +106,7 @@ private void buildSchemaRegistryConfigs(Map<String, ?> configs) {
104106
validateAndSetUserAgent(configs);
105107
validateAndSetSecondaryDeserializer(configs);
106108
validateAndSetProxyUrl(configs);
109+
validateAndSetLogicalTypesConversionEnabled(configs);
107110
}
108111

109112
private void validateAndSetSecondaryDeserializer(Map<String, ?> configs) {
@@ -130,6 +133,12 @@ private void validateAndSetUserAgent(Map<String, ?> configs) {
130133
}
131134
}
132135

136+
private void validateAndSetLogicalTypesConversionEnabled(Map<String, ?> configs) {
137+
if (isPresent(configs, AWSSchemaRegistryConstants.LOGICAL_TYPES_CONVERSION_ENABLED)) {
138+
this.logicalTypesConversionEnabled = (Boolean) configs.get(AWSSchemaRegistryConstants.LOGICAL_TYPES_CONVERSION_ENABLED);
139+
}
140+
}
141+
133142
private void validateAndSetCompressionType(Map<String, ?> configs) {
134143
if (isPresent(configs, AWSSchemaRegistryConstants.COMPRESSION_TYPE) && validateCompressionType(
135144
(String) configs.get(AWSSchemaRegistryConstants.COMPRESSION_TYPE))) {

common/src/main/java/com/amazonaws/services/schemaregistry/utils/AWSSchemaRegistryConstants.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ public final class AWSSchemaRegistryConstants {
172172
*/
173173
public static final String USER_AGENT_APP = "userAgentApp";
174174

175+
/**
176+
* Boolean indicating if logical types in avro data must be converted or not.
177+
*/
178+
public static final String LOGICAL_TYPES_CONVERSION_ENABLED = "logicalTypesConversionEnabled";
179+
175180
/**
176181
* Private constructor to avoid initialization of the class.
177182
*/

serializer-deserializer/src/main/java/com/amazonaws/services/schemaregistry/deserializers/avro/AvroDeserializer.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ public class AvroDeserializer implements GlueSchemaRegistryDataFormatDeserialize
5151
@Setter
5252
private AvroRecordType avroRecordType;
5353

54+
@Setter
55+
private boolean logicalTypesConversionEnabled;
56+
5457
@NonNull
5558
@Getter
5659
@VisibleForTesting
@@ -65,6 +68,7 @@ public class AvroDeserializer implements GlueSchemaRegistryDataFormatDeserialize
6568
public AvroDeserializer(GlueSchemaRegistryConfiguration configs) {
6669
this.schemaRegistrySerDeConfigs = configs;
6770
this.avroRecordType = configs.getAvroRecordType();
71+
this.logicalTypesConversionEnabled = configs.isLogicalTypesConversionEnabled();
6872
this.datumReaderCache =
6973
CacheBuilder
7074
.newBuilder()
@@ -111,7 +115,7 @@ private BinaryDecoder getBinaryDecoder(byte[] data, int start, int end) {
111115
private class DatumReaderCache extends CacheLoader<String, DatumReader<Object>> {
112116
@Override
113117
public DatumReader<Object> load(String schema) throws Exception {
114-
return DatumReaderInstance.from(schema, avroRecordType);
118+
return DatumReaderInstance.from(schema, avroRecordType, logicalTypesConversionEnabled);
115119
}
116120
}
117121
}

serializer-deserializer/src/main/java/com/amazonaws/services/schemaregistry/deserializers/avro/DatumReaderInstance.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public class DatumReaderInstance {
2929
* @throws IllegalAccessException can be thrown readerClass.newInstance() from
3030
* java.lang.Class implementation
3131
*/
32-
public static DatumReader<Object> from(String writerSchemaDefinition, AvroRecordType avroRecordType)
32+
public static DatumReader<Object> from(String writerSchemaDefinition, AvroRecordType avroRecordType, boolean logicalTypesConversionEnabled)
3333
throws InstantiationException, IllegalAccessException {
3434

3535
Schema writerSchema = AVRO_UTILS.parseSchema(writerSchemaDefinition);
@@ -47,7 +47,11 @@ public static DatumReader<Object> from(String writerSchemaDefinition, AvroRecord
4747
case GENERIC_RECORD:
4848
log.debug("Using GenericDatumReader for de-serializing Avro message, schema: {})",
4949
writerSchema.toString());
50-
return new GenericDatumReader<>(writerSchema);
50+
if (logicalTypesConversionEnabled) {
51+
return new GenericDatumReader<>(writerSchema, writerSchema, GenericDataWithLogicalTypesConversion.getInstance());
52+
} else {
53+
return new GenericDatumReader<>(writerSchema);
54+
}
5155

5256
default:
5357
String message = String.format("Unsupported AvroRecordType: %s",
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package com.amazonaws.services.schemaregistry.deserializers.avro;
2+
3+
import lombok.extern.slf4j.Slf4j;
4+
import org.apache.avro.Conversions;
5+
import org.apache.avro.data.TimeConversions;
6+
import org.apache.avro.generic.GenericData;
7+
8+
@Slf4j
9+
public class GenericDataWithLogicalTypesConversion {
10+
private static final GenericData INSTANCE = new GenericData();
11+
12+
static {
13+
INSTANCE.addLogicalTypeConversion(new Conversions.DecimalConversion());
14+
INSTANCE.addLogicalTypeConversion(new Conversions.UUIDConversion());
15+
INSTANCE.addLogicalTypeConversion(new TimeConversions.DateConversion());
16+
INSTANCE.addLogicalTypeConversion(new TimeConversions.TimeMillisConversion());
17+
INSTANCE.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion());
18+
INSTANCE.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion());
19+
INSTANCE.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion());
20+
INSTANCE.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion());
21+
INSTANCE.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion());
22+
}
23+
24+
public static GenericData getInstance() {
25+
return INSTANCE;
26+
}
27+
}

serializer-deserializer/src/main/java/com/amazonaws/services/schemaregistry/serializers/GlueSchemaRegistrySerializerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public GlueSchemaRegistryDataFormatSerializer getInstance(@NonNull DataFormat da
4545
@NonNull GlueSchemaRegistryConfiguration glueSchemaRegistryConfig) {
4646
switch (dataFormat) {
4747
case AVRO:
48-
this.serializerMap.computeIfAbsent(dataFormat, key -> new AvroSerializer());
48+
this.serializerMap.computeIfAbsent(dataFormat, key -> new AvroSerializer(glueSchemaRegistryConfig));
4949

5050
log.debug("Returning Avro serializer instance from GlueSchemaRegistrySerializerFactory");
5151
return this.serializerMap.get(dataFormat);

serializer-deserializer/src/main/java/com/amazonaws/services/schemaregistry/serializers/avro/AvroSerializer.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package com.amazonaws.services.schemaregistry.serializers.avro;
1616

1717
import com.amazonaws.services.schemaregistry.common.GlueSchemaRegistryDataFormatSerializer;
18+
import com.amazonaws.services.schemaregistry.common.configs.GlueSchemaRegistryConfiguration;
1819
import com.amazonaws.services.schemaregistry.exception.AWSSchemaRegistryException;
1920
import com.amazonaws.services.schemaregistry.utils.AVROUtils;
2021
import com.amazonaws.services.schemaregistry.utils.AvroRecordType;
@@ -45,12 +46,14 @@
4546
public class AvroSerializer implements GlueSchemaRegistryDataFormatSerializer {
4647
private AVROUtils avroUtils = AVROUtils.getInstance();
4748
private static final long MAX_DATUM_WRITER_CACHE_SIZE = 100;
49+
private final boolean logicalTypesConversionEnabled;
4850

4951
@NonNull
5052
@VisibleForTesting
5153
protected final LoadingCache<DatumWriterCacheKey, DatumWriter<Object>> datumWriterCache;
5254

53-
public AvroSerializer() {
55+
public AvroSerializer(GlueSchemaRegistryConfiguration glueSchemaRegistryConfig) {
56+
this.logicalTypesConversionEnabled = glueSchemaRegistryConfig.isLogicalTypesConversionEnabled();
5457
this.datumWriterCache =
5558
CacheBuilder
5659
.newBuilder()
@@ -61,7 +64,7 @@ public AvroSerializer() {
6164
@Override
6265
public byte[] serialize(Object data) {
6366
byte[] bytes;
64-
bytes = serialize(data, createDatumWriter(data));
67+
bytes = serialize(data, createDatumWriter(data, logicalTypesConversionEnabled));
6568

6669
return bytes;
6770
}
@@ -74,19 +77,19 @@ public byte[] serialize(Object data) {
7477
* @param object the Avro message
7578
* @return Avro datum writer for serialization
7679
*/
77-
private DatumWriter<Object> createDatumWriter(Object object) {
80+
private DatumWriter<Object> createDatumWriter(Object object, boolean logicalTypesConversionEnabled) {
7881
org.apache.avro.Schema schema = AVROUtils.getInstance()
7982
.getSchema(object);
8083
if (object instanceof SpecificRecord) {
8184
return getSpecificDatumWriter(schema);
8285
} else if (object instanceof GenericRecord) {
83-
return getGenericDatumWriter(schema);
86+
return getGenericDatumWriter(schema, logicalTypesConversionEnabled);
8487
} else if (object instanceof GenericData.EnumSymbol) {
85-
return getGenericDatumWriter(schema);
88+
return getGenericDatumWriter(schema, logicalTypesConversionEnabled);
8689
} else if (object instanceof GenericData.Array) {
87-
return getGenericDatumWriter(schema);
90+
return getGenericDatumWriter(schema, logicalTypesConversionEnabled);
8891
} else if (object instanceof GenericData.Fixed) {
89-
return getGenericDatumWriter(schema);
92+
return getGenericDatumWriter(schema, logicalTypesConversionEnabled);
9093
} else {
9194
String message =
9295
String.format("Unsupported type passed for serialization: %s", object);
@@ -96,13 +99,13 @@ private DatumWriter<Object> createDatumWriter(Object object) {
9699

97100
@SneakyThrows
98101
private DatumWriter<Object> getSpecificDatumWriter(Schema schema) {
99-
DatumWriterCacheKey datumWriterCacheKey = new DatumWriterCacheKey(schema, AvroRecordType.SPECIFIC_RECORD);
102+
DatumWriterCacheKey datumWriterCacheKey = new DatumWriterCacheKey(schema, AvroRecordType.SPECIFIC_RECORD, false);
100103
return datumWriterCache.get(datumWriterCacheKey);
101104
}
102105

103106
@SneakyThrows
104-
private DatumWriter<Object> getGenericDatumWriter(Schema schema) {
105-
DatumWriterCacheKey datumWriterCacheKey = new DatumWriterCacheKey(schema, AvroRecordType.GENERIC_RECORD);
107+
private DatumWriter<Object> getGenericDatumWriter(Schema schema, boolean logicalTypesConversionEnabled) {
108+
DatumWriterCacheKey datumWriterCacheKey = new DatumWriterCacheKey(schema, AvroRecordType.GENERIC_RECORD, logicalTypesConversionEnabled);
106109
return datumWriterCache.get(datumWriterCacheKey);
107110
}
108111

@@ -160,14 +163,16 @@ private static class DatumWriterCacheKey {
160163
private final Schema schema;
161164
@NonNull
162165
private final AvroRecordType avroRecordType;
166+
private final boolean logicalTypesConversionEnabled;
163167
}
164168

165169
private static class DatumWriterCache extends CacheLoader<DatumWriterCacheKey, DatumWriter<Object>> {
166170
@Override
167171
public DatumWriter<Object> load(DatumWriterCacheKey datumWriterCacheKey) {
168172
Schema schema = datumWriterCacheKey.getSchema();
169173
AvroRecordType avroRecordType = datumWriterCacheKey.getAvroRecordType();
170-
return DatumWriterInstance.get(schema, avroRecordType);
174+
boolean logicalTypesConversionEnabled = datumWriterCacheKey.isLogicalTypesConversionEnabled();
175+
return DatumWriterInstance.get(schema, avroRecordType, logicalTypesConversionEnabled);
171176
}
172177
}
173178
}

serializer-deserializer/src/main/java/com/amazonaws/services/schemaregistry/serializers/avro/DatumWriterInstance.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.amazonaws.services.schemaregistry.serializers.avro;
22

3+
import com.amazonaws.services.schemaregistry.deserializers.avro.GenericDataWithLogicalTypesConversion;
34
import com.amazonaws.services.schemaregistry.exception.AWSSchemaRegistryException;
45
import com.amazonaws.services.schemaregistry.utils.AvroRecordType;
56
import org.apache.avro.Schema;
@@ -8,12 +9,16 @@
89
import org.apache.avro.specific.SpecificDatumWriter;
910

1011
public class DatumWriterInstance {
11-
public static DatumWriter<Object> get(Schema schema, AvroRecordType avroRecordType) {
12+
public static DatumWriter<Object> get(Schema schema, AvroRecordType avroRecordType, boolean logicalTypesConversionEnabled) {
1213
switch (avroRecordType) {
1314
case SPECIFIC_RECORD:
1415
return new SpecificDatumWriter<>(schema);
1516
case GENERIC_RECORD:
16-
return new GenericDatumWriter<>(schema);
17+
if (logicalTypesConversionEnabled) {
18+
return new GenericDatumWriter<>(schema, GenericDataWithLogicalTypesConversion.getInstance());
19+
} else {
20+
return new GenericDatumWriter<>(schema);
21+
}
1722
case UNKNOWN:
1823
default:
1924
String message =

serializer-deserializer/src/test/java/com/amazonaws/services/schemaregistry/deserializers/avro/AvroDeserializerTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import java.util.Map;
5151
import java.util.UUID;
5252

53+
import static com.amazonaws.services.schemaregistry.utils.RecordGenerator.AVRO_USER_LOGICAL_TYPES_SCHEMA_FILE_PATH;
5354
import static org.junit.jupiter.api.Assertions.assertAll;
5455
import static org.junit.jupiter.api.Assertions.assertEquals;
5556
import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -87,6 +88,7 @@ public class AvroDeserializerTest {
8788
public void setup() {
8889
this.configs.put(AWSSchemaRegistryConstants.AWS_ENDPOINT, "https://test");
8990
this.configs.put(AWSSchemaRegistryConstants.AWS_REGION, "us-west-2");
91+
this.configs.put(AWSSchemaRegistryConstants.LOGICAL_TYPES_CONVERSION_ENABLED, true);
9092
this.schemaRegistrySerDeConfigs = new GlueSchemaRegistryConfiguration(this.configs);
9193

9294
MockitoAnnotations.initMocks(this);
@@ -298,6 +300,28 @@ public void testDeserialize_genericRecord_equalsOriginal(AWSSchemaRegistryConsta
298300
assertEquals(1, avroDeserializer.getDatumReaderCache().size());
299301
}
300302

303+
/**
304+
* Test whether the serialized generic record with logical types can be de-serialized back to the
305+
* generic record instance with conversions.
306+
*/
307+
@ParameterizedTest
308+
@EnumSource(AWSSchemaRegistryConstants.COMPRESSION.class)
309+
public void testDeserialize_genericRecord_with_logicalTypes_equalsOriginal(AWSSchemaRegistryConstants.COMPRESSION compressionType) {
310+
GenericRecord genericRecord = RecordGenerator.createGenericAvroRecordWithLogicalTypes();
311+
312+
ByteBuffer serializedData = createBasicSerializedData(genericRecord, compressionType.name(), DataFormat.AVRO);
313+
org.apache.avro.Schema schema = SchemaLoader.loadAvroSchema(AVRO_USER_LOGICAL_TYPES_SCHEMA_FILE_PATH);
314+
AvroDeserializer avroDeserializer = createAvroDeserializer(AvroRecordType.GENERIC_RECORD);
315+
316+
com.amazonaws.services.schemaregistry.common.Schema schemaObject = new com.amazonaws.services.schemaregistry.common.Schema(
317+
schema.toString(), DataFormat.AVRO.name(), "testAvroSchema");
318+
319+
Object deserializedObject = avroDeserializer.deserialize(serializedData, schemaObject);
320+
assertGenericRecord(genericRecord, deserializedObject);
321+
//Assert the instance is getting cached.
322+
assertEquals(1, avroDeserializer.getDatumReaderCache().size());
323+
}
324+
301325
public void assertGenericRecord(GenericRecord genericRecord, Object deserializedObject) {
302326
assertTrue(deserializedObject instanceof GenericRecord);
303327
assertTrue(deserializedObject.equals(genericRecord));

serializer-deserializer/src/test/java/com/amazonaws/services/schemaregistry/serializers/avro/AvroSerializerTest.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
package com.amazonaws.services.schemaregistry.serializers.avro;
22

3+
import com.amazonaws.services.schemaregistry.common.configs.GlueSchemaRegistryConfiguration;
4+
import com.amazonaws.services.schemaregistry.utils.AWSSchemaRegistryConstants;
35
import com.amazonaws.services.schemaregistry.utils.RecordGenerator;
46
import org.apache.avro.generic.GenericRecord;
57
import org.junit.jupiter.api.Test;
68

9+
import java.util.HashMap;
10+
711
import static org.junit.jupiter.api.Assertions.assertEquals;
812

913
public class AvroSerializerTest {
1014

1115
@Test
1216
public void serialize_WhenSerializeIsCalled_ReturnsCachedInstance() {
13-
AvroSerializer avroSerializer = new AvroSerializer();
17+
GlueSchemaRegistryConfiguration config = new GlueSchemaRegistryConfiguration("eu-west-1");
18+
AvroSerializer avroSerializer = new AvroSerializer(config);
1419

1520
User specificUserRecord = RecordGenerator.createSpecificAvroRecord();
1621
GenericRecord genericUserRecord = RecordGenerator.createGenericUserMapAvroRecord();

0 commit comments

Comments
 (0)