From 9c3e2dd35e499fd26db45ffaf11c5cbaf0b8a0be Mon Sep 17 00:00:00 2001
From: Lei Zhang
Date: Thu, 13 Feb 2025 16:53:34 -0800
Subject: [PATCH] Limit the max log size for schema in warning.

When fast deserializer generation fails, the WARN log now prints each
schema truncated to Utils.MAX_SCHEMA_LENGTH_IN_WARNING (100) characters
followed by "...". The full reader and writer schemas are additionally
logged at DEBUG level when either one exceeds that limit.

---
 .../linkedin/avro/fastserde/UtilsTest.java    | 35 +++++++++++++++++++
 .../fastserde/FastGenericDatumReader.java     | 14 ++++++--
 .../com/linkedin/avro/fastserde/Utils.java    | 10 ++++++
 3 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java
index 68cfd70b2..bb3e9e100 100644
--- a/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java
+++ b/fastserde/avro-fastserde-tests-common/src/test/java/com/linkedin/avro/fastserde/UtilsTest.java
@@ -4,6 +4,7 @@
 
 import javax.lang.model.SourceVersion;
 
+import org.apache.avro.Schema;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -73,4 +74,38 @@ void shouldFailGeneratingValidJavaIdentifier(String invalidProposal) {
     // NPE expected
     Utils.toValidJavaIdentifier(invalidProposal);
   }
+
+  @Test
+  void testGetTruncateSchemaForWarningSmall() {
+    String schemaJson = "{"
+        + "\"type\": \"record\","
+        + "\"name\": \"User\","
+        + "\"fields\": ["
+        + " {\"name\": \"name\", \"type\": \"string\"}"
+        + "]"
+        + "}";
+    Schema schema = Schema.parse(schemaJson);
+    Assert.assertTrue(schema.toString().length() <= Utils.MAX_SCHEMA_LENGTH_IN_WARNING);
+    Assert.assertEquals(schema.toString(), Utils.getTruncateSchemaForWarning(schema));
+  }
+
+  @Test
+  void testGetTruncateSchemaForWarningLarge() {
+    String schemaJson = "{"
+        + "\"type\": \"record\","
+        + "\"name\": \"User\","
+        + "\"namespace\": \"com.example.avro\","
+        + "\"fields\": ["
+        + " {\"name\": \"name\", \"type\": \"string\"},"
+        + " {\"name\": \"age\", \"type\": \"int\"},"
+        + " {\"name\": \"email\", \"type\": [\"null\", \"string\"], \"default\": null}"
+        + "]"
+        + "}";
+    Schema schema = Schema.parse(schemaJson);
+    Assert.assertTrue(schema.toString().length() > Utils.MAX_SCHEMA_LENGTH_IN_WARNING);
+    String truncatedSchema = Utils.getTruncateSchemaForWarning(schema);
+    Assert.assertEquals(truncatedSchema.length(), Utils.MAX_SCHEMA_LENGTH_IN_WARNING + 3);
+    Assert.assertTrue(truncatedSchema.endsWith("..."));
+    Assert.assertTrue(schema.toString().startsWith(truncatedSchema.substring(0, Utils.MAX_SCHEMA_LENGTH_IN_WARNING)));
+  }
 }
diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java
index 213d07766..41e1c41ca 100644
--- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java
+++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastGenericDatumReader.java
@@ -116,8 +116,18 @@ public T read(T reuse, Decoder in) throws IOException {
          */
         cachedFastDeserializer.compareAndSet(null,
             getRegularAvroImplWhenGenerationFail(writerSchema, readerSchema, modelData, customization));
-        LOGGER.warn("FastGenericDeserializer generation fails, and will cache cold deserializer "
-            + "for reader schema: [" + readerSchema + "], writer schema: [" + writerSchema + "]");
+
+        LOGGER.warn("FastGenericDeserializer generation fails, and will cache cold deserializer for "
+            + "reader schema: [" + Utils.getTruncateSchemaForWarning(readerSchema) + "], "
+            + "writer schema: [" + Utils.getTruncateSchemaForWarning(writerSchema) + "].");
+
+        if (LOGGER.isDebugEnabled() &&
+            (readerSchema.toString().length() > Utils.MAX_SCHEMA_LENGTH_IN_WARNING ||
+            writerSchema.toString().length() > Utils.MAX_SCHEMA_LENGTH_IN_WARNING)) {
+          LOGGER.debug("FastGenericDeserializer generation fails, and will cache cold deserializer for "
+              + "reader schema: [" + readerSchema + "], "
+              + "writer schema: [" + writerSchema + "].");
+        }
       }
       fastDeserializer = cachedFastDeserializer.get();
     } else {
diff --git a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java
index 443461a28..162db48fe 100644
--- a/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java
+++ b/fastserde/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/Utils.java
@@ -55,6 +55,9 @@ public class Utils {
   // Cache the mapping between Schema and the corresponding fingerprint
   private static final Map SCHEMA_IDS_CACHE = new ConcurrentHashMap<>();
 
+  // Limit max schema length in WARNING logs.
+  static final int MAX_SCHEMA_LENGTH_IN_WARNING = 100;
+
   private Utils() {
   }
 
@@ -259,4 +262,11 @@ public static String toValidJavaIdentifier(String javaIdentifier) {
 
     return javaIdentifier;
   }
+
+  static String getTruncateSchemaForWarning(Schema schema) {
+    String schemaString = schema.toString();
+    return (schemaString.length() > MAX_SCHEMA_LENGTH_IN_WARNING)
+        ? schemaString.substring(0, MAX_SCHEMA_LENGTH_IN_WARNING) + "..."
+        : schemaString;
+  }
 }