From 630768902fdfac602f91b809add0a63411dd44de Mon Sep 17 00:00:00 2001 From: Sathyavijayan Vittal Date: Thu, 18 Apr 2019 15:41:38 +0100 Subject: [PATCH 1/2] Make avro serde accept clj maps with underscores,add option to skip mangling names. --- src/jackdaw/serdes/avro.clj | 19 ++- test/jackdaw/serdes/avro_test.clj | 193 ++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+), 5 deletions(-) diff --git a/src/jackdaw/serdes/avro.clj b/src/jackdaw/serdes/avro.clj index 4d500501..e8449d75 100644 --- a/src/jackdaw/serdes/avro.clj +++ b/src/jackdaw/serdes/avro.clj @@ -86,11 +86,20 @@ (when schema-str (.parse (Schema$Parser.) ^String schema-str))))) +(def ^:dynamic *mangle-names* + "When true, record field names will be mangled during schema parse and + record de/serialization. Default value is `true`." + true) + (defn- ^String mangle [^String n] - (str/replace n #"-" "_")) + (if *mangle-names* (str/replace n #"-" "_") n)) (defn- ^String unmangle [^String n] - (str/replace n #"_" "-")) + (if *mangle-names* (str/replace n #"_" "-") n)) + +(defn- ->field-key + [field-name] + (keyword (unmangle field-name))) (defn- dispatch-on-type-fields [^Schema schema] @@ -398,7 +407,7 @@ (comp (map first) (map (fn [^Schema$Field field] (let [field-name (.name field) - field-key (keyword (unmangle field-name)) + field-key (->field-key field-name) [_ field-coercion :as entry] (get field->schema+coercion field-key) value (.get ^GenericData$Record avro-record field-name)] (when-not field-coercion @@ -425,7 +434,7 @@ new-k (.getName schema)) {:path path, :clj-data clj-map}))) - (let [[_ field-coercion] (get field->schema+coercion k) + (let [[_ field-coercion] (get field->schema+coercion (->field-key new-k)) new-v (clj->avro field-coercion v (conj path k))] (.set record-builder new-k new-v)))) @@ -442,7 +451,7 @@ [schema->coercion ^Schema schema] (let [fields (into {} (map (fn [^Schema$Field field] - [(keyword (unmangle (.name field))) + [(->field-key (.name field)) [field 
(schema->coercion (.schema field))]])) (.getFields schema))] (RecordType. schema fields))) diff --git a/test/jackdaw/serdes/avro_test.clj b/test/jackdaw/serdes/avro_test.clj index e450b96b..e6ad487d 100644 --- a/test/jackdaw/serdes/avro_test.clj +++ b/test/jackdaw/serdes/avro_test.clj @@ -457,6 +457,199 @@ "topic" (uuid/to-string uuid/+null+)))))))) +(deftest accept-unmangled-input + (let [schema {:name "testRecord" + :type "record" + :fields [{:name "string_field" + :type "string"} + {:name "long_field" + :type "long"} + {:name "optional_field" + :type ["null" "int"] + :default nil} + {:name "nil_field" + :type "null"} + {:name "default_field" + :type "long" + :default 1} + {:name "bytes_field" + :type "bytes"} + {:name "enum_field" + :type {:type "enum" + :name "weird_values" + :symbols ["a_1" "B3"]}} + {:name "map_field" + :type ["null" {:type "map" + :values bananas-schema}]} + {:name "array_field" + :type ["null" {:name "subrecords" + :type "array" + :items "banana"}]} + {:name "uuid_field" + :type {:type "string", + :logicalType "uuid"}}]} + schema-str (json/write-str schema) + serde (->serde schema-str)] + + (is (= (round-trip serde "bananas" + {:string_field "hello" + :long_field 3 + :optional_field 3 + :nil_field nil + :default_field 1 + :bytes_field (ByteBuffer/wrap (.getBytes "hello")) + :enum_field "a_1" + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + + :array_field [{:color "yellow"}] + :uuid_field uuid/+null+}) + {:string-field "hello" + :long-field 3 + :default-field 1 + :nil-field nil + :bytes-field (ByteBuffer/wrap (.getBytes "hello")) + :map-field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :enum-field :a-1 + :optional-field 3 + :array-field [{:color "yellow"}] + :uuid-field uuid/+null+})) + + (is (= (round-trip serde "bananas" + {:string_field "hello" + :long_field 3 + :optional_field 3 + :nil_field nil + :default_field 1 + :bytes_field (ByteBuffer/wrap (.getBytes "hello")) + 
:enum_field :a_1 + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + + :array_field [{:color "yellow"}] + :uuid_field uuid/+null+}) + {:string-field "hello" + :long-field 3 + :default-field 1 + :nil-field nil + :bytes-field (ByteBuffer/wrap (.getBytes "hello")) + :map-field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :enum-field :a-1 + :optional-field 3 + :array-field [{:color "yellow"}] + :uuid-field uuid/+null+})) + + + (is (= (round-trip serde "bananas" + {"string_field" "hello" + "long_field" 3 + "optional_field" 3 + "nil_field" nil + "default_field" 1 + "bytes_field" (ByteBuffer/wrap (.getBytes "hello")) + "enum_field" "a_1" + "map_field" {"banana" {"color" "yellow"} + "ripe b4nana$" {"color" "yellow-green"}} + "array_field" [{"color" "yellow"}] + "uuid_field" uuid/+null+}) + {:string-field "hello" + :long-field 3 + :default-field 1 + :nil-field nil + :bytes-field (ByteBuffer/wrap (.getBytes "hello")) + :map-field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :enum-field :a-1 + :optional-field 3 + :array-field [{:color "yellow"}] + :uuid-field uuid/+null+})))) + +(deftest no-mangling-test + (binding [jackdaw.serdes.avro/*mangle-names* false] + (let [schema {:name "testRecord" + :type "record" + :fields [{:name "string_field" + :type "string"} + {:name "long_field" + :type "long"} + {:name "optional_field" + :type ["null" "int"] + :default nil} + {:name "nil_field" + :type "null"} + {:name "default_field" + :type "long" + :default 1} + {:name "bytes_field" + :type "bytes"} + {:name "enum_field" + :type {:type "enum" + :name "weird_values" + :symbols ["a_1" "B3"]}} + {:name "map_field" + :type ["null" {:type "map" + :values bananas-schema}]} + {:name "array_field" + :type ["null" {:name "subrecords" + :type "array" + :items "banana"}]} + {:name "uuid_field" + :type {:type "string", + :logicalType "uuid"}}]} + schema-str (json/write-str schema) + serde (->serde schema-str)] + + 
(is (= (round-trip serde "bananas" + {:string_field "hello" + :long_field 3 + :optional_field 3 + :nil_field nil + :default_field 1 + :bytes_field (ByteBuffer/wrap (.getBytes "hello")) + :enum_field :a_1 + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + + :array_field [{:color "yellow"}] + :uuid_field uuid/+null+}) + {:string_field "hello" + :long_field 3 + :default_field 1 + :nil_field nil + :bytes_field (ByteBuffer/wrap (.getBytes "hello")) + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :enum_field :a_1 + :optional_field 3 + :array_field [{:color "yellow"}] + :uuid_field uuid/+null+})) + + (is (= (round-trip serde "bananas" + {"string_field" "hello" + "long_field" 3 + "optional_field" 3 + "nil_field" nil + "default_field" 1 + "bytes_field" (ByteBuffer/wrap (.getBytes "hello")) + "enum_field" "a_1" + "map_field" {"banana" {"color" "yellow"} + "ripe b4nana$" {"color" "yellow-green"}} + "array_field" [{"color" "yellow"}] + "uuid_field" uuid/+null+}) + {:string_field "hello" + :long_field 3 + :default_field 1 + :nil_field nil + :bytes_field (ByteBuffer/wrap (.getBytes "hello")) + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :enum_field :a_1 + :optional_field 3 + :array_field [{:color "yellow"}] + :uuid_field uuid/+null+}))))) + (deftest schemaless-test (let [serde (->serde nil)] (is (= (round-trip serde "bananas" "hello") From 8b7a8945357f87e99a36a919a13e4f94f5bffa93 Mon Sep 17 00:00:00 2001 From: Sathyavijayan Vittal Date: Wed, 19 Jun 2019 11:55:52 +0100 Subject: [PATCH 2/2] restructure avro mangling tests to improve readability --- test/jackdaw/serdes/avro_test.clj | 238 ++++++++---------------------- 1 file changed, 63 insertions(+), 175 deletions(-) diff --git a/test/jackdaw/serdes/avro_test.clj b/test/jackdaw/serdes/avro_test.clj index e6ad487d..d51c2d9c 100644 --- a/test/jackdaw/serdes/avro_test.clj +++ b/test/jackdaw/serdes/avro_test.clj @@ -457,198 
+457,86 @@ "topic" (uuid/to-string uuid/+null+)))))))) -(deftest accept-unmangled-input - (let [schema {:name "testRecord" - :type "record" - :fields [{:name "string_field" - :type "string"} - {:name "long_field" - :type "long"} - {:name "optional_field" - :type ["null" "int"] - :default nil} - {:name "nil_field" - :type "null"} - {:name "default_field" - :type "long" - :default 1} - {:name "bytes_field" - :type "bytes"} - {:name "enum_field" - :type {:type "enum" - :name "weird_values" - :symbols ["a_1" "B3"]}} - {:name "map_field" - :type ["null" {:type "map" - :values bananas-schema}]} - {:name "array_field" - :type ["null" {:name "subrecords" - :type "array" - :items "banana"}]} - {:name "uuid_field" - :type {:type "string", - :logicalType "uuid"}}]} - schema-str (json/write-str schema) - serde (->serde schema-str)] +(def mangling-test-schema + {:name "testRecord" + :type "record" + :fields [{:name "enum_field" + :type {:type "enum" + :name "weird_values" + :symbols ["a_1" "B3"]}} + {:name "map_field" + :type ["null" {:type "map" + :values bananas-schema}]} + {:name "array_field" + :type ["null" {:name "subrecords" + :type "array" + :items "banana"}]}]}) - (is (= (round-trip serde "bananas" - {:string_field "hello" - :long_field 3 - :optional_field 3 - :nil_field nil - :default_field 1 - :bytes_field (ByteBuffer/wrap (.getBytes "hello")) - :enum_field "a_1" - :map_field {"banana" {:color "yellow"} - "ripe b4nana$" {:color "yellow-green"}} - - :array_field [{:color "yellow"}] - :uuid_field uuid/+null+}) - {:string-field "hello" - :long-field 3 - :default-field 1 - :nil-field nil - :bytes-field (ByteBuffer/wrap (.getBytes "hello")) - :map-field {"banana" {:color "yellow"} - "ripe b4nana$" {:color "yellow-green"}} - :enum-field :a-1 - :optional-field 3 - :array-field [{:color "yellow"}] - :uuid-field uuid/+null+})) +(deftest accept-unmangled-input + "Ensure that it is possible to serialize non-idiomatic but perfectly + valid clojure maps. For eg. 
string values for enums, stringified keys + and keys with underscores within the input." + (let [serde (-> mangling-test-schema + json/write-str + ->serde)] (is (= (round-trip serde "bananas" - {:string_field "hello" - :long_field 3 - :optional_field 3 - :nil_field nil - :default_field 1 - :bytes_field (ByteBuffer/wrap (.getBytes "hello")) - :enum_field :a_1 - :map_field {"banana" {:color "yellow"} - "ripe b4nana$" {:color "yellow-green"}} - - :array_field [{:color "yellow"}] - :uuid_field uuid/+null+}) - {:string-field "hello" - :long-field 3 - :default-field 1 - :nil-field nil - :bytes-field (ByteBuffer/wrap (.getBytes "hello")) - :map-field {"banana" {:color "yellow"} + {:enum-field "a_1" + :map-field {"banana" {"color" "yellow"} + "ripe b4nana$" {"color" "yellow-green"}} + :array-field [{"color" "yellow"}]}) + {:map-field {"banana" {:color "yellow"} "ripe b4nana$" {:color "yellow-green"}} :enum-field :a-1 - :optional-field 3 - :array-field [{:color "yellow"}] - :uuid-field uuid/+null+})) + :array-field [{:color "yellow"}]}) + "Accept unmangled values for enums and maps") (is (= (round-trip serde "bananas" - {"string_field" "hello" - "long_field" 3 - "optional_field" 3 - "nil_field" nil - "default_field" 1 - "bytes_field" (ByteBuffer/wrap (.getBytes "hello")) - "enum_field" "a_1" - "map_field" {"banana" {"color" "yellow"} + {"enum_field" "a_1" + "map-field" {"banana" {"color" "yellow"} "ripe b4nana$" {"color" "yellow-green"}} - "array_field" [{"color" "yellow"}] - "uuid_field" uuid/+null+}) - {:string-field "hello" - :long-field 3 - :default-field 1 - :nil-field nil - :bytes-field (ByteBuffer/wrap (.getBytes "hello")) - :map-field {"banana" {:color "yellow"} + :array_field [{"color" "yellow"}]}) + {:map-field {"banana" {:color "yellow"} "ripe b4nana$" {:color "yellow-green"}} :enum-field :a-1 - :optional-field 3 - :array-field [{:color "yellow"}] - :uuid-field uuid/+null+})))) + :array-field [{:color "yellow"}]}) + "Accept maps with string keys and keys with 
underscores"))) + + (deftest no-mangling-test + "Ensure that the deserialiser does not 'idiomise' values of enums + and keys of maps when `jackdaw.serdes.avro/*mangle-names*` is + set to false." (binding [jackdaw.serdes.avro/*mangle-names* false] - (let [schema {:name "testRecord" - :type "record" - :fields [{:name "string_field" - :type "string"} - {:name "long_field" - :type "long"} - {:name "optional_field" - :type ["null" "int"] - :default nil} - {:name "nil_field" - :type "null"} - {:name "default_field" - :type "long" - :default 1} - {:name "bytes_field" - :type "bytes"} - {:name "enum_field" - :type {:type "enum" - :name "weird_values" - :symbols ["a_1" "B3"]}} - {:name "map_field" - :type ["null" {:type "map" - :values bananas-schema}]} - {:name "array_field" - :type ["null" {:name "subrecords" - :type "array" - :items "banana"}]} - {:name "uuid_field" - :type {:type "string", - :logicalType "uuid"}}]} - schema-str (json/write-str schema) - serde (->serde schema-str)] - + (let [serde (-> mangling-test-schema + json/write-str + ->serde)] (is (= (round-trip serde "bananas" - {:string_field "hello" - :long_field 3 - :optional_field 3 - :nil_field nil - :default_field 1 - :bytes_field (ByteBuffer/wrap (.getBytes "hello")) - :enum_field :a_1 - :map_field {"banana" {:color "yellow"} - "ripe b4nana$" {:color "yellow-green"}} - - :array_field [{:color "yellow"}] - :uuid_field uuid/+null+}) - {:string_field "hello" - :long_field 3 - :default_field 1 - :nil_field nil - :bytes_field (ByteBuffer/wrap (.getBytes "hello")) - :map_field {"banana" {:color "yellow"} - "ripe b4nana$" {:color "yellow-green"}} - :enum_field :a_1 - :optional_field 3 - :array_field [{:color "yellow"}] - :uuid_field uuid/+null+})) + {:enum_field :a_1 + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + + :array_field [{:color "yellow"}]}) + {:enum_field :a_1 + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :array_field [{:color 
"yellow"}]}) + "Enum values are not idiomised when mangling is turned off") + (is (= (round-trip serde "bananas" - {"string_field" "hello" - "long_field" 3 - "optional_field" 3 - "nil_field" nil - "default_field" 1 - "bytes_field" (ByteBuffer/wrap (.getBytes "hello")) - "enum_field" "a_1" - "map_field" {"banana" {"color" "yellow"} - "ripe b4nana$" {"color" "yellow-green"}} - "array_field" [{"color" "yellow"}] - "uuid_field" uuid/+null+}) - {:string_field "hello" - :long_field 3 - :default_field 1 - :nil_field nil - :bytes_field (ByteBuffer/wrap (.getBytes "hello")) - :map_field {"banana" {:color "yellow"} - "ripe b4nana$" {:color "yellow-green"}} - :enum_field :a_1 - :optional_field 3 - :array_field [{:color "yellow"}] - :uuid_field uuid/+null+}))))) + {:enum_field :a_1 + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + + :array_field [{:color "yellow"}]}) + {:enum_field :a_1 + :map_field {"banana" {:color "yellow"} + "ripe b4nana$" {:color "yellow-green"}} + :array_field [{:color "yellow"}]}) + "keys of maps are not idiomised when mangling is turned off")))) + (deftest schemaless-test (let [serde (->serde nil)]