From eb4c2594ba23694e1f8fec5a6024020b343b32e0 Mon Sep 17 00:00:00 2001 From: Lance Cain <90141999+mizady@users.noreply.github.com> Date: Fri, 20 Dec 2024 05:43:56 -0500 Subject: [PATCH] Fix and Enhance StringConverter to handle more special characters (#361) --- .../Converters/StringConverterTests.cs | 45 ++++++++- .../JsonSerializerOptionsTests.cs | 95 ++++++++++++++++++- .../Converters/StringConverter.cs | 39 ++++++-- nanoFramework.Json/JsonConvert.cs | 20 ++++ 4 files changed, 185 insertions(+), 14 deletions(-) diff --git a/nanoFramework.Json.Test/Converters/StringConverterTests.cs b/nanoFramework.Json.Test/Converters/StringConverterTests.cs index 91bd274..043af84 100644 --- a/nanoFramework.Json.Test/Converters/StringConverterTests.cs +++ b/nanoFramework.Json.Test/Converters/StringConverterTests.cs @@ -11,8 +11,10 @@ namespace nanoFramework.Json.Test.Converters public class StringConverterTests { [TestMethod] - [DataRow("\"TestJson\"", "\"TestJson\"")] + [DataRow("\"a\"", "a")] + [DataRow("\"TestJson\"", "TestJson")] [DataRow("TestJson1", "TestJson1")] + [DataRow("\"Test / solidus\"", "Test / solidus")] public void StringConverter_ToType_ShouldReturnValidData(string value, string expectedValue) { var converter = new Json.Converters.StringConverter(); @@ -31,7 +33,10 @@ public void StringConverter_ToType_ShouldReturnStringEmptyForNull() } [TestMethod] + [DataRow("a", "\"a\"")] + [DataRow("\"TestJson\"", "\"\\\"TestJson\\\"\"")] [DataRow("TestJson2", "\"TestJson2\"")] + [DataRow("Test / solidus", "\"Test / solidus\"")] public void StringConverter_ToJson_Should_ReturnValidData(string value, string expectedValue) { var converter = new Json.Converters.StringConverter(); @@ -39,5 +44,43 @@ public void StringConverter_ToJson_Should_ReturnValidData(string value, string e Assert.AreEqual(expectedValue, convertedValue); } + + [TestMethod] + [DataRow("Text\\1", "Text\\1")] // Backslash + [DataRow("Text\b1", "Text\b1")] // Backspace + [DataRow("Text\f1", "Text\f1")] // 
FormFeed + [DataRow("Text\r1", "Text\r1")] // CarriageReturn + [DataRow("Text\"1", "Text\"1")] // DoubleQuote + [DataRow("Text\n1", "Text\n1")] // Newline + [DataRow("Text\t1", "Text\t1")] // Tab + [DataRow("['Text3', 1]", "['Text3', 1]")] // Array + [DataRow("{\"Text1\" : \"/Text1/\"}", "{\"Text1\" : \"/Text1/\"}")] // Json + [DataRow("ä", "ä")] // Unicode + [DataRow("\"I:\\\\nano\\\\rpath\\\\to\"", "I:\\nano\\rpath\\to")] + public void StringConverter_ToType_Should_HandleSpecialCharacters(string value, string expectedValue) + { + var converter = new Json.Converters.StringConverter(); + var convertedValue = (string)converter.ToType(value); + + Assert.AreEqual(expectedValue, convertedValue); + } + + [TestMethod] + [DataRow("Text\\1", "\"Text\\\\1\"")] // Backslash + [DataRow("Text\b1", "\"Text\\b1\"")] // Backspace + [DataRow("Text\f1", "\"Text\\f1\"")] // FormFeed + [DataRow("Text\r1", "\"Text\\r1\"")] // CarriageReturn + [DataRow("Text\"1", "\"Text\\\"1\"")] // DoubleQuote + [DataRow("Text\n1", "\"Text\\n1\"")] // Newline + [DataRow("Text\t1", "\"Text\\t1\"")] // Tab + [DataRow("ä", "\"ä\"")] // Unicode + [DataRow("I:\\nano\\rpath\\to", "\"I:\\\\nano\\\\rpath\\\\to\"")] + public void StringConverter_ToJson_Should_HandleSpecialCharacters(string value, string expectedValue) + { + var converter = new Json.Converters.StringConverter(); + var convertedValue = converter.ToJson(value); + + Assert.AreEqual(expectedValue, convertedValue); + } } } diff --git a/nanoFramework.Json.Test/JsonSerializerOptionsTests.cs b/nanoFramework.Json.Test/JsonSerializerOptionsTests.cs index 78f3c8f..bf7f70a 100644 --- a/nanoFramework.Json.Test/JsonSerializerOptionsTests.cs +++ b/nanoFramework.Json.Test/JsonSerializerOptionsTests.cs @@ -375,18 +375,105 @@ public class ThingWithString } [TestMethod] - public void Can_serialize_and_deserialize_escaped_string() + [DataRow("a")] // Single character + [DataRow("1")] // Single numeric character + [DataRow("\t")] // Single Tab character + 
[DataRow("Testing / solidus")] // Forward slash in string + [DataRow("Testing solidus")] // Double space in string + [DataRow("Some string with \" that needs escaping")] // String containing a quote + [DataRow("Quotes in a \"string\".")] // String with escaped quotes + [DataRow("Escaped last character \n")] // Newline as the last character + [DataRow("I:\\Nano\\rApp\\app.pe")] // Backslash in string + [DataRow("Tab \t in a string \t")] // Tab character in multiple places + [DataRow("Newline \n in a string \n")] // Newline character in multiple places + [DataRow("LineFeed \f in a string \f")] // Line feed character in multiple places + [DataRow("CarriageReturn \r in a string \r")] // Carriage return character in multiple places + [DataRow("Backspace \b in a string \b")] // Backspace character in multiple places + [DataRow("TestString")] // Simple string with no special characters + [DataRow("\"TestString\"")] // String wrapped in quotes + [DataRow("\u0041")] // Unicode character (A) + [DataRow("\u2764")] // Unicode character (❤) + [DataRow("\x1B")] // Escape character (ASCII 27) + [DataRow("\x7F")] // Delete character (ASCII 127) + [DataRow("\0")] // Null character + [DataRow("")] // Empty string + [DataRow("Line 1\nLine 2\nLine 3")] // Multi-line string + [DataRow("Curly braces: { }")] // JSON-like curly braces + [DataRow("Square brackets: [ ]")] // JSON-like square brackets + [DataRow("Colon and comma: : ,")] // Colon and comma + [DataRow("Special symbols: @#$%^&*()_+~")] // Special symbols + [DataRow("English 中文 Español العربية हिंदी")] // Mixed language text + [DataRow("{\"key\": \"value\"}")] // JSON-like string + [DataRow("\"[{\"inner\":\"value\"}]\"")] // Serialized JSON-like string + [DataRow("{\"name\":\"John\",\"age\":30}")] // Serialized JSON + [DataRow("Invalid escape: \\q")] // Invalid escape sequence + [DataRow("https://example.com/api?query=escaped%20characters")] // URL + [DataRow("Unicode \u2764, Newline \n, Tab \t, Backslash \\")] // Combination of 
cases + public void Can_serialize_and_deserialize_object_containing_string_with_escaped_characters(string testValue) { var thing = new ThingWithString { - Value = "Some string with \" that needs escaping" + Value = testValue }; + Console.WriteLine("Original: " + testValue); + var serialized = JsonConvert.SerializeObject(thing); + Console.WriteLine("Serialized: " + serialized); + var deserialized = (ThingWithString)JsonConvert.DeserializeObject(serialized, typeof(ThingWithString)); + Console.WriteLine("Deserialized: " + deserialized.Value); + Assert.AreEqual(thing.Value, deserialized.Value); } + [TestMethod] + [DataRow("a")] // Single character + [DataRow("\t")] // Tab character + [DataRow("Testing / solidus")] // Forward slash + [DataRow("Testing solidus")] // Double space + [DataRow("Quotes in a \"string\".")] // String with escaped quotes + [DataRow("Escaped last character \n")] // Newline at the end + [DataRow("I:\\Nano\\rApp\\app.pe")] // Backslash in string + [DataRow("Tab \t in a string \ta")] // Tab character in multiple places + [DataRow("Newline \n in a string \na")] // Newline character in multiple places + [DataRow("LineFeed \f in a string \fa")] // Line feed character + [DataRow("CarriageReturn \r in a string \ra")] // Carriage return character + [DataRow("Backspace \b in a string \ba")] // Backspace character + [DataRow("TestString")] // Simple string + [DataRow("\"TestString\"")] // String wrapped in quotes + [DataRow("\u0041")] // Unicode character (A) + [DataRow("\u2764")] // Unicode character (❤) + [DataRow("\x1B")] // Escape character (ASCII 27) + [DataRow("\x7F")] // Delete character (ASCII 127) + [DataRow("\0")] // Null character + [DataRow("")] // Empty string + [DataRow("Line 1\nLine 2\nLine 3")] // Multi-line string + [DataRow("Curly braces: { }")] // JSON-like curly braces + [DataRow("Square brackets: [ ]")] // JSON-like square brackets + [DataRow("Colon and comma: : ,")] // Colon and comma + [DataRow("Special symbols: @#$%^&*()_+~")] // 
Special symbols + [DataRow("English 中文 Español العربية हिंदी")] // Mixed language text + [DataRow("{\"key\": \"value\"}")] // JSON-like string + [DataRow("\"[{\"inner\":\"value\"}]\"")] // Serialized JSON-like string + [DataRow("{\"name\":\"John\",\"age\":30}")] // Serialized JSON + [DataRow("Invalid escape: \\q")] // Invalid escape sequence + [DataRow("https://example.com/api?query=escaped%20characters")] // URL + [DataRow("Unicode \u2764, Newline \n, Tab \t, Backslash \\")] // Combination of cases + [DataRow("\"\\\"TestJson\\\"\"")] // Double escaped string + public void Can_serialize_and_deserialize_string_with_escaped_characters(string testValue) + { + Console.WriteLine("Original: " + testValue); + + var serialized = JsonConvert.SerializeObject(testValue); + Console.WriteLine("Serialized: " + serialized); + + var deserialized = (string)JsonConvert.DeserializeObject(serialized, typeof(string)); + Console.WriteLine("Deserialized: " + deserialized); + + Assert.AreEqual(testValue, deserialized); + } + [TestMethod] public void Can_serialize_and_deserialize_complex_object() { @@ -1037,8 +1124,8 @@ public void CanDeserializeInvocationReceiveMessage_05() string arg1 = (string)JsonConvert.DeserializeObject(JsonConvert.SerializeObject(dserResult.arguments[1]), typeof(string)); - Assert.AreEqual(arg0, "\"I_am_a_string\"", $"arg0 has unexpected value: {arg0}"); - Assert.AreEqual(arg1, "\"I_am_another_string\"", $"arg1 has unexpected value: {arg1}"); + Assert.AreEqual(arg0, "I_am_a_string", $"arg0 has unexpected value: {arg0}"); + Assert.AreEqual(arg1, "I_am_another_string", $"arg1 has unexpected value: {arg1}"); } [TestMethod] diff --git a/nanoFramework.Json/Converters/StringConverter.cs b/nanoFramework.Json/Converters/StringConverter.cs index 283b2eb..cca726e 100644 --- a/nanoFramework.Json/Converters/StringConverter.cs +++ b/nanoFramework.Json/Converters/StringConverter.cs @@ -15,6 +15,10 @@ internal sealed class StringConverter : IConverter { {'\n', 'n'}, {'\r', 'r'}, + 
{'\b', 'b' }, + {'\f', 'f' }, + {'\t', 't' }, + {'\\', '\\' }, {'\"', '"' } }; @@ -92,13 +96,22 @@ public object ToType(object value) } var sourceString = value.ToString(); + + // String by default has escaped \" at beginning and end, just remove them + // if they have already been removed, string has likely already been deserialized, + // and if so, then we just return it. + if (!sourceString.StartsWith("\"") && !sourceString.EndsWith("\"")) + { + return sourceString; + } + string resultString = sourceString.Substring(1, sourceString.Length - 2); + + // No characters to escape so we short circuit the character loop if (!StringContainsCharactersToEscape(sourceString, true)) { - return value; + return resultString; } - //String by default has escaped \" at beggining and end, just remove them - var resultString = sourceString.Substring(1, sourceString.Length - 2); var newString = new StringBuilder(); //Last character can not be escaped, because it's last one for (int i = 0; i < resultString.Length - 1; i++) @@ -106,16 +119,24 @@ public object ToType(object value) var curChar = resultString[i]; var nextChar = resultString[i + 1]; - if (curChar == '\\') + if (curChar != '\\') { - var charToAppend = GetEscapableCharKeyBasedOnValue(nextChar); - newString.Append(charToAppend); - i++; + newString.Append(curChar); continue; } - newString.Append(curChar); + + var charToAppend = GetEscapableCharKeyBasedOnValue(nextChar); + newString.Append(charToAppend); + i++; + + // If the end of the string is an escaped character, return the string + if (i == resultString.Length - 1) + { + return newString.ToString(); + } } - //Append last character skkiped by loop + + //Append last character skipped by loop newString.Append(resultString[resultString.Length - 1]); return newString.ToString(); } diff --git a/nanoFramework.Json/JsonConvert.cs b/nanoFramework.Json/JsonConvert.cs index 2a320ea..34722f6 100644 --- a/nanoFramework.Json/JsonConvert.cs +++ b/nanoFramework.Json/JsonConvert.cs @@ 
-1103,6 +1103,18 @@ private static LexToken GetNextTokenInternal(ref int jsonPos, ref byte[] jsonByt ch = '\n'; break; + case 'b': + ch = '\b'; + break; + + case 'f': + ch = '\f'; + break; + + case '\\': + ch = '\\'; + break; + case 'u': unicodeEncoded = true; break; @@ -1239,6 +1251,14 @@ private static LexToken GetNextTokenInternal(ref int jsonPos, ref byte[] jsonByt var stringValue = sb.ToString(); + // This adds an extra set of quotes since an extra set is removed during de-serialization + if (ch == '"' && stringValue.StartsWith("\"")) + { + sb.Insert(0, "\"", 1); + sb.Append("\""); + stringValue = sb.ToString(); + } + if (DateTimeExtensions.ConvertFromString(stringValue, out _)) { return new LexToken() { TType = TokenType.Date, TValue = stringValue };