Skip to content

Commit

Permalink
Fix and Enhance StringConverter to handle more special characters (#361)
Browse files Browse the repository at this point in the history
  • Loading branch information
mizady authored Dec 20, 2024
1 parent 434ec52 commit eb4c259
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 14 deletions.
45 changes: 44 additions & 1 deletion nanoFramework.Json.Test/Converters/StringConverterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ namespace nanoFramework.Json.Test.Converters
public class StringConverterTests
{
[TestMethod]
[DataRow("\"TestJson\"", "\"TestJson\"")]
[DataRow("\"a\"", "a")]
[DataRow("\"TestJson\"", "TestJson")]
[DataRow("TestJson1", "TestJson1")]
[DataRow("\"Test / solidus\"", "Test / solidus")]
public void StringConverter_ToType_ShouldReturnValidData(string value, string expectedValue)
{
var converter = new Json.Converters.StringConverter();
Expand All @@ -31,13 +33,54 @@ public void StringConverter_ToType_ShouldReturnStringEmptyForNull()
}

// Checks that StringConverter.ToJson returns the expected text for each (value, expectedValue) row.
// In every row the expected text is the input wrapped in double quotes; the row whose input
// already contains double quotes expects those inner quotes to be backslash-escaped.
[TestMethod]
[DataRow("a", "\"a\"")]
[DataRow("\"TestJson\"", "\"\\\"TestJson\\\"\"")]
[DataRow("TestJson2", "\"TestJson2\"")]
[DataRow("Test / solidus", "\"Test / solidus\"")]
public void StringConverter_ToJson_Should_ReturnValidData(string value, string expectedValue)
{
    // Act
    var actualJson = new Json.Converters.StringConverter().ToJson(value);

    // Assert
    Assert.AreEqual(expectedValue, actualJson);
}

// Checks StringConverter.ToType against inputs containing special characters.
// Every row except the last expects the input to come back unchanged. The last row is
// wrapped in double quotes with doubled backslashes and expects the quotes removed and
// each backslash pair reduced to a single backslash.
[TestMethod]
[DataRow("Text\\1", "Text\\1")] // Backslash
[DataRow("Text\b1", "Text\b1")] // Backspace
[DataRow("Text\f1", "Text\f1")] // Form feed
[DataRow("Text\r1", "Text\r1")] // Carriage return
[DataRow("Text\"1", "Text\"1")] // Double quote
[DataRow("Text\n1", "Text\n1")] // Newline
[DataRow("Text\t1", "Text\t1")] // Tab
[DataRow("['Text3', 1]", "['Text3', 1]")] // Array-like text
[DataRow("{\"Text1\" : \"/Text1/\"}", "{\"Text1\" : \"/Text1/\"}")] // JSON-like text
[DataRow("ä", "ä")] // Non-ASCII character
[DataRow("\"I:\\\\nano\\\\rpath\\\\to\"", "I:\\nano\\rpath\\to")] // Quoted path with escaped backslashes
public void StringConverter_ToType_Should_HandleSpecialCharacters(string value, string expectedValue)
{
    // Act
    var actualText = (string)new Json.Converters.StringConverter().ToType(value);

    // Assert
    Assert.AreEqual(expectedValue, actualText);
}

// Checks StringConverter.ToJson against inputs containing special characters.
// Each expected value is the input wrapped in double quotes, with the special character
// written as its two-character backslash escape; the non-ASCII row expects the character unchanged.
[TestMethod]
[DataRow("Text\\1", "\"Text\\\\1\"")] // Backslash
[DataRow("Text\b1", "\"Text\\b1\"")] // Backspace
[DataRow("Text\f1", "\"Text\\f1\"")] // Form feed
[DataRow("Text\r1", "\"Text\\r1\"")] // Carriage return
[DataRow("Text\"1", "\"Text\\\"1\"")] // Double quote
[DataRow("Text\n1", "\"Text\\n1\"")] // Newline
[DataRow("Text\t1", "\"Text\\t1\"")] // Tab
[DataRow("ä", "\"ä\"")] // Non-ASCII character
[DataRow("I:\\nano\\rpath\\to", "\"I:\\\\nano\\\\rpath\\\\to\"")] // Path with several backslashes
public void StringConverter_ToJson_Should_HandleSpecialCharacters(string value, string expectedValue)
{
    // Act
    var actualJson = new Json.Converters.StringConverter().ToJson(value);

    // Assert
    Assert.AreEqual(expectedValue, actualJson);
}
}
}
95 changes: 91 additions & 4 deletions nanoFramework.Json.Test/JsonSerializerOptionsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -375,18 +375,105 @@ public class ThingWithString
}

// Round-trip test: puts each data-row string into ThingWithString.Value, serializes the object
// with JsonConvert.SerializeObject, deserializes it back, and asserts the Value is unchanged.
// NOTE(review): this span comes from a rendered diff without +/- markers; the parameterless
// signature directly below and the hard-coded Value assignment further down look like the
// pre-change lines that the data-driven versions replace — confirm against the repository.
[TestMethod]
public void Can_serialize_and_deserialize_escaped_string()
[DataRow("a")] // Single character
[DataRow("1")] // Single numeric character
[DataRow("\t")] // Single Tab character
[DataRow("Testing / solidus")] // Forward slash in string
[DataRow("Testing solidus")] // Double space in string — NOTE(review): literal shows one space here; whitespace may have been collapsed, confirm
[DataRow("Some string with \" that needs escaping")] // String containing a quote
[DataRow("Quotes in a \"string\".")] // String with escaped quotes
[DataRow("Escaped last character \n")] // Newline as the last character
[DataRow("I:\\Nano\\rApp\\app.pe")] // Backslash in string
[DataRow("Tab \t in a string \t")] // Tab character in multiple places
[DataRow("Newline \n in a string \n")] // Newline character in multiple places
[DataRow("LineFeed \f in a string \f")] // Form feed character (\f) in multiple places
[DataRow("CarriageReturn \r in a string \r")] // Carriage return character in multiple places
[DataRow("Backspace \b in a string \b")] // Backspace character in multiple places
[DataRow("TestString")] // Simple string with no special characters
[DataRow("\"TestString\"")] // String wrapped in quotes
[DataRow("\u0041")] // Unicode character (A)
[DataRow("\u2764")] // Unicode character (❤)
[DataRow("\x1B")] // Escape character (ASCII 27)
[DataRow("\x7F")] // Delete character (ASCII 127)
[DataRow("\0")] // Null character
[DataRow("")] // Empty string
[DataRow("Line 1\nLine 2\nLine 3")] // Multi-line string
[DataRow("Curly braces: { }")] // JSON-like curly braces
[DataRow("Square brackets: [ ]")] // JSON-like square brackets
[DataRow("Colon and comma: : ,")] // Colon and comma
[DataRow("Special symbols: @#$%^&*()_+~")] // Special symbols
[DataRow("English 中文 Español العربية हिंदी")] // Mixed language text
[DataRow("{\"key\": \"value\"}")] // JSON-like string
[DataRow("\"[{\"inner\":\"value\"}]\"")] // Serialized JSON-like string
[DataRow("{\"name\":\"John\",\"age\":30}")] // Serialized JSON
[DataRow("Invalid escape: \\q")] // Backslash followed by 'q' (not a standard JSON escape)
[DataRow("https://example.com/api?query=escaped%20characters")] // URL
[DataRow("Unicode \u2764, Newline \n, Tab \t, Backslash \\")] // Combination of cases
public void Can_serialize_and_deserialize_object_containing_string_with_escaped_characters(string testValue)
{
var thing = new ThingWithString
{
// NOTE(review): two assignments are shown because this is diff output; the hard-coded
// literal appears to be the removed line and `testValue` the replacement — confirm.
Value = "Some string with \" that needs escaping"
Value = testValue
};

// The Console.WriteLine calls print the original, serialized, and deserialized values.
Console.WriteLine("Original: " + testValue);

var serialized = JsonConvert.SerializeObject(thing);
Console.WriteLine("Serialized: " + serialized);

var deserialized = (ThingWithString)JsonConvert.DeserializeObject(serialized, typeof(ThingWithString));
Console.WriteLine("Deserialized: " + deserialized.Value);

// The value must come back exactly as it went in.
Assert.AreEqual(thing.Value, deserialized.Value);
}

// Round-trip test for a bare string: serializes each data-row value with
// JsonConvert.SerializeObject, deserializes the result as typeof(string), and asserts
// that the value comes back unchanged.
[TestMethod]
[DataRow("a")] // Single character
[DataRow("\t")] // Tab character
[DataRow("Testing / solidus")] // Forward slash
[DataRow("Testing solidus")] // Double space — NOTE(review): literal shows one space here; whitespace may have been collapsed, confirm
[DataRow("Quotes in a \"string\".")] // String with escaped quotes
[DataRow("Escaped last character \n")] // Newline at the end
[DataRow("I:\\Nano\\rApp\\app.pe")] // Backslash in string
[DataRow("Tab \t in a string \ta")] // Tab character in multiple places
[DataRow("Newline \n in a string \na")] // Newline character in multiple places
[DataRow("LineFeed \f in a string \fa")] // Form feed character (\f)
[DataRow("CarriageReturn \r in a string \ra")] // Carriage return character
[DataRow("Backspace \b in a string \ba")] // Backspace character
[DataRow("TestString")] // Simple string
[DataRow("\"TestString\"")] // String wrapped in quotes
[DataRow("\u0041")] // Unicode character (A)
[DataRow("\u2764")] // Unicode character (❤)
[DataRow("\x1B")] // Escape character (ASCII 27)
[DataRow("\x7F")] // Delete character (ASCII 127)
[DataRow("\0")] // Null character
[DataRow("")] // Empty string
[DataRow("Line 1\nLine 2\nLine 3")] // Multi-line string
[DataRow("Curly braces: { }")] // JSON-like curly braces
[DataRow("Square brackets: [ ]")] // JSON-like square brackets
[DataRow("Colon and comma: : ,")] // Colon and comma
[DataRow("Special symbols: @#$%^&*()_+~")] // Special symbols
[DataRow("English 中文 Español العربية हिंदी")] // Mixed language text
[DataRow("{\"key\": \"value\"}")] // JSON-like string
[DataRow("\"[{\"inner\":\"value\"}]\"")] // Serialized JSON-like string
[DataRow("{\"name\":\"John\",\"age\":30}")] // Serialized JSON
[DataRow("Invalid escape: \\q")] // Backslash followed by 'q' (not a standard JSON escape)
[DataRow("https://example.com/api?query=escaped%20characters")] // URL
[DataRow("Unicode \u2764, Newline \n, Tab \t, Backslash \\")] // Combination of cases
[DataRow("\"\\\"TestJson\\\"\"")] // Double escaped string
public void Can_serialize_and_deserialize_string_with_escaped_characters(string testValue)
{
    // Print the input.
    Console.WriteLine("Original: " + testValue);

    // Serialize the bare string.
    var json = JsonConvert.SerializeObject(testValue);
    Console.WriteLine("Serialized: " + json);

    // Deserialize it back into a string.
    var roundTripped = (string)JsonConvert.DeserializeObject(json, typeof(string));
    Console.WriteLine("Deserialized: " + roundTripped);

    // The round trip must leave the value unchanged.
    Assert.AreEqual(testValue, roundTripped);
}

[TestMethod]
public void Can_serialize_and_deserialize_complex_object()
{
Expand Down Expand Up @@ -1037,8 +1124,8 @@ public void CanDeserializeInvocationReceiveMessage_05()

string arg1 = (string)JsonConvert.DeserializeObject(JsonConvert.SerializeObject(dserResult.arguments[1]), typeof(string));

Assert.AreEqual(arg0, "\"I_am_a_string\"", $"arg0 has unexpected value: {arg0}");
Assert.AreEqual(arg1, "\"I_am_another_string\"", $"arg1 has unexpected value: {arg1}");
Assert.AreEqual(arg0, "I_am_a_string", $"arg0 has unexpected value: {arg0}");
Assert.AreEqual(arg1, "I_am_another_string", $"arg1 has unexpected value: {arg1}");
}

[TestMethod]
Expand Down
39 changes: 30 additions & 9 deletions nanoFramework.Json/Converters/StringConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ internal sealed class StringConverter : IConverter
{
{'\n', 'n'},
{'\r', 'r'},
{'\b', 'b' },
{'\f', 'f' },
{'\t', 't' },
{'\\', '\\' },
{'\"', '"' }
};

Expand Down Expand Up @@ -92,30 +96,47 @@ public object ToType(object value)
}

var sourceString = value.ToString();

// String by default has escaped \" at beginning and end, just remove them.
// If they have already been removed, the string has likely already been deserialized,
// so we just return it.
if (!sourceString.StartsWith("\"") && !sourceString.EndsWith("\""))
{
return sourceString;
}
string resultString = sourceString.Substring(1, sourceString.Length - 2);

// No characters to escape so we short circuit the character loop
if (!StringContainsCharactersToEscape(sourceString, true))
{
return value;
return resultString;
}

//String by default has escaped \" at beggining and end, just remove them
var resultString = sourceString.Substring(1, sourceString.Length - 2);
var newString = new StringBuilder();
//Last character can not be escaped, because it's last one
for (int i = 0; i < resultString.Length - 1; i++)
{
var curChar = resultString[i];
var nextChar = resultString[i + 1];

if (curChar == '\\')
if (curChar != '\\')
{
var charToAppend = GetEscapableCharKeyBasedOnValue(nextChar);
newString.Append(charToAppend);
i++;
newString.Append(curChar);
continue;
}
newString.Append(curChar);

var charToAppend = GetEscapableCharKeyBasedOnValue(nextChar);
newString.Append(charToAppend);
i++;

// If the end of the string is an escaped character, return the string
if (i == resultString.Length - 1)
{
return newString.ToString();
}
}
//Append last character skkiped by loop

//Append last character skipped by loop
newString.Append(resultString[resultString.Length - 1]);
return newString.ToString();
}
Expand Down
20 changes: 20 additions & 0 deletions nanoFramework.Json/JsonConvert.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,18 @@ private static LexToken GetNextTokenInternal(ref int jsonPos, ref byte[] jsonByt
ch = '\n';
break;

case 'b':
ch = '\b';
break;

case 'f':
ch = '\f';
break;

case '\\':
ch = '\\';
break;

case 'u':
unicodeEncoded = true;
break;
Expand Down Expand Up @@ -1239,6 +1251,14 @@ private static LexToken GetNextTokenInternal(ref int jsonPos, ref byte[] jsonByt

var stringValue = sb.ToString();

// This adds an extra set of quotes since an extra set is removed during de-serialization
if (ch == '"' && stringValue.StartsWith("\""))
{
sb.Insert(0, "\"", 1);
sb.Append("\"");
stringValue = sb.ToString();
}

if (DateTimeExtensions.ConvertFromString(stringValue, out _))
{
return new LexToken() { TType = TokenType.Date, TValue = stringValue };
Expand Down

0 comments on commit eb4c259

Please sign in to comment.