Skip to content

Commit b7e1d51

Browse files
authored
gh-148241: Fix json serialization for str subclasses (#148249)
Fix json serialization so that it no longer calls str(obj) on str subclasses. In Modules/_json.c, replace PyUnicodeWriter_WriteStr() with PyUnicodeWriter_WriteASCII() in write_escaped_ascii() and with the private _PyUnicodeWriter_WriteStr() in write_escaped_unicode().
1 parent: 8000a9d · commit: b7e1d51

File tree

5 files changed: +55 / -2 lines changed

Lib/test/test_json/test_dump.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,36 @@ def __lt__(self, o):
7777
d[1337] = "true.dat"
7878
self.assertEqual(self.dumps(d, sort_keys=True), '{"1337": "true.dat"}')
7979

80+
def test_dumps_str_subclass(self):
81+
# Don't call obj.__str__() on str subclasses
82+
83+
# str subclass which returns a different string on str(obj)
84+
class StrSubclass(str):
85+
def __str__(self):
86+
return "StrSubclass"
87+
88+
obj = StrSubclass('ascii')
89+
self.assertEqual(self.dumps(obj), '"ascii"')
90+
self.assertEqual(self.dumps([obj]), '["ascii"]')
91+
self.assertEqual(self.dumps({'key': obj}), '{"key": "ascii"}')
92+
93+
obj = StrSubclass('escape\n')
94+
self.assertEqual(self.dumps(obj), '"escape\\n"')
95+
self.assertEqual(self.dumps([obj]), '["escape\\n"]')
96+
self.assertEqual(self.dumps({'key': obj}), '{"key": "escape\\n"}')
97+
98+
obj = StrSubclass('nonascii:é')
99+
self.assertEqual(self.dumps(obj, ensure_ascii=False),
100+
'"nonascii:é"')
101+
self.assertEqual(self.dumps([obj], ensure_ascii=False),
102+
'["nonascii:é"]')
103+
self.assertEqual(self.dumps({'key': obj}, ensure_ascii=False),
104+
'{"key": "nonascii:é"}')
105+
self.assertEqual(self.dumps(obj), '"nonascii:\\u00e9"')
106+
self.assertEqual(self.dumps([obj]), '["nonascii:\\u00e9"]')
107+
self.assertEqual(self.dumps({'key': obj}),
108+
'{"key": "nonascii:\\u00e9"}')
109+
80110

81111
class TestPyDump(TestDump, PyTest): pass
82112

Lib/test/test_json/test_encode_basestring_ascii.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@
33
from test.support import bigaddrspacetest
44

55

6+
# str subclass which returns a different string on str(obj)
7+
class StrSubclass(str):
8+
def __str__(self):
9+
return "StrSubclass"
10+
611
CASES = [
712
('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
813
('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
@@ -14,6 +19,8 @@
1419
('\U0001d120', '"\\ud834\\udd20"'),
1520
('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
1621
("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
22+
# Don't call obj.__str__() on str subclasses
23+
(StrSubclass('ascii'), '"ascii"'),
1724
]
1825

1926
class TestEncodeBasestringAscii:

Lib/test/test_json/test_enum.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ class WeirdNum(float, Enum):
3131
neg_inf = NEG_INF
3232
nan = NAN
3333

34+
class StringEnum(str, Enum):
35+
COLOR = "color"
36+
3437
class TestEnum:
3538

3639
def test_floats(self):
@@ -116,5 +119,11 @@ def test_dict_values(self):
116119
self.assertEqual(nd['j'], NEG_INF)
117120
self.assertTrue(isnan(nd['n']))
118121

122+
def test_str_enum(self):
123+
obj = StringEnum.COLOR
124+
self.assertEqual(self.dumps(obj), '"color"')
125+
self.assertEqual(self.dumps([obj]), '["color"]')
126+
self.assertEqual(self.dumps({'key': obj}), '{"key": "color"}')
127+
119128
class TestPyEnum(TestEnum, PyTest): pass
120129
class TestCEnum(TestEnum, CTest): pass
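(File path missing from this capture; presumably the NEWS entry added for this change — confirm against the commit.)

Lines changed: 2 additions & 0 deletions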
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:mod:`json`: Fix serialization: no longer call ``str(obj)`` on :class:`str`
2+
subclasses. Patch by Victor Stinner.

Modules/_json.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,10 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
258258
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
259259
return -1;
260260
}
261-
if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
261+
// gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
262+
// on str subclasses
263+
assert(PyUnicode_IS_ASCII(pystr));
264+
if (PyUnicodeWriter_WriteASCII(writer, input, input_chars) < 0) {
262265
return -1;
263266
}
264267
return PyUnicodeWriter_WriteChar(writer, '"');
@@ -399,7 +402,9 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
399402
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
400403
return -1;
401404
}
402-
if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
405+
// gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
406+
// on str subclasses
407+
if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, pystr) < 0) {
403408
return -1;
404409
}
405410
return PyUnicodeWriter_WriteChar(writer, '"');

0 commit comments

Comments
 (0)