Skip to content

Commit c8f1615

Browse files
committed
gh-139423: Fix plistlib to preserve carriage returns in XML plist round-trips
plistlib's _escape() function was normalizing \r\n to \n and \r to \n during XML plist serialization. When the plist was loaded back, the original carriage return characters were lost because expat also normalizes newlines in XML character data. Fix by encoding \r as the XML character reference &#13; instead of converting it to \n. Character references are not subject to XML newline normalization, so expat correctly decodes &#13; back to \r, preserving the original data during round-trips.
1 parent 8000a9d commit c8f1615

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

Lib/plistlib.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,10 @@ def _escape(text):
164164
if m is not None:
165165
raise ValueError("strings can't contain control characters; "
166166
"use bytes instead")
167-
text = text.replace("\r\n", "\n") # convert DOS line endings
168-
text = text.replace("\r", "\n") # convert Mac line endings
169167
text = text.replace("&", "&amp;") # escape '&'
170168
text = text.replace("<", "&lt;") # escape '<'
171169
text = text.replace(">", "&gt;") # escape '>'
170+
text = text.replace("\r", "&#13;") # preserve CR via character reference
172171
return text
173172

174173
class _PlistParser:

Lib/test/test_plistlib.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -818,13 +818,26 @@ def test_controlcharacters(self):
818818
if i >= 32 or c in "\r\n\t":
819819
# \r, \n and \t are the only legal control chars in XML
820820
data = plistlib.dumps(testString, fmt=plistlib.FMT_XML)
821-
if c != "\r":
822-
self.assertEqual(plistlib.loads(data), testString)
821+
self.assertEqual(plistlib.loads(data), testString)
823822
else:
824823
with self.assertRaises(ValueError):
825824
plistlib.dumps(testString, fmt=plistlib.FMT_XML)
826825
plistlib.dumps(testString, fmt=plistlib.FMT_BINARY)
827826

827+
def test_cr_newline_roundtrip(self):
828+
# gh-139423: Carriage returns should survive XML plist round-trip.
829+
test_cases = [
830+
"hello\rworld", # standalone CR
831+
"hello\r\nworld", # CRLF
832+
"a\rb\nc\r\nd", # mixed newlines
833+
"\r", # bare CR
834+
"\r\n", # bare CRLF
835+
]
836+
for s in test_cases:
837+
with self.subTest(s=s):
838+
data = plistlib.dumps(s, fmt=plistlib.FMT_XML)
839+
self.assertEqual(plistlib.loads(data), s)
840+
828841
def test_non_bmp_characters(self):
829842
pl = {'python': '\U0001f40d'}
830843
for fmt in ALL_FORMATS:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fixed :mod:`plistlib` to preserve carriage return characters (``\r``) during
2+
XML plist round-trips. Previously, ``\r`` and ``\r\n`` were normalized to
3+
``\n`` during serialization, causing data corruption. Carriage returns are now
4+
encoded as ``&#13;`` XML character references, which the XML parser preserves.

0 commit comments

Comments
 (0)