Skip to content

Commit 1c7011d

Browse files
gh-150560: Fix crash in XML parser on invalid XML with multi-byte encoding (GH-150568)
1 parent bcd29e4 commit 1c7011d

3 files changed

Lines changed: 24 additions & 0 deletions

File tree

Lib/test/test_pyexpat.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,16 @@ def test_unknown_encoding(self):
426426
with self.assertRaises(LookupError):
427427
parser.Parse(data, True)
428428

429+
@support.subTests('sample,exception', [
    (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
    (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
    (b'<x> \xa1', expat.ExpatError),
])
def test_multibyte_encoding_errors(self, sample, exception):
    # gh-150560: every sample carries a \xa1 byte in the content of a
    # document declared as EUC-JP.  Parsing must end in the listed Python
    # exception; the samples marked "crashed" used to crash the XML parser
    # instead of raising.
    prolog = b'<?xml version="1.0" encoding="EUC-JP"?>\n'
    parser = expat.ParserCreate()
    # Second argument True marks the data as the final chunk.
    self.assertRaises(exception, parser.Parse, prolog + sample, True)
429439

430440
class NamespaceSeparatorTest(unittest.TestCase):
431441
def test_legal(self):

Lib/test/test_xml_etree.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,17 @@ def bxml(encoding, body=''):
10641064
self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
10651065
self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
10661066

1067+
@support.subTests('sample,exception', [
    (b'<x> \xa1</x>', UnicodeDecodeError),  # crashed
    (b'<x> \xa1</x', UnicodeDecodeError),  # crashed
    (b'<x> \xa1', None),  # ET.ParseError
])
def test_multibyte_encoding_errors(self, sample, exception):
    # gh-150560: every sample carries a \xa1 byte in the content of a
    # document declared as EUC-JP.  Parsing must end in a Python exception;
    # the samples marked "crashed" used to crash the XML parser instead.
    #
    # None in the table stands for ET.ParseError and is resolved here, at
    # test run time.
    # NOTE(review): presumably ET is not yet bound to the module under test
    # when the decorator arguments are evaluated -- confirm.
    if not exception:
        exception = ET.ParseError
    document = b'<?xml version="1.0" encoding="EUC-JP"?>\n' + sample
    self.assertRaises(exception, ET.XML, document)
1077+
10671078
def test_methods(self):
10681079
# Test serialization methods.
10691080

Modules/pyexpat.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,6 +1473,9 @@ pyexpat_encoding_create(const char *name, PyObject *mapping)
14731473
static int
14741474
pyexpat_encoding_convert(void *data, const char *s)
14751475
{
1476+
if (PyErr_Occurred()) {
1477+
return -1;
1478+
}
14761479
pyexpat_encoding_info *info = (pyexpat_encoding_info *)data;
14771480
int i = (unsigned char)s[0];
14781481
assert(info->map[i] < -1);

0 commit comments

Comments
 (0)