Skip to content

Commit

Permalink
Fix HTMLParser error handling which referenced Python's html.HTMLPars…
Browse files Browse the repository at this point in the history
…eError which was never raised and removed in Python 3.5.
  • Loading branch information
hodgestar committed Aug 25, 2024
1 parent ca08e57 commit 26eda4e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
10 changes: 7 additions & 3 deletions genshi/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,13 @@ def _generate():
for tag in open_tags:
yield END, QName(tag), pos
break
except html.HTMLParseError as e:
msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset)
raise ParseError(msg, self.filename, e.lineno, e.offset)
except Exception as e:
# Python's simple HTMLParser does not raise detailed
# errors except in strict mode, which was deprecated
# in Python 3.3 and removed in Python 3.5, and which in
# any case is not used in this code.
msg = str(e)
raise ParseError(msg, self.filename)
return Stream(_generate()).filter(_coalesce)

def __iter__(self):
Expand Down
11 changes: 11 additions & 0 deletions genshi/tests/test_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,17 @@ def test_convert_ElementTree_to_markup_stream(self):
self.assertEqual((Stream.END, QName("span")), events[4][:2])
self.assertEqual((Stream.END, QName("div")), events[5][:2])

def test_parsing_error(self):
    """Consuming an HTMLParser fed bytes without an encoding raises ParseError."""
    raw_markup = u'<div></div>'.encode('utf-8')
    # The parser is built outside the assertion: the error is expected
    # only once its events are consumed by list().
    parser = HTMLParser(BytesIO(raw_markup))
    with self.assertRaisesRegex(
        ParseError,
        r"source returned bytes, but no encoding specified",
    ):
        list(parser)


def suite():
suite = unittest.TestSuite()
suite.addTest(doctest_suite(XMLParser.__module__))
Expand Down

0 comments on commit 26eda4e

Please sign in to comment.