Skip to content

Commit

Permalink
Merge branch 'master' into custom-extras
Browse files Browse the repository at this point in the history
Resolve merge conflicts for PR
  • Loading branch information
Crozzers committed Sep 24, 2023
2 parents fcaadfe + d61a7e1 commit eaa4544
Show file tree
Hide file tree
Showing 13 changed files with 108 additions and 16 deletions.
8 changes: 7 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# python-markdown2 Changelog

## python-markdown2 2.4.10 (not yet released)
## python-markdown2 2.4.11 (not yet released)

- [pull #524] Fix angles being escaped in style blocks (issue #523)
- [pull #527] Fix base64 images being corrupted in safe mode (issue #526)
- [pull #529] Add `breaks` extra with ability to hard break on backslashes (issue #525)

## python-markdown2 2.4.10

- [pull #519] Add support for custom extras
- [pull #519] Drop Python 3.5 support
Expand Down
65 changes: 50 additions & 15 deletions lib/markdown2.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@
see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
* admonitions: Enable parsing of RST admonitions.
* break-on-newline: Replace single new line characters with <br> when True
* breaks: Control where hard breaks are inserted in the markdown.
Options include:
- on_newline: Replace single new line characters with <br> when True
- on_backslash: Replace backslashes at the end of a line with <br>
* break-on-newline: Alias for the on_newline option in the breaks extra.
* code-friendly: Disable _ and __ for em and strong.
* cuddled-lists: Allow lists to be cuddled to the preceding paragraph.
* fenced-code-blocks: Allows a code block to not have to be indented
Expand Down Expand Up @@ -102,7 +106,7 @@
# not yet sure if there implications with this. Compare 'pydoc sre'
# and 'perldoc perlre'.

__version_info__ = (2, 4, 10)
__version_info__ = (2, 4, 11)
__version__ = '.'.join(map(str, __version_info__))
__author__ = "Trent Mick"

Expand Down Expand Up @@ -333,6 +337,10 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
else:
self._toc_depth = self.extras["toc"].get("depth", 6)

if 'break-on-newline' in self.extras:
self.extras.setdefault('breaks', {})
self.extras['breaks']['on_newline'] = True

if 'link-patterns' in self.extras:
# allow link patterns via extras dict without kwarg explicitly set
link_patterns = link_patterns or extras['link-patterns']
Expand All @@ -344,7 +352,6 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
self.extras['link-patterns'] = link_patterns

self._instance_extras = self.extras.copy()

self.link_patterns = link_patterns
self.footnote_title = footnote_title
self.footnote_return_symbol = footnote_return_symbol
Expand Down Expand Up @@ -802,7 +809,7 @@ def _detab(self, text):
# _block_tags_b. This way html5 tags are easy to keep track of.
_html5tags = '|article|aside|header|hgroup|footer|nav|section|figure|figcaption'

_block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del'
_block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style'
_block_tags_a += _html5tags

_strict_tag_block_re = re.compile(r"""
Expand Down Expand Up @@ -1334,13 +1341,30 @@ def _extract_url_and_title(self, text, start):
url = self._strip_anglebrackets.sub(r'\1', url)
return url, title, end_idx

# https://developer.mozilla.org/en-US/docs/web/http/basics_of_http/data_urls
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
_data_url_re = re.compile(r'''
data:
# in format type/subtype;parameter=optional
(?P<mime>\w+/[\w+\.-]+(?:;\w+=[\w+\.-]+)?)?
# optional base64 token
(?P<token>;base64)?
,(?P<data>.*)
''', re.X)

def _protect_url(self, url):
'''
Function that passes a URL through `_html_escape_url` to remove any nasty characters,
and then hashes the now "safe" URL to prevent other safety mechanisms from tampering
with it (eg: escaping "&" in URL parameters)
'''
url = _html_escape_url(url, safe_mode=self.safe_mode)
data_url = self._data_url_re.match(url)
charset = None
if data_url is not None:
mime = data_url.group('mime') or ''
if mime.startswith('image/') and data_url.group('token') == ';base64':
charset='base64'
url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset)
key = _hash_text(url)
self._escape_table[url] = key
return key
Expand Down Expand Up @@ -2426,6 +2450,7 @@ def test(self, text):
# User facing extras
# ----------------------------------------------------------


class Admonitions(Extra):
'''
Enable parsing of RST admonitions
Expand Down Expand Up @@ -2480,15 +2505,18 @@ def run(self, text):
return self.admonitions_re.sub(self.sub, text)


class BreakOnNewline(Extra):
name = 'break-on-newline'
class Breaks(Extra):
name = 'breaks'
order = Stage.after(Stage.ITALIC_AND_BOLD)

def run(self, text):
return re.sub(r" *\n(?!\<(?:\/?(ul|ol|li))\>)", "<br%s\n" % self.md.empty_element_suffix, text)

def test(self, text):
return True
break_tag = "<br%s\n" % self.md.empty_element_suffix
# do backslashes first because on_newline inserts the break before the newline
if self.options.get('on_backslash', False):
text = re.sub(r' *\\\n', break_tag, text)
if self.options.get('on_newline', False):
text = re.sub(r" *\n(?!\<(?:\/?(ul|ol|li))\>)", break_tag, text)
return text


class CodeFriendly(ItalicAndBoldProcessor):
Expand Down Expand Up @@ -3164,7 +3192,7 @@ def test(self, text):

# Register extras
Admonitions.register()
BreakOnNewline.register()
Breaks.register()
CodeFriendly.register()
FencedCodeBlocks.register()
LinkPatterns.register()
Expand Down Expand Up @@ -3474,14 +3502,21 @@ def _xml_encode_email_char_at_random(ch):
return '&#%s;' % ord(ch)


def _html_escape_url(attr, safe_mode=False):
"""Replace special characters that are potentially malicious in url string."""
def _html_escape_url(attr, safe_mode=False, charset=None):
"""
Replace special characters that are potentially malicious in url string.
Args:
charset: don't escape characters from this charset. Currently the only
exception is for '+' when charset=='base64'
"""
escaped = (attr
.replace('"', '&quot;')
.replace('<', '&lt;')
.replace('>', '&gt;'))
if safe_mode:
escaped = escaped.replace('+', ' ')
if charset != 'base64':
escaped = escaped.replace('+', ' ')
escaped = escaped.replace("'", "&#39;")
return escaped

Expand Down
5 changes: 5 additions & 0 deletions test/tm-cases/break_on_backslash.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<p>Github flavoured markdown allows<br />
you to insert a backslash with or
without a space, which results in<br />
a hard line break, unless it has \
been escaped.</p>
1 change: 1 addition & 0 deletions test/tm-cases/break_on_backslash.opts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{'extras': {'breaks': {'on_backslash': True}}}
5 changes: 5 additions & 0 deletions test/tm-cases/break_on_backslash.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Github flavoured markdown allows \
you to insert a backslash with or
without a space, which results in\
a hard line break, unless it has \\
been escaped.
3 changes: 3 additions & 0 deletions test/tm-cases/break_on_newline_and_backslash.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<p>The breaks extra allows you to insert a hard break on newlines.<br />
You can also insert hard breaks after backslashes<br /><br />
although this will result in a double break when both are enabled.</p>
1 change: 1 addition & 0 deletions test/tm-cases/break_on_newline_and_backslash.opts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{'extras': {'breaks': {'on_backslash': True, 'on_newline': True}}}
3 changes: 3 additions & 0 deletions test/tm-cases/break_on_newline_and_backslash.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The breaks extra allows you to insert a hard break on newlines.
You can also insert hard breaks after backslashes \
although this will result in a double break when both are enabled.
3 changes: 3 additions & 0 deletions test/tm-cases/data_urls_in_safe_mode.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<p><img src="" alt="smiley" /></p>

<p><a href="#">smiley</a></p>
1 change: 1 addition & 0 deletions test/tm-cases/data_urls_in_safe_mode.opts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{'safe_mode': True}
3 changes: 3 additions & 0 deletions test/tm-cases/data_urls_in_safe_mode.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
![smiley]()

[smiley]()
13 changes: 13 additions & 0 deletions test/tm-cases/script-and-style-blocks.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<script>
if (1 > 2) {
console.log('<p>abcdef</p>');
}
</script>

<style>
pre > code {
font-family: 'Comic Sans MS';
}
</style>

<p>Some other text</p>
13 changes: 13 additions & 0 deletions test/tm-cases/script-and-style-blocks.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<script>
if (1 > 2) {
console.log('<p>abcdef</p>');
}
</script>

<style>
pre > code {
font-family: 'Comic Sans MS';
}
</style>

Some other text

0 comments on commit eaa4544

Please sign in to comment.