Skip to content

Commit efd824f

Browse files
Merge pull request #538 from Crozzers/mixed-header-ids
Include HTML headers in TOC
2 parents 182b7f5 + b93c41e commit efd824f

File tree

6 files changed

+98
-1
lines changed

6 files changed

+98
-1
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
- [pull #532] Fix #493 persisting when `code-friendly` extra enabled
99
- [pull #535] Update `_slugify` to use utf-8 encoding (issue #534)
1010
- [pull #536] Maintain order of appearance in footnotes
11+
- [pull #538] Include HTML headers in TOC
1112

1213
## python-markdown2 2.4.10
1314

lib/markdown2.py

Lines changed: 60 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,13 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
240240
else:
241241
self._toc_depth = self.extras["toc"].get("depth", 6)
242242

243+
if 'header-ids' in self.extras:
244+
if not isinstance(self.extras['header-ids'], dict):
245+
self.extras['header-ids'] = {
246+
'mixed': False,
247+
'prefix': self.extras['header-ids']
248+
}
249+
243250
if 'break-on-newline' in self.extras:
244251
self.extras.setdefault('breaks', {})
245252
self.extras['breaks']['on_newline'] = True
@@ -424,6 +431,17 @@ def convert(self, text):
424431
text = self._a_nofollow_or_blank_links.sub(r'<\1 rel="nofollow"\2', text)
425432

426433
if "toc" in self.extras and self._toc:
434+
if self.extras['header-ids'].get('mixed'):
435+
# TOC will only be out of order if mixed headers is enabled
436+
def toc_sort(entry):
437+
'''Sort the TOC by order of appearance in text'''
438+
return re.search(
439+
# header tag, any attrs, the ID, any attrs, the text, close tag
440+
r'^<(h%d).*?id=(["\'])%s\2.*>%s</\1>$' % (entry[0], entry[1], re.escape(entry[2])),
441+
text, re.M
442+
).start()
443+
444+
self._toc.sort(key=toc_sort)
427445
self._toc_html = calculate_toc_html(self._toc)
428446

429447
# Prepend toc html to output
@@ -783,6 +801,8 @@ def _hash_html_block_sub(self, match, raw=False):
783801
return ''.join(["\n\n", f_key,
784802
"\n\n", middle, "\n\n",
785803
l_key, "\n\n"])
804+
elif self.extras.get('header-ids', {}).get('mixed') and self._h_tag_re.match(html):
805+
html = self._h_tag_re.sub(self._h_tag_sub, html)
786806
key = _hash_text(html)
787807
self.html_blocks[key] = html
788808
return "\n\n" + key + "\n\n"
@@ -1786,6 +1806,13 @@ def header_id_from_text(self, text, prefix, n):
17861806

17871807
return header_id
17881808

1809+
def _header_id_exists(self, text):
1810+
header_id = _slugify(text)
1811+
prefix = self.extras['header-ids'].get('prefix')
1812+
if prefix and isinstance(prefix, str):
1813+
header_id = prefix + '-' + header_id
1814+
return header_id in self._count_from_header_id
1815+
17891816
def _toc_add_entry(self, level, id, name):
17901817
if level > self._toc_depth:
17911818
return
@@ -1810,6 +1837,7 @@ def _toc_add_entry(self, level, id, name):
18101837
_h_re_tag_friendly = re.compile(_h_re_base % '+', re.X | re.M)
18111838

18121839
def _h_sub(self, match):
1840+
'''Handles processing markdown headers'''
18131841
if match.group(1) is not None and match.group(3) == "-":
18141842
return match.group(1)
18151843
elif match.group(1) is not None:
@@ -1827,14 +1855,45 @@ def _h_sub(self, match):
18271855
header_id_attr = ""
18281856
if "header-ids" in self.extras:
18291857
header_id = self.header_id_from_text(header_group,
1830-
self.extras["header-ids"], n)
1858+
self.extras["header-ids"].get('prefix'), n)
18311859
if header_id:
18321860
header_id_attr = ' id="%s"' % header_id
18331861
html = self._run_span_gamut(header_group)
18341862
if "toc" in self.extras and header_id:
18351863
self._toc_add_entry(n, header_id, html)
18361864
return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
18371865

1866+
_h_tag_re = re.compile(r'''
1867+
^<h([1-6])(.*)> # \1 tag num, \2 attrs
1868+
(.*) # \3 text
1869+
</h\1>
1870+
''', re.X | re.M)
1871+
1872+
def _h_tag_sub(self, match):
1873+
'''Different to `_h_sub` in that this function handles existing HTML headers'''
1874+
text = match.string[match.start(): match.end()]
1875+
h_level = int(match.group(1))
1876+
# extract id= attr from tag, trying to account for regex "misses"
1877+
id_attr = (re.match(r'.*?id=(\S+)?.*', match.group(2) or '') or '')
1878+
if id_attr:
1879+
# if id attr exists, extract that
1880+
id_attr = id_attr.group(1) or ''
1881+
id_attr = id_attr.strip('\'" ')
1882+
h_text = match.group(3)
1883+
1884+
# check if header was already processed (ie: was a markdown header rather than HTML)
1885+
if id_attr and self._header_id_exists(id_attr):
1886+
return text
1887+
1888+
# generate new header id if none existed
1889+
header_id = id_attr or self.header_id_from_text(h_text, self.extras['header-ids'].get('prefix'), h_level)
1890+
if "toc" in self.extras:
1891+
self._toc_add_entry(h_level, header_id, h_text)
1892+
if header_id and not id_attr:
1893+
# '<h[digit]' + new ID + '...'
1894+
return text[:3] + ' id="%s"' % header_id + text[3:]
1895+
return text
1896+
18381897
def _do_headers(self, text):
18391898
# Setext-style headers:
18401899
# Header 1

test/tm-cases/mixed_header_ids.html

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
<h1 id="header-1">Header 1</h1>
2+
3+
<h2 id="header-2">Header 2</h2>
4+
5+
<h1 id="header-3">Header 3</h1>
6+
7+
<h4 id="header-4" class="myclass">Header 4</h4>
8+
9+
<h1 id="header-5">Header 5</h1>
10+
11+
<h6 id="my-important-id">Header 6</h6>

test/tm-cases/mixed_header_ids.opts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
{"extras": {"header-ids": {"mixed": True}, "toc": None}}

test/tm-cases/mixed_header_ids.text

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
# Header 1
2+
3+
<h2>Header 2</h2>
4+
5+
# Header 3
6+
7+
<h4 class="myclass">Header 4</h4>
8+
9+
# Header 5
10+
11+
<h6 id="my-important-id">Header 6</h6>
Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
<ul>
2+
<li><a href="#header-1">Header 1</a>
3+
<ul>
4+
<li><a href="#header-2">Header 2</a></li>
5+
</ul></li>
6+
<li><a href="#header-3">Header 3</a>
7+
<ul>
8+
<li><a href="#header-4">Header 4</a></li>
9+
</ul></li>
10+
<li><a href="#header-5">Header 5</a>
11+
<ul>
12+
<li><a href="#my-important-id">Header 6</a></li>
13+
</ul></li>
14+
</ul>

0 commit comments

Comments
 (0)