@@ -240,6 +240,13 @@ def __init__(self, html4tags=False, tab_width=4, safe_mode=None,
240
240
else :
241
241
self ._toc_depth = self .extras ["toc" ].get ("depth" , 6 )
242
242
243
+ if 'header-ids' in self .extras :
244
+ if not isinstance (self .extras ['header-ids' ], dict ):
245
+ self .extras ['header-ids' ] = {
246
+ 'mixed' : False ,
247
+ 'prefix' : self .extras ['header-ids' ]
248
+ }
249
+
243
250
if 'break-on-newline' in self .extras :
244
251
self .extras .setdefault ('breaks' , {})
245
252
self .extras ['breaks' ]['on_newline' ] = True
@@ -424,6 +431,17 @@ def convert(self, text):
424
431
text = self ._a_nofollow_or_blank_links .sub (r'<\1 rel="nofollow"\2' , text )
425
432
426
433
if "toc" in self .extras and self ._toc :
434
+ if self .extras ['header-ids' ].get ('mixed' ):
435
+ # TOC will only be out of order if mixed headers is enabled
436
def toc_sort(entry):
    '''Sort key for the TOC: offset at which the entry's header appears in `text`.

    `entry` is indexed as `(level, header_id, header_html)`. Entries whose
    header cannot be located in `text` sort after every located header
    (the sort is stable, so they keep their relative order) instead of
    raising `AttributeError` on a failed search.
    '''
    level, header_id, header_html = entry[0], entry[1], entry[2]
    # header tag, any attrs, the ID, any attrs, the text, close tag.
    # Both the ID and the text are escaped: IDs taken from raw HTML headers
    # may contain regex metacharacters (e.g. `c++` or `a.b`).
    pattern = r'^<(h%d).*?id=(["\'])%s\2.*>%s</\1>$' % (
        level, re.escape(header_id), re.escape(header_html))
    found = re.search(pattern, text, re.M)
    if found is None:
        # Header not found verbatim (e.g. rewritten by a later pass);
        # push it to the end rather than crashing the whole conversion.
        return len(text)
    return found.start()
443
+
444
+ self ._toc .sort (key = toc_sort )
427
445
self ._toc_html = calculate_toc_html (self ._toc )
428
446
429
447
# Prepend toc html to output
@@ -783,6 +801,8 @@ def _hash_html_block_sub(self, match, raw=False):
783
801
return '' .join (["\n \n " , f_key ,
784
802
"\n \n " , middle , "\n \n " ,
785
803
l_key , "\n \n " ])
804
+ elif self .extras .get ('header-ids' , {}).get ('mixed' ) and self ._h_tag_re .match (html ):
805
+ html = self ._h_tag_re .sub (self ._h_tag_sub , html )
786
806
key = _hash_text (html )
787
807
self .html_blocks [key ] = html
788
808
return "\n \n " + key + "\n \n "
@@ -1786,6 +1806,13 @@ def header_id_from_text(self, text, prefix, n):
1786
1806
1787
1807
return header_id
1788
1808
1809
def _header_id_exists(self, text):
    '''Tell whether the header ID derived from `text` has been recorded.

    The candidate ID is the slug of `text`, preceded by the configured
    `header-ids` prefix and a dash when that prefix is a non-empty string.
    Returns True when the candidate is a key of `self._count_from_header_id`.
    '''
    slug = _slugify(text)
    configured = self.extras['header-ids'].get('prefix')
    use_prefix = bool(configured) and isinstance(configured, str)
    candidate = configured + '-' + slug if use_prefix else slug
    return candidate in self._count_from_header_id
1815
+
1789
1816
def _toc_add_entry (self , level , id , name ):
1790
1817
if level > self ._toc_depth :
1791
1818
return
@@ -1810,6 +1837,7 @@ def _toc_add_entry(self, level, id, name):
1810
1837
_h_re_tag_friendly = re .compile (_h_re_base % '+' , re .X | re .M )
1811
1838
1812
1839
def _h_sub (self , match ):
1840
+ '''Handles processing markdown headers'''
1813
1841
if match .group (1 ) is not None and match .group (3 ) == "-" :
1814
1842
return match .group (1 )
1815
1843
elif match .group (1 ) is not None :
@@ -1827,14 +1855,45 @@ def _h_sub(self, match):
1827
1855
header_id_attr = ""
1828
1856
if "header-ids" in self .extras :
1829
1857
header_id = self .header_id_from_text (header_group ,
1830
- self .extras ["header-ids" ], n )
1858
+ self .extras ["header-ids" ]. get ( 'prefix' ) , n )
1831
1859
if header_id :
1832
1860
header_id_attr = ' id="%s"' % header_id
1833
1861
html = self ._run_span_gamut (header_group )
1834
1862
if "toc" in self .extras and header_id :
1835
1863
self ._toc_add_entry (n , header_id , html )
1836
1864
return "<h%d%s>%s</h%d>\n \n " % (n , header_id_attr , html , n )
1837
1865
1866
# Matches a single-line HTML header element: `<hN attrs>text</hN>`.
# The attrs group is limited to characters other than `>` so that it stops
# at the opening tag's own `>`; a greedy `.*` would swallow inline tags in
# the header text (e.g. `<h2>a <em>b</em></h2>`) and leave the text empty.
_h_tag_re = re.compile(r'''
    ^<h([1-6])([^>\n]*)>  # \1 tag num, \2 attrs
    (.*)                  # \3 text
    </h\1>
''', re.X | re.M)
1871
+
1872
def _h_tag_sub(self, match):
    '''Different to `_h_sub` in that this function handles existing HTML headers.

    Substitution callback for `_h_tag_re` matches (group 1: header level,
    group 2: tag attributes, group 3: header text).

    Returns the header unchanged when it already carries an `id`
    attribute, otherwise the header with a generated `id` inserted right
    after `<hN`. When the "toc" extra is enabled the header is also added
    to the TOC, unless its existing ID is reported as already known by
    `_header_id_exists`.
    '''
    text = match.group(0)
    h_level = int(match.group(1))
    attrs = match.group(2) or ''
    h_text = match.group(3)

    # Extract the value of a real `id` attribute. Requiring start-of-string
    # or whitespace before `id` avoids matching `data-id=`/`grid=`, and the
    # quote-aware alternatives stop at the closing quote instead of
    # swallowing whatever non-space characters follow.
    id_match = re.search(
        r'''(?:^|\s)id\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))''',
        attrs)
    id_attr = ''
    if id_match:
        id_attr = (id_match.group(1) or id_match.group(2)
                   or id_match.group(3) or '').strip()

    # check if header was already processed (ie: was a markdown header rather than HTML)
    if id_attr and self._header_id_exists(id_attr):
        return text

    # generate new header id if none existed
    header_id = id_attr or self.header_id_from_text(
        h_text, self.extras['header-ids'].get('prefix'), h_level)
    # Same guard as `_h_sub`: only headers that have an ID go into the TOC.
    if "toc" in self.extras and header_id:
        self._toc_add_entry(h_level, header_id, h_text)
    if header_id and not id_attr:
        # '<h[digit]' + new ID + '...'
        return text[:3] + ' id="%s"' % header_id + text[3:]
    return text
1896
+
1838
1897
def _do_headers (self , text ):
1839
1898
# Setext-style headers:
1840
1899
# Header 1
0 commit comments