-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtoc_builder.py
executable file
·54 lines (43 loc) · 1.26 KB
/
toc_builder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
import sys
import lxml.html
from lxml.html import builder as E
def insert_headers(parent, headers):
    """Append one ``<li>`` per header entry to *parent*, recursing into sub-headers.

    Each entry of *headers* is a dict with two keys: ``"element"`` (the
    heading element) and ``"headers"`` (a list of nested entries of the same
    shape). A heading that carries an ``id`` attribute is rendered as a link
    to ``#<id>``; any other heading is rendered as a plain ``<span>``. Nested
    entries are placed in a new list element that reuses *parent*'s tag.
    """
    for entry in headers:
        heading = entry["element"]
        children = entry["headers"]

        item = E.LI()
        parent.append(item)

        label = heading.text_content()
        if "id" not in heading.keys():
            # No anchor to point at, so emit the text without a link.
            item.append(E.SPAN(label))
        else:
            item.append(E.A(label, href="#" + heading.get("id")))

        if not children:
            continue

        # The nested list uses the same tag as the list it sits in.
        nested = lxml.html.Element(parent.tag)
        item.append(nested)
        insert_headers(nested, children)
def all(element):
    """Yield *element* followed by every descendant, in document (pre-)order.

    Children are obtained by iterating a node directly. Note that this name
    shadows the builtin ``all`` within this module.
    """
    yield element
    # Stack of child iterators: the top one belongs to the node yielded most
    # recently, so its children come out before that node's later siblings.
    pending = [iter(element)]
    while pending:
        try:
            node = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue
        yield node
        pending.append(iter(node))
# Heading tags that are collected, ordered from the outermost level (h2) to
# the innermost (h6).
header_tags = ["h" + str(level) for level in range(2, 7)]
def build_header_tree(elements, tags):
    """Group the heading elements found in *elements* into a nested tree.

    Args:
        elements: iterable of objects with a ``tag`` attribute, in document
            order. Anything whose tag is not in *tags* is ignored.
        tags: sequence of heading tag names ordered from the outermost level
            to the innermost; a tag's index is its nesting depth.

    Returns:
        A list of ``{"element": ..., "headers": [...]}`` dicts, where
        ``"headers"`` holds the entries nested below that heading.

    A heading that skips levels (for example an ``h4`` directly after an
    ``h2``, or a document that starts with an ``h3``) is attached at the
    deepest level that already exists instead of raising ``IndexError``.
    """
    roots = []
    for element in elements:
        if element.tag not in tags:
            continue
        depth = tags.index(element.tag)

        # Walk down through the most recent entry of each level.
        siblings = roots
        for _ in range(depth):
            if not siblings:
                # No parent heading at this level: stop here rather than
                # indexing into an empty list.
                break
            siblings = siblings[-1]["headers"]

        siblings.append({"element": element, "headers": []})
    return roots


if __name__ == "__main__":
    # Read an HTML document from stdin, fill every element of class "toc"
    # with a nested list of the document's headings, and write the result
    # to stdout.
    html_tree = lxml.html.parse(sys.stdin)
    html = html_tree.getroot()

    # The heading tree is built completely before any list items are
    # inserted into the document.
    headers = build_header_tree(all(html), header_tags)

    for toc in html.find_class("toc"):
        insert_headers(toc, headers)

    sys.stdout.write(lxml.html.tostring(html_tree).decode())