-
Notifications
You must be signed in to change notification settings - Fork 0
/
broken_links_to_csv.py
66 lines (49 loc) · 1.72 KB
/
broken_links_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import csv
import sys
from typing import List
# Marker text of a broken-link report line in the log: is_a_broken_link()
# searches for it and parse_broken_link() strips it out before tokenizing.
BROKEN_LINK_MESSAGE = "has a broken link: "
class BrokenLink:
    """A single broken-link record destined for one row of the CSV report.

    Attributes:
        source: First field of the record (written under the SOURCE column).
        link: Second field of the record (written under the DESTINY column).
        options: Optional third field; empty string when not supplied.
        comments: Optional fourth field; empty string when not supplied.
    """

    # Class-level defaults; __init__ always shadows them on the instance.
    source: str = ''
    link: str = ''
    options: str = ''
    comments: str = ''

    def __init__(self, source: str, link: str, options: str = '', comments: str = ''):
        """Store the four fields of the record on the instance."""
        self.source, self.link = source, link
        self.options, self.comments = options, comments

    def to_csv(self):
        """Return the record as a list in column order: source, link, options, comments."""
        column_order = ('source', 'link', 'options', 'comments')
        return [getattr(self, attribute) for attribute in column_order]
def print_usage():
    """Print a one-line usage hint, naming the running script, to standard error."""
    usage_line = f"Usage: {sys.argv[0]} BLC_LOG_FILE"
    print(usage_line, file=sys.stderr)
def is_a_broken_link(line: str) -> bool:
    """Tell whether a log line reports a broken link.

    Args:
        line: One line of text read from the log.

    Returns:
        True when the line contains BROKEN_LINK_MESSAGE anywhere, else False.
    """
    marker_position = line.find(BROKEN_LINK_MESSAGE)
    return marker_position != -1
def parse_broken_link(line: str) -> BrokenLink:
    """Turn a broken-link log line into a BrokenLink record.

    The BROKEN_LINK_MESSAGE marker and every 'Page ' occurrence are removed,
    trailing whitespace is stripped, and what remains is split on single
    spaces into at most three parts: source, link, and the rest of the line
    (kept verbatim, inner spaces included) as options.

    Args:
        line: A log line for which is_a_broken_link() is true.

    Returns:
        A BrokenLink with source, link and options filled in. options is the
        empty string when the line carries nothing after the link.

    Raises:
        IndexError: If the cleaned line has no second token (no link).
    """
    line = line.replace(BROKEN_LINK_MESSAGE, '')
    line = line.replace('Page ', '')
    line = line.rstrip()
    tokens = line.split(' ', 2)
    # The trailing options text is optional: a line that ends right after the
    # link must not abort the whole conversion with an IndexError.
    options = tokens[2] if len(tokens) > 2 else ''
    return BrokenLink(source=tokens[0], link=tokens[1], options=options)
def write_to_csv_file(broken_links: List):
    """Write the broken links to ``blc.csv`` in the current working directory.

    The file is overwritten if it exists. Fields are separated by ';' and
    quoted with a single quote when needed. The first row is the header
    SOURCE;DESTINY;OPTIONS;COMMENTS, followed by one row per record.

    Args:
        broken_links: Iterable of objects exposing ``to_csv()``, which must
            return the row as a sequence of field values.
    """
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; without it, platforms that translate '\n'
    # write '\r\r\n' and the resulting file shows a blank row after each record.
    with open('blc.csv', mode='w', newline='') as blc_file:
        blc_writer = csv.writer(blc_file, delimiter=';', quotechar="'")
        blc_writer.writerow(['SOURCE', 'DESTINY', 'OPTIONS', 'COMMENTS'])
        # writerows accepts any iterable, so no intermediate list is needed.
        blc_writer.writerows(broken_link.to_csv() for broken_link in broken_links)
def main(blc_log_path) -> int:
    """Convert the broken-link lines of a log file into the CSV report.

    Every line of the log for which is_a_broken_link() is true is parsed with
    parse_broken_link(), and the resulting records are handed to
    write_to_csv_file().

    Args:
        blc_log_path: Path of the log file to read.

    Returns:
        0 on success; 3 if any exception occurred while reading, parsing or
        writing (the exception is printed to standard output).
    """
    try:
        # The context manager closes the log even when parsing raises;
        # the file handle used to be left open.
        with open(blc_log_path) as log_file:
            broken_links: List = [
                parse_broken_link(line)
                for line in log_file
                if is_a_broken_link(line)
            ]
        write_to_csv_file(broken_links)
        return 0
    except Exception as error:
        # Top-level boundary of the script: report the failure and map it to
        # a non-zero exit status instead of a traceback.
        print(error)
        return 3
if __name__ == "__main__":
    # Exactly one command-line argument (the log path) is accepted; main()'s
    # return value becomes the exit status, and a usage error exits with 2.
    if len(sys.argv) == 2:
        sys.exit(main(sys.argv[1]))
    print_usage()
    sys.exit(2)