-
Notifications
You must be signed in to change notification settings - Fork 7
/
zettel_validate.py
134 lines (113 loc) · 4.92 KB
/
zettel_validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import re
import yaml
class ZettelValidator:
    """Validate the text of zettel notes and keep running statistics.

    A zettel passes when it has a YAML header containing ``title`` and
    ``reference-section-title``, a title of the form ``<ID> <text>``, an ID
    equal to the supplied filename (when one is given), and a body that
    starts with an H1 repeating the title and contains at least one
    ``[[wikilink]]`` and one space-prefixed ``#hashtag``.

    One instance can validate many zettels: issues are reset on each call to
    :meth:`validate`, while the counters in :attr:`statistics` accumulate.
    """

    def __init__(self):
        self._issues = []
        self._fn = ''
        # Set by the validation steps; initialised here so that calling a
        # later step before an earlier one reports an issue instead of
        # raising AttributeError.
        self.title = ''
        self.captured_id = ''
        self._header_end = 0
        self._stats = {
            'good_zettels': 0,
            'invalid_yaml_header': 0,
            'invalid_title_format': 0,
            'h1_mismatch': 0,
            'missing_wikilinks': 0,
            'missing_hashtags': 0,
            'filename_id_mismatch': 0,
        }
        # Precompile regular expressions.
        # The header body is matched non-greedily so it ends at the FIRST
        # closing '---' line; a later '---' rule in the note body must not be
        # swallowed into the header.
        self._yaml_header_regex = re.compile(r'^---\n(.*?)\n---\n', re.DOTALL)
        # Group 1 is the ID: a 1-5 character prefix, one or more dotted
        # segments of 1-4 characters, then a 4-character tail starting with a
        # digit. A single space separates it from the title text.
        self._title_regex = re.compile(r'((\w{1,5}\.)([\w]{1,4}\.)+\d\w{3}) (.+)')
        self._wikilink_regex = re.compile(r'\[\[(.*?)\]\]')
        # A hashtag only counts when preceded by a space.
        self._hashtag_regex = re.compile(r' (#[\w]+)')

    @property
    def statistics(self) -> dict:
        """Return the counter dictionary (the live object, not a copy)."""
        return self._stats

    @property
    def status(self) -> int:
        """Return the number of issues recorded for the current zettel."""
        return len(self._issues)

    def append_issue(self, key: str, issue: str) -> None:
        """Record *issue* for the current file and bump the *key* counter.

        Raises:
            KeyError: if *key* is not one of the statistics counters.
        """
        self._issues.append(f"{self._fn}: {issue}")
        self._stats[key] += 1

    def show_issues(self) -> None:
        """Print the recorded issues, one per line."""
        print('\n'.join(self._issues))

    def validate_yaml_header(self, text: str) -> bool:
        """Check the YAML header of *text* and remember its title.

        On success ``self.title`` holds the header's ``title`` value.
        Returns False (after recording an issue) when the header is missing,
        is not parseable YAML, is not a mapping, or lacks a required key.
        """
        header_match = self._yaml_header_regex.search(text)
        if not header_match:
            self.append_issue('invalid_yaml_header', 'Invalid or missing YAML header')
            return False
        try:
            header_dict = yaml.safe_load(header_match.group(1))
        except yaml.YAMLError:
            self.append_issue('invalid_yaml_header', 'YAML header parsing exception')
            return False  # give up, invalid YAML header
        # safe_load yields None for an empty header and a scalar/list for
        # non-mapping YAML; neither supports the key checks below.
        if not isinstance(header_dict, dict):
            self.append_issue('invalid_yaml_header', 'YAML header is not a mapping')
            return False
        if 'title' not in header_dict or 'reference-section-title' not in header_dict:
            self.append_issue('invalid_yaml_header', 'YAML header missing title or reference-section-title')
            return False
        self.title = header_dict['title']
        self._header_end = header_match.end()
        return True

    def validate_title_format(self) -> bool:
        """Check that ``self.title`` is ``<ID> <text>`` and capture the ID.

        On success ``self.captured_id`` holds the ID part of the title.
        """
        title = self.title
        # YAML may yield a non-string title (e.g. an empty value loads as
        # None); treat that as a format error rather than letting fullmatch
        # raise TypeError.
        title_match = self._title_regex.fullmatch(title) if isinstance(title, str) else None
        if not title_match:
            self.append_issue('invalid_title_format', 'Invalid title format')
            return False
        self.captured_id = title_match.group(1)
        return True

    def validate_filename_id_mismatch(self, fn: str) -> bool:
        """Check that *fn* equals the captured ID; an empty *fn* skips the check."""
        if fn and fn != self.captured_id:
            self.append_issue('filename_id_mismatch', f'Filename ID {self.captured_id} mismatch')
            return False
        return True

    def validate_content(self, text: str, yaml_header_end: int) -> bool:
        """Check the body of *text* that follows offset *yaml_header_end*.

        All three checks always run so every problem is reported at once.
        Returns True only when no issue at all is currently recorded.
        """
        content = text[yaml_header_end:].strip()
        # The body must start with '# ' followed by the exact title text.
        if not re.match(r'# ' + re.escape(self.title), content):
            self.append_issue('h1_mismatch', 'H1 header mismatch')
        if not self._wikilink_regex.search(content):
            self.append_issue('missing_wikilinks', 'Missing wikilinks')
        if not self._hashtag_regex.search(content):
            self.append_issue('missing_hashtags', 'Missing or non-indented hashtags')
        return not self.status

    def validate(self, text: str, fn: str = '') -> bool:
        """Run every check on *text*, stopping at the first failing stage.

        Args:
            text: full zettel text, including the YAML header.
            fn: expected ID taken from the filename; '' skips that check.

        Returns:
            True when the zettel is valid (``good_zettels`` is incremented);
            False otherwise, after printing the recorded issues.
        """
        self._fn = fn
        self._issues = []
        # Short-circuiting keeps the original stage order: later stages rely
        # on state (title, captured ID, header end) set by earlier ones.
        passed = (
            self.validate_yaml_header(text)
            and self.validate_title_format()
            and self.validate_filename_id_mismatch(fn)
            and self.validate_content(text, self._header_end)
        )
        if not passed:
            self.show_issues()
            return False
        self._stats['good_zettels'] += 1
        return True
# Demo: when run as a script, push two sample zettels through one validator
# and print each outcome followed by the accumulated statistics.
if __name__ == "__main__":
    # The ID the filename is expected to carry for both samples.
    expected_id = "Math.2.0.21.1220.2213"

    # Sample whose title ID equals expected_id.
    matching_zettel = '''---
title: Math.2.0.21.1220.2213 Matrix Determinant Lemma
reference-section-title: References
---
# Math.2.0.21.1220.2213 Matrix Determinant Lemma
[[Game.1a.0.21.0613]] The core of a cooperative game
[[Math.0000.0000]] Mathematics
#matrices #linearalgebra'''

    # Sample whose title ID ends in 4444 and so differs from expected_id.
    mismatched_zettel = '''---
title: Math.2.0.21.1220.4444 Matrix Determinant Lemma
reference-section-title: References
---
# Math.2.0.21.1220.4444 Matrix Determinant Lemma
[[Game.1a.0.21.0613]] The core of a cooperative game
[[Math.0000.0000]] Mathematics
#matrices #linearalgebra'''

    validator = ZettelValidator()
    for zettel in (matching_zettel, mismatched_zettel):
        print(zettel)
        # Validate first: any issues are printed before the result line.
        outcome = validator.validate(zettel, expected_id)
        print(f"Validation result: {outcome}")
        print(f"Validation stats: {validator.statistics}")