-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhugo_util_frontmatter_clearner.py
150 lines (122 loc) · 5.01 KB
/
hugo_util_frontmatter_clearner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import re
from datetime import datetime
import yaml
import argparse
import difflib
class QuotedString(str):
pass
def quoted_presenter(dumper, data):
return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')
yaml.add_representer(QuotedString, quoted_presenter)
def process_frontmatter(content):
# Extract front matter
match = re.match(r'^---\n(.*?)\n---\n', content, re.DOTALL)
if not match:
return content, None
frontmatter = match.group(1)
rest_of_content = content[match.end():]
# Parse YAML front matter
try:
data = yaml.safe_load(frontmatter)
except yaml.YAMLError:
return content, None
# Remove 'images' section
if 'images' in data:
del data['images']
# Reformat date and lastmod
for date_field in ['date', 'lastmod']:
if date_field in data:
if isinstance(data[date_field], datetime):
data[date_field] = QuotedString(data[date_field].strftime("%Y-%m-%dT%H:%M:%S+01:00"))
elif isinstance(data[date_field], str):
try:
date_obj = datetime.strptime(data[date_field], "%Y-%m-%dT%H:%M:%S.%fZ")
except ValueError:
try:
date_obj = datetime.strptime(data[date_field], "%Y-%m-%dT%H:%M:%SZ")
except ValueError:
continue
data[date_field] = QuotedString(date_obj.strftime("%Y-%m-%dT%H:%M:%S+01:00"))
# Ensure required fields are present and wrap string values
required_fields = ['title', 'description', 'author', 'date', 'tags', 'cover']
for field in required_fields:
if field not in data:
data[field] = QuotedString("")
elif isinstance(data[field], str):
data[field] = QuotedString(data[field])
# Wrap string values in quotes, including nested dictionaries
def quote_strings(obj):
if isinstance(obj, dict):
return {k: quote_strings(v) for k, v in obj.items()}
elif isinstance(obj, list):
return [quote_strings(item) for item in obj]
elif isinstance(obj, str):
return QuotedString(obj)
else:
return obj
data = quote_strings(data)
# Reorder fields
ordered_data = {key: data[key] for key in required_fields if key in data}
for key, value in data.items():
if key not in ordered_data:
ordered_data[key] = value
# Custom YAML dumper for proper indentation
class Dumper(yaml.Dumper):
def increase_indent(self, flow=False, indentless=False):
return super(Dumper, self).increase_indent(flow, False)
# Convert back to YAML
new_frontmatter = yaml.dump(ordered_data, Dumper=Dumper, sort_keys=False, allow_unicode=True, width=1000, default_flow_style=False, indent=2)
# Remove extra newlines
new_frontmatter = re.sub(r'\n\s*\n', '\n', new_frontmatter).strip()
return f"---\n{new_frontmatter}\n---\n{rest_of_content}", frontmatter
def show_diff(old, new):
diff = difflib.unified_diff(
old.splitlines(keepends=True),
new.splitlines(keepends=True),
fromfile='Old Front Matter',
tofile='New Front Matter',
n=10
)
return ''.join(diff)
def process_file(file_path):
with open(file_path, 'r', encoding='utf-8') as file:
content = file.read()
new_content, old_frontmatter = process_frontmatter(content)
if new_content != content and old_frontmatter is not None:
print(f"\nReviewing changes for: {file_path}")
print(show_diff(old_frontmatter, new_content.split('---\n')[1]))
while True:
user_input = input("Apply these changes? (y/n): ").lower()
if user_input in ['y', 'n']:
break
print("Please enter 'y' for yes or 'n' for no.")
if user_input == 'y':
with open(file_path, 'w', encoding='utf-8') as file:
file.write(new_content)
print(f"Updated: {file_path}")
else:
print(f"Skipped: {file_path}")
else:
print(f"No changes needed: {file_path}")
def process_directory(directory):
base_dir = os.path.dirname(directory)
year_prefix = os.path.basename(directory)
for item in os.listdir(base_dir):
if item.startswith(year_prefix):
item_path = os.path.join(base_dir, item)
if os.path.isdir(item_path):
index_file = os.path.join(item_path, 'index.md')
if os.path.exists(index_file):
process_file(index_file)
def main():
parser = argparse.ArgumentParser(description="Clean up Hugo blog post front matter.")
parser.add_argument("path", help="Path prefix for the directories containing Hugo blog posts")
args = parser.parse_args()
base_dir = os.path.dirname(args.path)
if not os.path.exists(base_dir):
print(f"Error: The directory '{base_dir}' does not exist.")
return
process_directory(args.path)
if __name__ == "__main__":
main()