Skip to content

Commit 1bf9273

Browse files
committed
Add script to generate markdown from cookbooks
1 parent f60fefe commit 1bf9273

File tree

3 files changed

+93
-1
lines changed

3 files changed

+93
-1
lines changed

index.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ topics = ["RAG"]
8282
title = "Notion RAG with Gradient Models"
8383
notebook = "gradient-embeders-and-generators-for-notion-rag.ipynb"
8484
topics = ["RAG"]
85-
hide = true
85+
hidden = true
8686

8787
[[cookbook]]
8888
title = "Hacker News Summaries with Custom Components"

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nbconvert

scripts/generate_markdowns.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import argparse
2+
from pathlib import Path
3+
from subprocess import check_output
4+
5+
import tomllib
6+
from nbconvert import MarkdownExporter
7+
from nbconvert.filters.strings import get_lines
8+
9+
10+
def generate_frontmatter(notebook):
    """Build the YAML frontmatter block for one cookbook page.

    Args:
        notebook: Mapping describing the cookbook. ``"file"`` (a path object;
            its ``.name`` is used for the download link) and ``"title"`` are
            required. ``"colab"``, ``"featured"``, ``"experimental"``,
            ``"discuss"``, ``"hidden"``, ``"new"`` and ``"topics"`` are
            optional and fall back to ``None`` (colab) or ``False``.

    Returns:
        The frontmatter as a string that starts and ends with a ``---`` line.

    Raises:
        subprocess.CalledProcessError: If ``git log`` exits with an error.
        IndexError: If git reports no commits for the file.
    """
    # The command is passed as an argv list (not a whitespace-split string) so
    # notebook paths containing spaces reach git as a single argument. The
    # double quotes inside the format are literal: git echoes them back, so
    # the dates land in the frontmatter as quoted YAML strings.
    git_output = check_output(
        ["git", "log", '--pretty=format:"%cs"', "--", str(notebook["file"])]
    )
    # git lists commits newest first: the first line is the last modification
    # and the final line is the commit that created the file.
    commit_dates = git_output.decode().strip().splitlines()
    last_commit_date = commit_dates[0]
    first_commit_date = commit_dates[-1]

    # Escape characters that would terminate or corrupt the double-quoted
    # YAML scalar used for the title.
    title = str(notebook["title"]).replace("\\", "\\\\").replace('"', '\\"')

    lines = [
        "---",
        "layout: cookbook",
        "featured_image: /images/tutorial_walkthrough_thumbnail.png",
        'images: ["/images/tutorial_walkthrough_thumbnail.png"]',
        "sitemap_exclude: False",
        f"colab: {notebook.get('colab')}",
        "toc: True",
        f'title: "{title}"',
        f"lastmod: {last_commit_date}",
        f"created_at: {first_commit_date}",
        f"download: /downloads/{notebook['file'].name}",
        f"featured: {notebook.get('featured', False)}",
        f"experimental: {notebook.get('experimental', False)}",
        f"discuss: {notebook.get('discuss', False)}",
        f"hidden: {notebook.get('hidden', False)}",
        f"new: {notebook.get('new', False)}",
        f"topics: {notebook.get('topics', False)}",
        "---",
    ]
    return "\n".join(lines) + "\n"
45+
46+
47+
def generate_markdown_from_notebook(notebook, output_path):
    """Convert one notebook to a markdown file prefixed with frontmatter.

    The file is written to ``<output_path>/<notebook stem>.md`` as UTF-8.
    Cell outputs are excluded from the conversion.

    Args:
        notebook: Mapping describing the cookbook; ``"file"`` must be a path
            object pointing at the ``.ipynb`` file. The whole mapping is
            forwarded to ``generate_frontmatter``.
        output_path: Directory the markdown file is written into. It is not
            created here.
    """
    # Announce the notebook up front so a failure below can be attributed.
    print(f"Processing {notebook['file']}")

    frontmatter = generate_frontmatter(notebook)

    md_exporter = MarkdownExporter(exclude_output=True)
    body, _ = md_exporter.from_filename(str(notebook["file"]))
    # Keep the converted markdown from its second line onward (start=1);
    # presumably the first line is the notebook's own title heading, which
    # the frontmatter title replaces -- confirm against the notebooks.
    body = get_lines(body, start=1)

    target = Path(output_path) / f"{notebook['file'].stem}.md"
    with open(target, "w", encoding="utf-8") as f:
        f.write(frontmatter + "\n\n")
        f.write(body)
62+
63+
64+
if __name__ == "__main__":
    # Generate one markdown file per [[cookbook]] entry of index.toml.
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", dest="output", default="markdowns")
    args = parser.parse_args()
    # This script lives in <root>/scripts/, so the repository root is two
    # levels above this file.
    root_path = Path(__file__).parent.parent

    # TOML files are UTF-8 by specification; tomllib.load() takes a binary
    # file object, so decoding never depends on the locale encoding.
    with (root_path / "index.toml").open("rb") as index_file:
        index_data = tomllib.load(index_file)

    Path(args.output).mkdir(parents=True, exist_ok=True)

    # Looked up outside the f-string: reusing the enclosing quote character
    # inside a replacement field is a SyntaxError before Python 3.12.
    colab_base = index_data["config"]["colab"]

    for cookbook_data in index_data["cookbook"]:
        data = {
            "file": root_path / "notebooks" / cookbook_data["notebook"],
            "title": cookbook_data["title"],
            "colab": f"{colab_base}/{cookbook_data['notebook']}",
            "featured": cookbook_data.get("featured", False),
            "experimental": cookbook_data.get("experimental", False),
            "discuss": cookbook_data.get("discuss", False),
            "hidden": cookbook_data.get("hidden", False),
            "new": cookbook_data.get("new", False),
            "topics": cookbook_data.get("topics", False),
        }
        generate_markdown_from_notebook(data, args.output)

0 commit comments

Comments
 (0)