Skip to content

Commit e862200

Browse files
Add link_check action & workflow (#386)
Signed-off-by: Maximilian Sören Pollak <maximilian.pollak@qorix.com>
1 parent 7ea2056 commit e862200

File tree

7 files changed

+266
-0
lines changed

7 files changed

+266
-0
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Composite action: runs the documentation link check, turns its log into
# issue_body.md, and creates or refreshes a tracking issue when that file exists.
name: 'Link Check and Automated Issue'
description: 'Checks links, parses results, and creates or updates an issue with findings.'
inputs:
  github-token:
    description: 'GitHub token'
    required: true
runs:
  using: "composite"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4.2.2

    - name: Run LinkChecker (generates linkcheck_output.txt)
      shell: bash
      # The action path is quoted so a path containing spaces does not split.
      run: |
        chmod +x "${{ github.action_path }}/link_check.sh"
        "${{ github.action_path }}/link_check.sh"

    - name: Parse broken links (generates issue_body.md)
      shell: bash
      run: |
        python3 "${{ github.action_path }}/link_parser.py" linkcheck_output.txt

    # link_parser.py only writes issue_body.md when it found broken links,
    # so the file's existence is the signal that an issue is needed.
    - name: Create or update GitHub issue from findings
      if: success() && hashFiles('issue_body.md') != ''
      uses: actions/github-script@v7
      with:
        github-token: ${{ inputs.github-token }}
        script: |
          const fs = require('fs');
          const path = require('path');
          const body = fs.readFileSync(path.join(process.cwd(), 'issue_body.md'), 'utf-8');
          const title = "Automated Issue: Broken Documentation Links";

          // Find an existing open issue with the same title created by the
          // GitHub Actions bot. Paginate so the match is not missed when the
          // bot has more open issues than fit on the first result page.
          const issues = await github.paginate(github.rest.issues.listForRepo, {
            owner: context.repo.owner,
            repo: context.repo.repo,
            state: "open",
            creator: "github-actions[bot]",
            per_page: 100,
          });

          // The issues endpoint also lists pull requests; never reuse one of those.
          const issue = issues.find(i => !i.pull_request && i.title === title);

          if (issue) {
            // Update the existing issue
            await github.rest.issues.update({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: issue.number,
              body,
            });
          } else {
            // Create a new issue
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title,
              body,
            });
          }
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/usr/bin/env bash
# Runs the docs link check and stores its stdout in linkcheck_output.txt.
set -o errexit
# `|| :` discards bazel's exit status, so this script never fails because of it.
bazel run //:docs_link_check > linkcheck_output.txt || :
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import argparse
2+
import re
3+
import sys
4+
from dataclasses import dataclass
5+
from datetime import datetime
6+
7+
# Link statuses that parse_broken_links() collects; lines with any other status are skipped.
PARSING_STATUSES = ["broken"]
8+
9+
10+
@dataclass
class BrokenLink:
    """One broken-link finding taken from a single line of the link check log."""

    # Text before the first ":" of the parenthesised prefix of the log line.
    location: str
    # Text after the last "line" in that prefix; kept as a string, not an int.
    line_nr: str
    # Text following " - " on the log line.
    reasoning: str
15+
16+
17+
def parse_broken_links(log: str) -> list[BrokenLink]:
    """Extract broken-link findings from link check log text.

    A line is considered when it has the shape
    ``(<location>: line <nr>) <status> <url> - <reason>``. Lines without a
    ``") "`` separator, without a ``" - "`` separator, or whose status is not
    listed in ``PARSING_STATUSES`` are skipped.

    Args:
        log: Full log text, expected to be free of ANSI colour codes.

    Returns:
        One ``BrokenLink`` per matching line, in log order.
    """
    broken_links: list[BrokenLink] = []

    for line in log.strip().split("\n"):
        # Split at the first ") " only, so a ") " inside the URL or the
        # reason cannot cut the rest of the line off.
        location_part, separator, status_and_url_part = line.partition(") ")
        if not separator:
            continue

        location_part = location_part.replace("(", "").strip()
        location = location_part.split(":")[0].strip()
        line_nr = location_part.split("line")[-1].strip()

        # Compare the status token itself. A substring search over the whole
        # remainder would also match e.g. a redirected URL containing "broken".
        tokens = status_and_url_part.split(maxsplit=1)
        if not tokens or tokens[0] not in PARSING_STATUSES:
            continue

        # maxsplit=1 keeps reasons that themselves contain " - " intact.
        status_and_url = status_and_url_part.split(" - ", 1)
        if len(status_and_url) < 2:
            continue
        reasoning = status_and_url[1].strip()

        broken_links.append(
            BrokenLink(
                location=location,
                line_nr=line_nr,
                reasoning=reasoning,
            )
        )

    return broken_links
47+
48+
49+
def generate_markdown_table(broken_links: list[BrokenLink]) -> str:
    """Render the findings as a Markdown table.

    Args:
        broken_links: Findings to list; each contributes one row.

    Returns:
        The header row, the delimiter row and one row per finding, each
        terminated by a newline. With no findings only the two header lines
        are returned.
    """

    def cell(value: str) -> str:
        # A raw "|" would end the cell early and shift the remaining columns.
        return str(value).replace("|", "\\|")

    rows = [
        "| Location | Line Number | Reasoning |\n",
        "|----------|-------------|-----------|\n",
    ]
    rows.extend(
        f"| {cell(link.location)} | {cell(link.line_nr)} | {cell(link.reasoning)} |\n"
        for link in broken_links
    )
    return "".join(rows)
59+
60+
61+
def generate_issue_body(broken_links: list[BrokenLink]) -> str:
    """Build the Markdown body of the broken-links issue.

    Args:
        broken_links: Findings to include in the report table.

    Returns:
        Markdown text with a heading, a "last updated" timestamp, the table
        produced by ``generate_markdown_table`` and a closing note.
    """
    # Local import: the module header only imports the ``datetime`` class.
    from datetime import timezone

    markdown_table = generate_markdown_table(broken_links)
    # Timezone-aware and labelled, so the timestamp is unambiguous no matter
    # which machine generated the report.
    last_updated = datetime.now(timezone.utc).strftime("%d-%m-%Y %H:%M")
    return f"""
# Broken Links Report.
**Last updated: {last_updated} UTC**

The following broken links were detected in the documentation:
{markdown_table}
Please investigate and fix these issues to ensure all links are functional.
Thank you!

---
This issue will be auto updated regularly if link issues are found.
You may close it if you wish, though a new one will be created if link issues are still present.

"""
77+
78+
79+
def strip_ansi_codes(text: str) -> str:
    """Remove ANSI CSI escape sequences from text.

    Covers colour/style codes (``ESC [ ... m``) as well as other CSI
    sequences such as cursor movement or line erase (``ESC [ 2 K``).

    Args:
        text: Text that may contain escape sequences.

    Returns:
        ``text`` with every CSI sequence removed.
    """
    # CSI layout: ESC [ , parameter bytes 0-?, intermediate bytes space-/,
    # and one final byte @-~.
    ansi_escape = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
    return ansi_escape.sub("", text)
83+
84+
85+
if __name__ == "__main__":
    # Command-line entry point: read the log file given as argument and write
    # issue_body.md, but only when broken links were found.
    # The parser is named "parser" so the argparse module is not shadowed.
    parser = argparse.ArgumentParser(
        description="Parse broken links from Sphinx log and generate issue body."
    )
    parser.add_argument("logfile", type=str, help="Path to the Sphinx log file.")
    args = parser.parse_args()
    # Explicit encoding; undecodable bytes in the log must not abort the run.
    with open(args.logfile, encoding="utf-8", errors="replace") as f:
        log_content_raw = f.read()
    log_content = strip_ansi_codes(log_content_raw)
    broken_links = parse_broken_links(log_content)
    if not broken_links:
        # Nothing broken found, can exit early
        sys.exit(0)
    issue_body = generate_issue_body(broken_links)
    with open("issue_body.md", "w", encoding="utf-8") as out:
        out.write(issue_body)

.github/workflows/link_check.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Weekly (and manually triggerable) run of the local link-check action.
name: Link Check

on:
  workflow_dispatch:
  schedule:
    # Runs every week at 00:00 (UTC) on Sunday
    - cron: '0 0 * * 0'

jobs:
  link-check:
    runs-on: ubuntu-latest
    # The action creates or updates an issue with the token passed below, so
    # the token needs issue write access even where the default is read-only.
    permissions:
      contents: read
      issues: write
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Run link check action
        uses: ./.github/actions/link-check
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/test_links.yml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Manually triggered two-job variant: the first job produces issue_body.md,
# the second one files it as a new issue.
name: Link Check and Automated Issue

on:
  workflow_dispatch:

jobs:
  check-links:
    runs-on: ubuntu-latest
    outputs:
      should_create_issue: ${{ steps.detect.outputs.issue_needed }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4.2.2

      # Run the link checker and capture its log.
      # The target name matches the py_binary defined in docs.bzl.
      - name: Run LinkChecker
        run: |
          bazel run //:docs_link_check > linkcheck_output.txt
        continue-on-error: true

      # Run the Python script to parse the linkcheck log and generate the issue body
      - name: Parse broken links and generate issue body
        run: |
          python3 scripts/link_parser.py linkcheck_output.txt

      # Check if issue_body.md exists and is not empty
      - name: Check for issues to report
        id: detect
        run: |
          if [ -s issue_body.md ]; then
            echo "issue_needed=true" >> "$GITHUB_OUTPUT"
          else
            echo "issue_needed=false" >> "$GITHUB_OUTPUT"
          fi

      # Upload issue body artifact if present
      - name: Upload issue body
        if: steps.detect.outputs.issue_needed == 'true'
        uses: actions/upload-artifact@v4
        with:
          name: issue-body
          path: issue_body.md

  create-issue:
    needs: check-links
    if: needs.check-links.outputs.should_create_issue == 'true'
    runs-on: ubuntu-latest
    # Creating an issue needs write access even where the default token is read-only.
    permissions:
      issues: write
    steps:
      - name: Download issue body artifact
        uses: actions/download-artifact@v4
        with:
          name: issue-body

      - name: Create GitHub issue from findings
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('issue_body.md', 'utf-8');
            // Await the request so an API error fails this step instead of
            // surfacing as an unhandled promise rejection.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: "Automated Issue: Broken Documentation Links",
              body,
            });

docs.bzl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,19 @@ def docs(source_dir = "docs", data = [], deps = [], scan_code = []):
173173
},
174174
)
175175

176+
# Link check target: ACTION=linkcheck makes src/incremental.py select the
# "linkcheck" builder. The cli_help text names this target (docs_link_check),
# so the advertised command actually exists.
py_binary(
    name = "docs_link_check",
    tags = ["cli_help=Verify Links inside Documentation:\nbazel run //:docs_link_check\n (Note: this could take a long time)"],
    srcs = ["@score_docs_as_code//src:incremental.py"],
    data = data,
    deps = deps,
    env = {
        "SOURCE_DIRECTORY": source_dir,
        "DATA": str(data),
        "ACTION": "linkcheck",
    },
)
188+
176189
py_binary(
177190
name = "docs_check",
178191
tags = ["cli_help=Verify documentation:\nbazel run //:docs_check"],

src/incremental.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ def get_env(name: str) -> str:
109109
builder = "html"
110110
elif action == "check":
111111
builder = "needs"
112+
elif action == "linkcheck":
113+
builder = "linkcheck"
112114
else:
113115
raise ValueError(f"Unknown action: {action}")
114116

0 commit comments

Comments
 (0)