|
| 1 | +import argparse |
| 2 | +import re |
| 3 | +import sys |
| 4 | +from dataclasses import dataclass |
| 5 | +from datetime import datetime |
| 6 | + |
# Statuses (as substrings of a linkcheck result line) that we report on.
PARSING_STATUSES = ["broken"]


@dataclass
class BrokenLink:
    """A single broken link extracted from a Sphinx linkcheck log line."""

    location: str  # source document path, e.g. "docs/index.rst"
    line_nr: str  # line number within that document (kept as a string)
    reasoning: str  # linkcheck's explanation, e.g. "404 Client Error"


def parse_broken_links(log: str) -> list[BrokenLink]:
    """Parse Sphinx linkcheck output and collect entries flagged as broken.

    Expected line shape::

        (<location>: line <nr>) <status> <url> - <reasoning>

    Lines that do not match this shape, or whose status part contains none
    of PARSING_STATUSES, are skipped.

    :param log: raw (ANSI-stripped) linkcheck log text.
    :return: one BrokenLink per matching "broken" line, in log order.
    """
    broken_links: list[BrokenLink] = []

    for line in log.strip().split("\n"):
        # Split only on the FIRST ") " so a ") " occurring later in the
        # URL or reasoning does not truncate the remainder of the line.
        parts = line.split(") ", 1)
        if len(parts) < 2:
            continue

        location_part = parts[0].replace("(", "").strip()
        location = location_part.split(":")[0].strip()
        # The line number follows the last occurrence of "line", so paths
        # containing "line" (e.g. "pipeline.rst") still parse correctly.
        line_nr = location_part.split("line")[-1].strip()
        status_and_url_part = parts[1]

        # NOTE(review): substring match, so a URL containing "broken" on a
        # healthy line could slip through the status filter — confirm that
        # linkcheck's output format makes this impossible in practice.
        if not any(status in status_and_url_part for status in PARSING_STATUSES):
            continue
        # Split only on the FIRST " - " so reasoning text that itself
        # contains " - " is preserved in full.
        status_and_url = status_and_url_part.split(" - ", 1)
        if len(status_and_url) < 2:
            continue
        reasoning = status_and_url[1].strip()

        broken_links.append(
            BrokenLink(
                location=location,
                line_nr=line_nr,
                reasoning=reasoning,
            )
        )

    return broken_links
| 47 | + |
| 48 | + |
def generate_markdown_table(broken_links: list[BrokenLink]) -> str:
    """Render the broken links as a GitHub-flavored markdown table.

    :param broken_links: parsed entries to list, one table row each.
    :return: markdown table text ending with a trailing newline.
    """
    lines = [
        "| Location | Line Number | Reasoning |",
        "|----------|-------------|-----------|",
    ]
    lines.extend(
        f"| {entry.location} | {entry.line_nr} | {entry.reasoning} |"
        for entry in broken_links
    )
    return "\n".join(lines) + "\n"
| 59 | + |
| 60 | + |
def generate_issue_body(broken_links: list[BrokenLink]) -> str:
    """Assemble the full markdown body for the auto-filed broken-links issue.

    Embeds the rendered table plus a human-readable timestamp.
    NOTE(review): datetime.now() is naive local time — on CI runners this
    is typically UTC, but confirm if the timestamp semantics matter.

    :param broken_links: parsed entries to include in the report.
    :return: markdown text suitable for a GitHub issue body.
    """
    markdown_table = generate_markdown_table(broken_links)
    return f"""
# Broken Links Report.
**Last updated: {datetime.now().strftime('%d-%m-%Y %H:%M')}**

The following broken links were detected in the documentation:
{markdown_table}
Please investigate and fix these issues to ensure all links are functional.
Thank you!

---
This issue will be auto updated regularly if link issues are found.
You may close it if you wish, though a new one will be created if link issues are still present.

"""
| 77 | + |
| 78 | + |
# Matches one full ANSI CSI sequence: ESC '[', parameter bytes (0x30-0x3F),
# intermediate bytes (0x20-0x2F), and a single final byte (0x40-0x7E).
# Compiled once at module level so repeated calls skip the cache lookup.
_ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")


def strip_ansi_codes(text: str) -> str:
    """Remove ANSI escape sequences from text.

    Generalized beyond SGR color codes (final byte ``m``) to any CSI
    sequence, so cursor-movement and erase-line codes emitted into CI
    logs are stripped as well.

    :param text: raw log text possibly containing escape sequences.
    :return: the same text with all CSI escape sequences removed.
    """
    return _ANSI_CSI_RE.sub("", text)
| 83 | + |
| 84 | + |
if __name__ == "__main__":
    # Bug fix: the original bound the parser to the name `argparse`,
    # shadowing the imported module within this scope.
    parser = argparse.ArgumentParser(
        description="Parse broken links from Sphinx log and generate issue body."
    )
    parser.add_argument("logfile", type=str, help="Path to the Sphinx log file.")
    args = parser.parse_args()

    with open(args.logfile, encoding="utf-8") as f:
        log_content_raw = f.read()
    log_content = strip_ansi_codes(log_content_raw)
    broken_links = parse_broken_links(log_content)
    if not broken_links:
        # Nothing broken found, can exit early
        sys.exit(0)
    # Reaching here implies broken_links is non-empty (the early exit above),
    # so the original redundant `if broken_links:` re-check is dropped.
    issue_body = generate_issue_body(broken_links)
    with open("issue_body.md", "w", encoding="utf-8") as out:
        out.write(issue_body)
0 commit comments