From 4cbe3f3b5d2bb85c0957a242bfe4161bc7d78866 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 4 Apr 2026 21:59:22 -0700 Subject: [PATCH 1/9] cuda_core: derive error enum explanations from bindings docstrings Use cleaned driver/runtime enum __doc__ text from cuda-bindings 13.2.0+ as the primary source for CUDA error explanations in cuda_core, while freezing the 13.1.1 explanation tables as fallback for older bindings. Centralize the version-gated selection and docstring cleanup helpers, update the driver/runtime explanation modules to use them, add tests that verify representative enums expose __doc__ and that cuda_utils attaches the explanation text, and remove the obsolete enum-reformat toolshed helper script. Made-with: Cursor --- .../_utils/driver_cu_result_explanations.py | 29 ++-- .../core/_utils/enum_explanations_helpers.py | 127 ++++++++++++++++++ .../_utils/runtime_cuda_error_explanations.py | 34 ++--- cuda_core/tests/test_cuda_utils.py | 85 +++++++----- .../test_utils_enum_explanations_helpers.py | 127 ++++++++++++++++++ toolshed/reformat_cuda_enums_as_py.py | 112 --------------- 6 files changed, 333 insertions(+), 181 deletions(-) create mode 100644 cuda_core/cuda/core/_utils/enum_explanations_helpers.py create mode 100644 cuda_core/tests/test_utils_enum_explanations_helpers.py delete mode 100755 toolshed/reformat_cuda_enums_as_py.py diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py index 0b085520a6..5184e72650 100644 --- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py +++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py @@ -1,13 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# To regenerate the dictionary below run: -# ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/cuda.h -# Replace the dictionary below with the output. -# Also update the CUDA Toolkit version number below. +from cuda.bindings import driver +from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.2.0 -DRIVER_CU_RESULT_EXPLANATIONS = { +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { 0: ( "The API call returned with no errors. In the case of query calls, this" " also means that the operation being queried is complete (see" @@ -334,15 +332,12 @@ " changes which violated constraints specific to instantiated graph update." ), 911: ( - "This indicates that an error has occurred in a device outside of GPU. It can be a" - " synchronous error w.r.t. CUDA API or an asynchronous error from the external device." - " In case of asynchronous error, it means that if cuda was waiting for an external device's" - " signal before consuming shared data, the external device signaled an error indicating that" - " the data is not valid for consumption. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA," - " the process must be terminated and relaunched." - " In case of synchronous error, it means that one or more external devices" - " have encountered an error and cannot complete the operation." + "This indicates that an async error has occurred in a device outside of CUDA." + " If CUDA was waiting for an external device's signal before consuming shared data," + " the external device signaled an error indicating that the data is not valid for" + " consumption. This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must be" + " terminated and relaunched." 
), 912: "Indicates a kernel launch error due to cluster misconfiguration.", 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), @@ -356,3 +351,5 @@ ), 999: "This indicates that an unknown internal error has occurred.", } + +DRIVER_CU_RESULT_EXPLANATIONS = get_best_available_explanations(driver.CUresult, _FALLBACK_EXPLANATIONS) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py new file mode 100644 index 0000000000..8a4e8240c2 --- /dev/null +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -0,0 +1,127 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +"""Internal support for error-enum explanations. + +``cuda_core`` keeps frozen 13.1.1 fallback tables for older ``cuda-bindings`` +releases. Starting with ``cuda-bindings`` 13.2.0, driver/runtime error enums +carry usable ``__doc__`` text. This module decides which source to use and +normalizes generated docstrings so user-facing ``CUDAError`` messages stay +close to the long-form explanation prose. + +The cleanup rules here were derived while validating docstring-vs-dict parity +in PR #1805. Keep them narrow and remove them when codegen / fallback support is +no longer needed. +""" + +from __future__ import annotations + +import importlib.metadata +import re +from typing import Any + +_MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) + + +# ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). 
+def _binding_version() -> tuple[int, int, int]: + """Return the installed ``cuda-bindings`` version, or a conservative old value.""" + try: + parts = importlib.metadata.version("cuda-bindings").split(".")[:3] + except importlib.metadata.PackageNotFoundError: + return (0, 0, 0) # For very old versions of cuda-python + return tuple(int(v) for v in parts) + + +def _strip_doxygen_double_colon_prefixes(s: str) -> str: + """Remove Doxygen-style ``::`` before CUDA identifiers (not C++ ``Foo::Bar`` scope). + + The frozen fallback tables come from CUDA header comments and therefore use + Doxygen ``::name`` references. Generated enum ``__doc__`` text uses Sphinx + roles instead, so parity checks need a small amount of normalization. + """ + prev = None + while prev != s: + prev = s + s = re.sub(r"(? str: + """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text. + + This is a narrow workaround for wrapped forms such as ``non- linear`` that + otherwise differ from the single-line fallback prose. + """ + prev = None + while prev != s: + prev = s + s = re.sub(r"([a-z])- ([a-z])", r"\1-\2", s) + s = re.sub(r"([a-z]) -([a-z])", r"\1-\2", s) + return s + + +def clean_enum_member_docstring(doc: str | None) -> str | None: + """Turn an enum member ``__doc__`` into plain text. + + The generated enum docstrings are already close to the fallback explanation + prose, but not byte-identical: they may contain Sphinx inline roles, line + wrapping, or a small known codegen defect. Normalize only those differences + so the text is suitable for user-facing error messages. + """ + if doc is None: + return None + s = doc + # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed + # in cuda-bindings code generation. Do not use a raw string for the needle: + # r"\n..." would not match the real newline present in __doc__. 
+ s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') + s = re.sub( + r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`", + lambda m: re.sub(r"^~?\.", "", m.group(1)), + s, + ) + s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s) + s = re.sub(r"\*([^*]+)\*", r"\1", s) + s = re.sub(r"\s+", " ", s).strip() + s = _fix_hyphenation_wordwrap_spacing(s) + return s + + +class DocstringBackedExplanations: + """``dict.get``-like lookup over enum-member ``__doc__`` strings. + + Once the bindings-version gate says docstrings are available, use them + exclusively. Missing docstrings should surface as ``None`` / ``default`` + rather than silently mixing in frozen fallback prose. + """ + + __slots__ = ("_enum_type",) + + def __init__(self, enum_type: Any) -> None: + self._enum_type = enum_type + + def get(self, code: int, default: str | None = None) -> str | None: + try: + member = self._enum_type(code) + except ValueError: + return default + + raw_doc = member.__doc__ + if raw_doc is None: + return default + + return clean_enum_member_docstring(raw_doc) + + +def get_best_available_explanations( + enum_type: Any, fallback: dict[int, str | tuple[str, ...]] +) -> DocstringBackedExplanations | dict[int, str | tuple[str, ...]]: + """Pick one explanation source per bindings version. + + ``cuda-bindings`` < 13.2.0: use the frozen 13.1.1 fallback tables. + ``cuda-bindings`` >= 13.2.0: use enum-member ``__doc__`` exclusively. + """ + if _binding_version() < _MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS: + return fallback + return DocstringBackedExplanations(enum_type) diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py index 4421d50480..5f587a0350 100644 --- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py +++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py @@ -1,13 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# To regenerate the dictionary below run: -# ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/driver_types.h -# Replace the dictionary below with the output. -# Also update the CUDA Toolkit version number below. +from cuda.bindings import runtime +from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.2.0 -RUNTIME_CUDA_ERROR_EXPLANATIONS = { +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { 0: ( "The API call returned with no errors. In the case of query calls, this" " also means that the operation being queried is complete (see" @@ -52,11 +50,6 @@ " requesting too many threads or blocks. See ::cudaDeviceProp for more" " device limitations." ), - 10: ( - "This indicates that the driver is newer than the runtime version" - " and returned graph node parameter information that the runtime" - " does not understand and is unable to translate." - ), 12: ( "This indicates that one or more of the pitch-related parameters passed" " to the API call is not within the acceptable range for pitch." @@ -523,15 +516,12 @@ " changes which violated constraints specific to instantiated graph update." ), 911: ( - "This indicates that an error has occurred in a device outside of GPU. It can be a" - " synchronous error w.r.t. CUDA API or an asynchronous error from the external device." - " In case of asynchronous error, it means that if cuda was waiting for an external device's" - " signal before consuming shared data, the external device signaled an error indicating that" - " the data is not valid for consumption. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA," - " the process must be terminated and relaunched." 
- " In case of synchronous error, it means that one or more external devices" - " have encountered an error and cannot complete the operation." + "This indicates that an async error has occurred in a device outside of CUDA." + " If CUDA was waiting for an external device's signal before consuming shared data," + " the external device signaled an error indicating that the data is not valid for" + " consumption. This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must be" + " terminated and relaunched." ), 912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."), 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), @@ -549,3 +539,5 @@ " This error return is deprecated as of CUDA 4.1." ), } + +RUNTIME_CUDA_ERROR_EXPLANATIONS = get_best_available_explanations(runtime.cudaError_t, _FALLBACK_EXPLANATIONS) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index f218182766..238a2df047 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -11,40 +11,11 @@ from cuda.core._utils.clear_error_support import assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable -def test_driver_cu_result_explanations_health(): - expl_dict = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS - - # Ensure all CUresult enums are in expl_dict - known_codes = set() - for error in driver.CUresult: - code = int(error) - assert code in expl_dict - known_codes.add(code) - - from cuda.core._utils.version import binding_version - - if binding_version() >= (13, 0, 0): - # Ensure expl_dict has no codes not known as a CUresult enum - extra_expl = sorted(set(expl_dict.keys()) - known_codes) - assert not extra_expl - - -def test_runtime_cuda_error_explanations_health(): - expl_dict = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS - - # Ensure all 
cudaError_t enums are in expl_dict - known_codes = set() - for error in runtime.cudaError_t: - code = int(error) - assert code in expl_dict - known_codes.add(code) - +def _skip_if_bindings_pre_enum_docstrings(): from cuda.core._utils.version import binding_version - if binding_version() >= (13, 0, 0): - # Ensure expl_dict has no codes not known as a cudaError_t enum - extra_expl = sorted(set(expl_dict.keys()) - known_codes) - assert not extra_expl + if binding_version() < (13, 2, 0): + pytest.skip("cuda-bindings < 13.2.0 may not expose enum __doc__ strings") def test_check_driver_error(): @@ -85,6 +56,56 @@ def test_check_runtime_error(): assert num_unexpected < len(driver.CUresult) * 0.5 +def test_driver_error_enum_has_non_empty_docstring(): + _skip_if_bindings_pre_enum_docstrings() + + doc = driver.CUresult.CUDA_ERROR_INVALID_VALUE.__doc__ + assert doc is not None + assert doc.strip() != "" + + +def test_runtime_error_enum_has_non_empty_docstring(): + _skip_if_bindings_pre_enum_docstrings() + + doc = runtime.cudaError_t.cudaErrorInvalidValue.__doc__ + assert doc is not None + assert doc.strip() != "" + + +def test_check_driver_error_attaches_explanation(): + error = driver.CUresult.CUDA_ERROR_INVALID_VALUE + name_err, name = driver.cuGetErrorName(error) + assert name_err == driver.CUresult.CUDA_SUCCESS + desc_err, desc = driver.cuGetErrorString(error) + assert desc_err == driver.CUresult.CUDA_SUCCESS + expl = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS.get(int(error)) + assert expl is not None + assert expl != desc.decode() + + with pytest.raises(cuda_utils.CUDAError) as e: + cuda_utils._check_driver_error(error) + + assert str(e.value) == f"{name.decode()}: {expl}" + assert str(e.value) != f"{name.decode()}: {desc.decode()}" + + +def test_check_runtime_error_attaches_explanation(): + error = runtime.cudaError_t.cudaErrorInvalidValue + name_err, name = runtime.cudaGetErrorName(error) + assert name_err == runtime.cudaError_t.cudaSuccess + desc_err, desc = 
runtime.cudaGetErrorString(error) + assert desc_err == runtime.cudaError_t.cudaSuccess + expl = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS.get(int(error)) + assert expl is not None + assert expl != desc.decode() + + with pytest.raises(cuda_utils.CUDAError) as e: + cuda_utils._check_runtime_error(error) + + assert str(e.value) == f"{name.decode()}: {expl}" + assert str(e.value) != f"{name.decode()}: {desc.decode()}" + + def test_precondition(): def checker(*args, what=""): if args[0] < 0: diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py new file mode 100644 index 0000000000..c924ecb2cb --- /dev/null +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -0,0 +1,127 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import pytest + +from cuda.core._utils.enum_explanations_helpers import ( + DocstringBackedExplanations, + _strip_doxygen_double_colon_prefixes, + clean_enum_member_docstring, +) + + +class _FakeEnumMember: + def __init__(self, doc): + self.__doc__ = doc + + +class _FakeEnumType: + def __init__(self, members): + self._members = members + + def __call__(self, code): + try: + return self._members[code] + except KeyError as e: + raise ValueError(code) from e + + +@pytest.mark.parametrize( + ("raw", "expected"), + [ + pytest.param("a\nb c", "a b c", id="collapse_whitespace"), + pytest.param(" x \n ", "x", id="strip_padding"), + pytest.param( + "see\n:py:obj:`~.cuInit()` or :py:obj:`cuCtxDestroy()`", + "see cuInit() or cuCtxDestroy()", + id="sphinx_py_obj_roles", + ), + pytest.param( + "x :py:func:`~.cudaMalloc()` y", + "x cudaMalloc() y", + id="sphinx_py_func_role", + ), + pytest.param("**Note:** text", "Note: text", id="strip_bold"), + pytest.param("*Note* text", "Note text", id="strip_italic"), + pytest.param("[Deprecated]\n", "[Deprecated]", 
id="deprecated_line"), + pytest.param("non- linear", "non-linear", id="hyphen_space_after"), + pytest.param("word -word", "word-word", id="hyphen_space_before"), + pytest.param( + 'Please see\n:py:obj:`~.Interactions`with the CUDA Driver API" for more information.', + 'Please see "Interactions with the CUDA Driver API" for more information.', + id="codegen_broken_interactions_role", + ), + ], +) +def test_clean_enum_member_docstring_examples(raw, expected): + assert clean_enum_member_docstring(raw) == expected + + +def test_clean_enum_member_docstring_none_input(): + assert clean_enum_member_docstring(None) is None + + +@pytest.mark.parametrize( + ("raw", "expected"), + [ + pytest.param("see ::CUDA_SUCCESS", "see CUDA_SUCCESS", id="type_ref"), + pytest.param("Foo::Bar unchanged", "Foo::Bar unchanged", id="cpp_scope_preserved"), + pytest.param("::cuInit() and ::CUstream", "cuInit() and CUstream", id="multiple_prefixes"), + ], +) +def test_strip_doxygen_double_colon_prefixes(raw, expected): + assert _strip_doxygen_double_colon_prefixes(raw) == expected + + +def test_docstring_backed_get_returns_default_for_non_enum_code(): + lut = DocstringBackedExplanations(_FakeEnumType({})) + assert lut.get(-1) is None + assert lut.get(-1, default="sentinel") == "sentinel" + + +def test_docstring_backed_get_returns_default_for_missing_docstring(): + lut = DocstringBackedExplanations(_FakeEnumType({7: _FakeEnumMember(None)})) + assert lut.get(7) is None + assert lut.get(7, default="sentinel") == "sentinel" + + +def test_docstring_backed_get_returns_default_for_unknown_code(): + lut = DocstringBackedExplanations(_FakeEnumType({})) + assert lut.get(99, default="sentinel") == "sentinel" + + +def test_docstring_backed_get_returns_default_for_missing_docstring_without_fallback(): + lut = DocstringBackedExplanations(_FakeEnumType({7: _FakeEnumMember(None)})) + assert lut.get(7, default="sentinel") == "sentinel" + + +def 
test_get_best_available_explanations_uses_fallback_before_13_2(monkeypatch): + import cuda.core._utils.enum_explanations_helpers as cleanup + + fallback = {7: "fallback text"} + monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 1, 1)) + assert cleanup.get_best_available_explanations(_FakeEnumType({7: _FakeEnumMember("doc")}), fallback) is fallback + + +def test_get_best_available_explanations_prefers_docstrings_from_13_2(monkeypatch): + import cuda.core._utils.enum_explanations_helpers as cleanup + + fallback = {7: "fallback text"} + monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 2, 0)) + expl = cleanup.get_best_available_explanations( + _FakeEnumType({7: _FakeEnumMember("clean me")}), + fallback, + ) + assert isinstance(expl, DocstringBackedExplanations) + assert expl.get(7) == "clean me" + + +def test_driver_cu_result_explanations_get_matches_clean_docstring(): + pytest.importorskip("cuda.bindings") + from cuda.bindings import driver + from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS + + e = driver.CUresult.CUDA_SUCCESS + code = int(e) + assert DRIVER_CU_RESULT_EXPLANATIONS.get(code) == clean_enum_member_docstring(e.__doc__) diff --git a/toolshed/reformat_cuda_enums_as_py.py b/toolshed/reformat_cuda_enums_as_py.py deleted file mode 100755 index 2b80447fd1..0000000000 --- a/toolshed/reformat_cuda_enums_as_py.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 - -import sys -from pathlib import Path - - -def extract_enum_block(header_file_lines): - line_iter = iter(header_file_lines) - for line in line_iter: - if line == "typedef enum cudaError_enum {": - closing_line = "} CUresult;" - python_dict_name = "DRIVER_CU_RESULT_EXPLANATIONS" - break - if line == "enum __device_builtin__ cudaError": - line = next(line_iter) - assert line == "{", line - closing_line = "};" - python_dict_name = "RUNTIME_CUDA_ERROR_EXPLANATIONS" - break - else: - raise RuntimeError("Opening line not found.") - block = [] - for line in line_iter: - if line == closing_line: - break - block.append(line) - else: - raise RuntimeError("Closing line not found.") - return python_dict_name, block - - -def parse_enum_doc_and_value_pairs(enum_block): - entries = [] - comment_lines = [] - inside_comment = False - - for line in enum_block: - stripped = line.strip() - if not stripped: - continue - - if stripped.startswith("/**"): - inside_comment = True - comment = stripped[3:].lstrip() - if comment: - comment_lines = [comment] - elif inside_comment: - if stripped.endswith("*/"): - comment = stripped[:-2].strip() - if comment: - comment_lines.append(comment) - inside_comment = False - else: - comment_lines.append(stripped.lstrip("*").strip()) - elif stripped: - assert stripped.count(",") <= 1, line - stripped = stripped.replace(",", "") - flds = stripped.split(" = ") - assert len(flds) == 2, line - try: - val = int(flds[1].strip()) - except Exception as e: - raise RuntimeError(f"Unexpected {line=!r}") from e - entries.append((int(val), comment_lines)) - comment_lines = [] - - return entries - - -def emit_python_dict(python_dict_name, entries): - print(f"{python_dict_name} = {{") - for val, lines in entries: - py_lines = [] - continuation_space = "" - for line in lines: - if line == r"\deprecated": - continue - mod_line = line.replace("\\ref ", "") - assert "\\" not in mod_line, line - mod_line = mod_line.replace('"', 
'\\"') - py_lines.append(f'"{continuation_space}{mod_line}"') - continuation_space = " " - assert py_lines, lines - if len(py_lines) == 1: - print(f" {val}: {py_lines[0]},") - else: - print(f" {val}: (") - for py_line in py_lines: - print(f" {py_line}") - print(" ),") - print("}") - - -def run(args): - if len(args) != 1: - print( - "Usage: reformat_cuda_enums_as_py.py /path/to/cuda.h|driver_types.h", - file=sys.stderr, - ) - sys.exit(1) - - header_file_text = Path(sys.argv[1]).read_text().splitlines() - python_dict_name, enum_block = extract_enum_block(header_file_text) - entries = parse_enum_doc_and_value_pairs(enum_block) - emit_python_dict(python_dict_name, entries) - - -if __name__ == "__main__": - run(sys.argv[1:]) From 8b0c526f1020c1fbda17c60f5a69b090c6b65dfc Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 09:08:25 -0700 Subject: [PATCH 2/9] cuda_core: recognize 12.9.6 enum docstrings Treat the 12.9.6 backport line as docstring-capable and reuse the same version predicate in tests so error explanations follow the bindings releases that already expose usable enum docs. Made-with: Cursor --- .../core/_utils/enum_explanations_helpers.py | 25 ++++++++---- cuda_core/tests/test_cuda_utils.py | 5 ++- .../test_utils_enum_explanations_helpers.py | 39 ++++++++++++++----- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 8a4e8240c2..414c111fce 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -4,9 +4,10 @@ """Internal support for error-enum explanations. ``cuda_core`` keeps frozen 13.1.1 fallback tables for older ``cuda-bindings`` -releases. Starting with ``cuda-bindings`` 13.2.0, driver/runtime error enums -carry usable ``__doc__`` text. 
This module decides which source to use and -normalizes generated docstrings so user-facing ``CUDAError`` messages stay +releases. Driver/runtime error enums carry usable ``__doc__`` text starting in +the 12.x backport line at ``cuda-bindings`` 12.9.6, and in the mainline 13.x +series at ``cuda-bindings`` 13.2.0. This module decides which source to use +and normalizes generated docstrings so user-facing ``CUDAError`` messages stay close to the long-form explanation prose. The cleanup rules here were derived while validating docstring-vs-dict parity @@ -20,7 +21,8 @@ import re from typing import Any -_MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) +_MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (12, 9, 6) +_MIN_13X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) # ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). @@ -33,6 +35,14 @@ def _binding_version() -> tuple[int, int, int]: return tuple(int(v) for v in parts) +def _binding_version_has_usable_enum_docstrings(version: tuple[int, int, int]) -> bool: + """Whether released bindings are known to carry usable error-enum ``__doc__`` text.""" + return ( + _MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS <= version < (13, 0, 0) + or version >= _MIN_13X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS + ) + + def _strip_doxygen_double_colon_prefixes(s: str) -> str: """Remove Doxygen-style ``::`` before CUDA identifiers (not C++ ``Foo::Bar`` scope). @@ -119,9 +129,10 @@ def get_best_available_explanations( ) -> DocstringBackedExplanations | dict[int, str | tuple[str, ...]]: """Pick one explanation source per bindings version. - ``cuda-bindings`` < 13.2.0: use the frozen 13.1.1 fallback tables. - ``cuda-bindings`` >= 13.2.0: use enum-member ``__doc__`` exclusively. + Use enum-member ``__doc__`` only for bindings versions known to expose + usable per-member text (12.9.6+ in the 12.x backport line, 13.2.0+ in the + 13.x mainline). Otherwise keep using the frozen 13.1.1 fallback tables. 
""" - if _binding_version() < _MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS: + if not _binding_version_has_usable_enum_docstrings(_binding_version()): return fallback return DocstringBackedExplanations(enum_type) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 238a2df047..7dc3703749 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -12,10 +12,11 @@ def _skip_if_bindings_pre_enum_docstrings(): + from cuda.core._utils.enum_explanations_helpers import _binding_version_has_usable_enum_docstrings from cuda.core._utils.version import binding_version - if binding_version() < (13, 2, 0): - pytest.skip("cuda-bindings < 13.2.0 may not expose enum __doc__ strings") + if not _binding_version_has_usable_enum_docstrings(binding_version()): + pytest.skip("cuda-bindings version does not expose usable enum __doc__ strings") def test_check_driver_error(): diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index c924ecb2cb..e4d7faf987 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -6,6 +6,7 @@ from cuda.core._utils.enum_explanations_helpers import ( DocstringBackedExplanations, + _binding_version_has_usable_enum_docstrings, _strip_doxygen_double_colon_prefixes, clean_enum_member_docstring, ) @@ -96,25 +97,43 @@ def test_docstring_backed_get_returns_default_for_missing_docstring_without_fall assert lut.get(7, default="sentinel") == "sentinel" -def test_get_best_available_explanations_uses_fallback_before_13_2(monkeypatch): - import cuda.core._utils.enum_explanations_helpers as cleanup - - fallback = {7: "fallback text"} - monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 1, 1)) - assert cleanup.get_best_available_explanations(_FakeEnumType({7: _FakeEnumMember("doc")}), fallback) is fallback +@pytest.mark.parametrize( + ("version", 
"expected"), + [ + pytest.param((12, 9, 5), False, id="before_12_9_6"), + pytest.param((12, 9, 6), True, id="from_12_9_6"), + pytest.param((13, 0, 0), False, id="13_0_mainline_gap"), + pytest.param((13, 1, 1), False, id="13_1_1"), + pytest.param((13, 2, 0), True, id="from_13_2_0"), + ], +) +def test_binding_version_has_usable_enum_docstrings(version, expected): + assert _binding_version_has_usable_enum_docstrings(version) is expected -def test_get_best_available_explanations_prefers_docstrings_from_13_2(monkeypatch): +@pytest.mark.parametrize( + ("version", "expects_docstrings"), + [ + pytest.param((12, 9, 5), False, id="before_12_9_6"), + pytest.param((12, 9, 6), True, id="from_12_9_6"), + pytest.param((13, 0, 0), False, id="13_0_mainline_gap"), + pytest.param((13, 2, 0), True, id="from_13_2_0"), + ], +) +def test_get_best_available_explanations_switches_by_version(monkeypatch, version, expects_docstrings): import cuda.core._utils.enum_explanations_helpers as cleanup fallback = {7: "fallback text"} - monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 2, 0)) + monkeypatch.setattr(cleanup, "_binding_version", lambda: version) expl = cleanup.get_best_available_explanations( _FakeEnumType({7: _FakeEnumMember("clean me")}), fallback, ) - assert isinstance(expl, DocstringBackedExplanations) - assert expl.get(7) == "clean me" + if expects_docstrings: + assert isinstance(expl, DocstringBackedExplanations) + assert expl.get(7) == "clean me" + else: + assert expl is fallback def test_driver_cu_result_explanations_get_matches_clean_docstring(): From 4fde3d6d62cb652542260d3f7c6db75e225a80a8 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 11:49:34 -0700 Subject: [PATCH 3/9] cuda_core: drop unused enum doc cleanup helper Remove the old Doxygen ``::`` normalization path now that error explanations no longer depend on dict-vs-docstring parity checks. 
This keeps the helper focused on the cleanup rules that still affect user-facing CUDAError messages. Made-with: Cursor --- .../core/_utils/enum_explanations_helpers.py | 32 ++++++------------- .../test_utils_enum_explanations_helpers.py | 13 -------- 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 414c111fce..29f48ce843 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -8,11 +8,11 @@ the 12.x backport line at ``cuda-bindings`` 12.9.6, and in the mainline 13.x series at ``cuda-bindings`` 13.2.0. This module decides which source to use and normalizes generated docstrings so user-facing ``CUDAError`` messages stay -close to the long-form explanation prose. +presentable. -The cleanup rules here were derived while validating docstring-vs-dict parity -in PR #1805. Keep them narrow and remove them when codegen / fallback support is -no longer needed. +The cleanup rules here were derived while validating generated enum docstrings +in PR #1805. Keep them narrow and remove them when codegen quirks or fallback +support are no longer needed. """ from __future__ import annotations @@ -43,25 +43,11 @@ def _binding_version_has_usable_enum_docstrings(version: tuple[int, int, int]) - ) -def _strip_doxygen_double_colon_prefixes(s: str) -> str: - """Remove Doxygen-style ``::`` before CUDA identifiers (not C++ ``Foo::Bar`` scope). - - The frozen fallback tables come from CUDA header comments and therefore use - Doxygen ``::name`` references. Generated enum ``__doc__`` text uses Sphinx - roles instead, so parity checks need a small amount of normalization. - """ - prev = None - while prev != s: - prev = s - s = re.sub(r"(? str: """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text. 
This is a narrow workaround for wrapped forms such as ``non- linear`` that - otherwise differ from the single-line fallback prose. + would otherwise look awkward in user-facing messages. """ prev = None while prev != s: @@ -74,10 +60,10 @@ def _fix_hyphenation_wordwrap_spacing(s: str) -> str: def clean_enum_member_docstring(doc: str | None) -> str | None: """Turn an enum member ``__doc__`` into plain text. - The generated enum docstrings are already close to the fallback explanation - prose, but not byte-identical: they may contain Sphinx inline roles, line - wrapping, or a small known codegen defect. Normalize only those differences - so the text is suitable for user-facing error messages. + The generated enum docstrings are already close to user-facing prose, but + they may contain Sphinx inline roles, line wrapping, or a small known + codegen defect. Normalize only those differences so the text is suitable + for error messages. """ if doc is None: return None diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index e4d7faf987..2c65285322 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -7,7 +7,6 @@ from cuda.core._utils.enum_explanations_helpers import ( DocstringBackedExplanations, _binding_version_has_usable_enum_docstrings, - _strip_doxygen_double_colon_prefixes, clean_enum_member_docstring, ) @@ -63,18 +62,6 @@ def test_clean_enum_member_docstring_none_input(): assert clean_enum_member_docstring(None) is None -@pytest.mark.parametrize( - ("raw", "expected"), - [ - pytest.param("see ::CUDA_SUCCESS", "see CUDA_SUCCESS", id="type_ref"), - pytest.param("Foo::Bar unchanged", "Foo::Bar unchanged", id="cpp_scope_preserved"), - pytest.param("::cuInit() and ::CUstream", "cuInit() and CUstream", id="multiple_prefixes"), - ], -) -def test_strip_doxygen_double_colon_prefixes(raw, expected): - assert 
_strip_doxygen_double_colon_prefixes(raw) == expected - - def test_docstring_backed_get_returns_default_for_non_enum_code(): lut = DocstringBackedExplanations(_FakeEnumType({})) assert lut.get(-1) is None From 74479647918172aa1b68b11be78a8dea163133eb Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 12:02:57 -0700 Subject: [PATCH 4/9] cuda_core: clarify enum explanation helper docs Clarify that DocstringBackedExplanations is a compatibility shim for the existing ``.get(int(error))`` lookup shape, and trim a low-value implementation note from the docstring cleanup workaround comment. Made-with: Cursor --- cuda_core/cuda/core/_utils/enum_explanations_helpers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 29f48ce843..c58c1c9aee 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -69,8 +69,7 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: return None s = doc # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed - # in cuda-bindings code generation. Do not use a raw string for the needle: - # r"\n..." would not match the real newline present in __doc__. + # in cuda-bindings code generation. s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') s = re.sub( r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`", @@ -85,11 +84,9 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: class DocstringBackedExplanations: - """``dict.get``-like lookup over enum-member ``__doc__`` strings. + """Compatibility shim exposing enum-member ``__doc__`` text via ``dict.get``. - Once the bindings-version gate says docstrings are available, use them - exclusively. 
Missing docstrings should surface as ``None`` / ``default`` - rather than silently mixing in frozen fallback prose. + Keeps the existing ``.get(int(error))`` lookup shape used by ``cuda_utils.pyx``. """ __slots__ = ("_enum_type",) From cddb5652c1ab72ff88882363d96aa0907a8dd609 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 12:29:27 -0700 Subject: [PATCH 5/9] cuda_core: trim enum explanation helper tests Remove redundant helper coverage now that DocstringBackedExplanations.get() and clean_enum_member_docstring() are already exercised elsewhere, and simplify the remaining test module imports. Made-with: Cursor --- .../test_utils_enum_explanations_helpers.py | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index 2c65285322..da70ef6e58 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -4,6 +4,7 @@ import pytest +from cuda.core._utils import enum_explanations_helpers from cuda.core._utils.enum_explanations_helpers import ( DocstringBackedExplanations, _binding_version_has_usable_enum_docstrings, @@ -74,16 +75,6 @@ def test_docstring_backed_get_returns_default_for_missing_docstring(): assert lut.get(7, default="sentinel") == "sentinel" -def test_docstring_backed_get_returns_default_for_unknown_code(): - lut = DocstringBackedExplanations(_FakeEnumType({})) - assert lut.get(99, default="sentinel") == "sentinel" - - -def test_docstring_backed_get_returns_default_for_missing_docstring_without_fallback(): - lut = DocstringBackedExplanations(_FakeEnumType({7: _FakeEnumMember(None)})) - assert lut.get(7, default="sentinel") == "sentinel" - - @pytest.mark.parametrize( ("version", "expected"), [ @@ -108,11 +99,9 @@ def test_binding_version_has_usable_enum_docstrings(version, expected): ], ) def 
test_get_best_available_explanations_switches_by_version(monkeypatch, version, expects_docstrings): - import cuda.core._utils.enum_explanations_helpers as cleanup - fallback = {7: "fallback text"} - monkeypatch.setattr(cleanup, "_binding_version", lambda: version) - expl = cleanup.get_best_available_explanations( + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: version) + expl = enum_explanations_helpers.get_best_available_explanations( _FakeEnumType({7: _FakeEnumMember("clean me")}), fallback, ) @@ -121,13 +110,3 @@ def test_get_best_available_explanations_switches_by_version(monkeypatch, versio assert expl.get(7) == "clean me" else: assert expl is fallback - - -def test_driver_cu_result_explanations_get_matches_clean_docstring(): - pytest.importorskip("cuda.bindings") - from cuda.bindings import driver - from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS - - e = driver.CUresult.CUDA_SUCCESS - code = int(e) - assert DRIVER_CU_RESULT_EXPLANATIONS.get(code) == clean_enum_member_docstring(e.__doc__) From f70c09963e1c4e866d7392eee66e14f17fcd35b1 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 6 Apr 2026 11:41:37 -0700 Subject: [PATCH 6/9] cuda_core: generalize enum doc cleanup regexes Broaden the inline-role cleanup to accept generic RST roles and widen the word-wrap hyphen fix beyond lowercase-only cases. Keep the current 13.2.x output unchanged while expanding unit coverage for the newly supported forms. 
Made-with: Cursor --- .../core/_utils/enum_explanations_helpers.py | 17 ++++++++--------- .../test_utils_enum_explanations_helpers.py | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index c58c1c9aee..27b5dc1c74 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -23,6 +23,9 @@ _MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (12, 9, 6) _MIN_13X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) +_RST_INLINE_ROLE_RE = re.compile(r":(?:[a-z]+:)?[a-z]+:`([^`]+)`") +_WORDWRAP_HYPHEN_AFTER_RE = re.compile(r"(?<=[0-9A-Za-z_])- (?=[0-9A-Za-z_])") +_WORDWRAP_HYPHEN_BEFORE_RE = re.compile(r"(?<=[0-9A-Za-z_]) -(?=[0-9A-Za-z_])") # ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). @@ -46,14 +49,14 @@ def _binding_version_has_usable_enum_docstrings(version: tuple[int, int, int]) - def _fix_hyphenation_wordwrap_spacing(s: str) -> str: """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text. - This is a narrow workaround for wrapped forms such as ``non- linear`` that - would otherwise look awkward in user-facing messages. + This targets asymmetric wrap artifacts such as ``non- linear`` or + ``GPU- Direct`` while leaving intentional ``a - b`` separators alone. """ prev = None while prev != s: prev = s - s = re.sub(r"([a-z])- ([a-z])", r"\1-\2", s) - s = re.sub(r"([a-z]) -([a-z])", r"\1-\2", s) + s = _WORDWRAP_HYPHEN_AFTER_RE.sub("-", s) + s = _WORDWRAP_HYPHEN_BEFORE_RE.sub("-", s) return s @@ -71,11 +74,7 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed # in cuda-bindings code generation. 
s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') - s = re.sub( - r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`", - lambda m: re.sub(r"^~?\.", "", m.group(1)), - s, - ) + s = _RST_INLINE_ROLE_RE.sub(lambda m: re.sub(r"^~?\.", "", m.group(1)), s) s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s) s = re.sub(r"\*([^*]+)\*", r"\1", s) s = re.sub(r"\s+", " ", s).strip() diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index da70ef6e58..d31e40ee47 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -36,18 +36,32 @@ def __call__(self, code): pytest.param( "see\n:py:obj:`~.cuInit()` or :py:obj:`cuCtxDestroy()`", "see cuInit() or cuCtxDestroy()", - id="sphinx_py_obj_roles", + id="rst_py_domain_role", ), pytest.param( "x :py:func:`~.cudaMalloc()` y", "x cudaMalloc() y", - id="sphinx_py_func_role", + id="rst_py_role", ), + pytest.param( + "x :c:func:`cuLaunchKernel` y", + "x cuLaunchKernel y", + id="rst_non_py_domain_role", + ), + pytest.param("x :term:`device` y", "x device y", id="rst_role_without_domain"), pytest.param("**Note:** text", "Note: text", id="strip_bold"), pytest.param("*Note* text", "Note text", id="strip_italic"), pytest.param("[Deprecated]\n", "[Deprecated]", id="deprecated_line"), pytest.param("non- linear", "non-linear", id="hyphen_space_after"), pytest.param("word -word", "word-word", id="hyphen_space_before"), + pytest.param("GPU- Direct", "GPU-Direct", id="hyphen_space_after_uppercase"), + pytest.param("peer -GPU", "peer-GPU", id="hyphen_space_before_uppercase"), + pytest.param("L2- cache", "L2-cache", id="hyphen_space_after_digit"), + pytest.param( + "Common causes are - a. bad access", + "Common causes are - a. 
bad access", + id="preserve_dash_separator", + ), pytest.param( 'Please see\n:py:obj:`~.Interactions`with the CUDA Driver API" for more information.', 'Please see "Interactions with the CUDA Driver API" for more information.', From c4b5862a8693ff82eca5b7dd0272fc8894da0cb2 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 6 Apr 2026 11:41:54 -0700 Subject: [PATCH 7/9] cuda_core: document inline enum cleanup regexes Add terse comments for the remaining inline regex substitutions so the docstring cleanup steps are easier to follow without changing behavior. Made-with: Cursor --- cuda_core/cuda/core/_utils/enum_explanations_helpers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 27b5dc1c74..3b42ae5406 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -74,9 +74,13 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed # in cuda-bindings code generation. s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') + # Drop a leading "~." or "." after removing the surrounding RST inline role. s = _RST_INLINE_ROLE_RE.sub(lambda m: re.sub(r"^~?\.", "", m.group(1)), s) + # Strip simple bold emphasis markers. s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s) + # Strip simple italic emphasis markers. s = re.sub(r"\*([^*]+)\*", r"\1", s) + # Collapse wrapped lines and repeated spaces. s = re.sub(r"\s+", " ", s).strip() s = _fix_hyphenation_wordwrap_spacing(s) return s From 70258546444afe54b2632a39d68e7ec7f601c996 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 6 Apr 2026 12:20:42 -0700 Subject: [PATCH 8/9] cuda_core: lazily import frozen enum explanation tables Keep cuda_utils.pyx unchanged while moving the large 13.1.1 explanation tables into frozen-only modules that are imported only for older bindings. Use loader-aware selection in enum_explanations_helpers.py and add tests that prove docstring-capable bindings skip the frozen-module imports. Made-with: Cursor --- .../_utils/driver_cu_result_explanations.py | 355 +----------- .../driver_cu_result_explanations_frozen.py | 350 +++++++++++ .../core/_utils/enum_explanations_helpers.py | 10 +- .../_utils/runtime_cuda_error_explanations.py | 543 +----------------- .../runtime_cuda_error_explanations_frozen.py | 538 +++++++++++++++++ .../test_utils_enum_explanations_helpers.py | 44 ++ 6 files changed, 954 insertions(+), 886 deletions(-) create mode 100644 cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py create mode 100644 cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py index 5184e72650..f4894d7563 100644 --- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py +++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py @@ -4,352 +4,11 @@ from cuda.bindings import driver from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.1.1 -_FALLBACK_EXPLANATIONS = { - 0: ( - "The API call returned with no errors. In the case of query calls, this" - " also means that the operation being queried is complete (see" - " ::cuEventQuery() and ::cuStreamQuery())." - ), - 1: ( - "This indicates that one or more of the parameters passed to the API call" - " is not within an acceptable range of values." 
- ), - 2: ( - "The API call failed because it was unable to allocate enough memory or" - " other resources to perform the requested operation." - ), - 3: ( - "This indicates that the CUDA driver has not been initialized with" - " ::cuInit() or that initialization has failed." - ), - 4: "This indicates that the CUDA driver is in the process of shutting down.", - 5: ( - "This indicates profiler is not initialized for this run. This can" - " happen when the application is running with external profiling tools" - " like visual profiler." - ), - 6: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to attempt to enable/disable the profiling via ::cuProfilerStart or" - " ::cuProfilerStop without initialization." - ), - 7: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to call cuProfilerStart() when profiling is already enabled." - ), - 8: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to call cuProfilerStop() when profiling is already disabled." - ), - 34: ( - "This indicates that the CUDA driver that the application has loaded is a" - " stub library. Applications that run with the stub rather than a real" - " driver loaded will result in CUDA API returning this error." - ), - 36: ( - "This indicates that the API call requires a newer CUDA driver than the one" - " currently installed. Users should install an updated NVIDIA CUDA driver" - " to allow the API call to succeed." - ), - 46: ( - "This indicates that requested CUDA device is unavailable at the current" - " time. Devices are often unavailable due to use of" - " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED." 
- ), - 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), - 101: ( - "This indicates that the device ordinal supplied by the user does not" - " correspond to a valid CUDA device or that the action requested is" - " invalid for the specified device." - ), - 102: "This error indicates that the Grid license is not applied.", - 200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."), - 201: ( - "This most frequently indicates that there is no context bound to the" - " current thread. This can also be returned if the context passed to an" - " API call is not a valid handle (such as a context that has had" - " ::cuCtxDestroy() invoked on it). This can also be returned if a user" - " mixes different API versions (i.e. 3010 context with 3020 API calls)." - " See ::cuCtxGetApiVersion() for more details." - " This can also be returned if the green context passed to an API call" - " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API." - ), - 202: ( - "This indicated that the context being supplied as a parameter to the" - " API call was already the active context." - " This error return is deprecated as of CUDA 3.2. It is no longer an" - " error to attempt to push the active context via ::cuCtxPushCurrent()." - ), - 205: "This indicates that a map or register operation has failed.", - 206: "This indicates that an unmap or unregister operation has failed.", - 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), - 208: "This indicates that the resource is already mapped.", - 209: ( - "This indicates that there is no kernel image available that is suitable" - " for the device. This can occur when a user specifies code generation" - " options for a particular CUDA source file that do not include the" - " corresponding device configuration." 
- ), - 210: "This indicates that a resource has already been acquired.", - 211: "This indicates that a resource is not mapped.", - 212: ("This indicates that a mapped resource is not available for access as an array."), - 213: ("This indicates that a mapped resource is not available for access as a pointer."), - 214: ("This indicates that an uncorrectable ECC error was detected during execution."), - 215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."), - 216: ( - "This indicates that the ::CUcontext passed to the API call can" - " only be bound to a single CPU thread at a time but is already" - " bound to a CPU thread." - ), - 217: ("This indicates that peer access is not supported across the given devices."), - 218: "This indicates that a PTX JIT compilation failed.", - 219: "This indicates an error with OpenGL or DirectX context.", - 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), - 221: "This indicates that the PTX JIT compiler library was not found.", - 222: "This indicates that the provided PTX was compiled with an unsupported toolchain.", - 223: "This indicates that the PTX JIT compilation was disabled.", - 224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."), - 225: ( - "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." - ), - 226: ( - "This indicates that an exception occurred on the device that is now" - " contained by the GPU's error containment capability. Common causes are -" - " a. Certain types of invalid accesses of peer GPU memory over nvlink" - " b. Certain classes of hardware errors" - " This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must" - " be terminated and relaunched." 
- ), - 300: ( - "This indicates that the device kernel source is invalid. This includes" - " compilation/linker errors encountered in device code or user error." - ), - 301: "This indicates that the file specified was not found.", - 302: "This indicates that a link to a shared object failed to resolve.", - 303: "This indicates that initialization of a shared object failed.", - 304: "This indicates that an OS call failed.", - 400: ( - "This indicates that a resource handle passed to the API call was not" - " valid. Resource handles are opaque types like ::CUstream and ::CUevent." - ), - 401: ( - "This indicates that a resource required by the API call is not in a" - " valid state to perform the requested operation." - ), - 402: ( - "This indicates an attempt was made to introspect an object in a way that" - " would discard semantically important information. This is either due to" - " the object using funtionality newer than the API version used to" - " introspect it or omission of optional return arguments." - ), - 500: ( - "This indicates that a named symbol was not found. Examples of symbols" - " are global/constant variable names, driver function names, texture names," - " and surface names." - ), - 600: ( - "This indicates that asynchronous operations issued previously have not" - " completed yet. This result is not actually an error, but must be indicated" - " differently than ::CUDA_SUCCESS (which indicates completion). Calls that" - " may return this value include ::cuEventQuery() and ::cuStreamQuery()." - ), - 700: ( - "While executing a kernel, the device encountered a" - " load or store instruction on an invalid memory address." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 701: ( - "This indicates that a launch did not occur because it did not have" - " appropriate resources. 
This error usually indicates that the user has" - " attempted to pass too many arguments to the device kernel, or the" - " kernel launch specifies too many threads for the kernel's register" - " count. Passing arguments of the wrong size (i.e. a 64-bit pointer" - " when a 32-bit int is expected) is equivalent to passing too many" - " arguments and can also result in this error." - ), - 702: ( - "This indicates that the device kernel took too long to execute. This can" - " only occur if timeouts are enabled - see the device attribute" - " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), - 704: ( - "This error indicates that a call to ::cuCtxEnablePeerAccess() is" - " trying to re-enable peer access to a context which has already" - " had peer access to it enabled." - ), - 705: ( - "This error indicates that ::cuCtxDisablePeerAccess() is" - " trying to disable peer access which has not been enabled yet" - " via ::cuCtxEnablePeerAccess()." - ), - 708: ("This error indicates that the primary context for the specified device has already been initialized."), - 709: ( - "This error indicates that the context current to the calling thread" - " has been destroyed using ::cuCtxDestroy, or is a primary context which" - " has not yet been initialized." - ), - 710: ( - "A device-side assert triggered during kernel execution. The context" - " cannot be used anymore, and must be destroyed. All existing device" - " memory allocations from this context are invalid and must be" - " reconstructed if the program is to continue using CUDA." 
- ), - 711: ( - "This error indicates that the hardware resources required to enable" - " peer access have been exhausted for one or more of the devices" - " passed to ::cuCtxEnablePeerAccess()." - ), - 712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."), - 713: ( - "This error indicates that the pointer passed to ::cuMemHostUnregister()" - " does not correspond to any currently registered memory region." - ), - 714: ( - "While executing a kernel, the device encountered a stack error." - " This can be due to stack corruption or exceeding the stack size limit." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 715: ( - "While executing a kernel, the device encountered an illegal instruction." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 716: ( - "While executing a kernel, the device encountered a load or store instruction" - " on a memory address which is not aligned." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 717: ( - "While executing a kernel, the device encountered an instruction" - " which can only operate on memory locations in certain address spaces" - " (global, shared, or local), but was supplied a memory address not" - " belonging to an allowed address space." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 718: ( - "While executing a kernel, the device program counter wrapped its address space." 
- " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 719: ( - "An exception occurred on the device while executing a kernel. Common" - " causes include dereferencing an invalid device pointer and accessing" - " out of bounds shared memory. Less common cases can be system specific - more" - " information about these cases can be found in the system specific user guide." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 720: ( - "This error indicates that the number of blocks launched per grid for a kernel that was" - " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice" - " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor" - " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" - " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT." - ), - 721: ( - "An exception occurred on the device while exiting a kernel using tensor memory: the" - " tensor memory was not completely deallocated. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA, the" - " process must be terminated and relaunched." - ), - 800: "This error indicates that the attempted operation is not permitted.", - 801: ("This error indicates that the attempted operation is not supported on the current system or device."), - 802: ( - "This error indicates that the system is not yet ready to start any CUDA" - " work. To continue using CUDA, verify the system configuration is in a" - " valid state and all required driver daemons are actively running." 
- " More information about this error can be found in the system specific" - " user guide." - ), - 803: ( - "This error indicates that there is a mismatch between the versions of" - " the display driver and the CUDA driver. Refer to the compatibility documentation" - " for supported versions." - ), - 804: ( - "This error indicates that the system was upgraded to run with forward compatibility" - " but the visible hardware detected by CUDA does not support this configuration." - " Refer to the compatibility documentation for the supported hardware matrix or ensure" - " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES" - " environment variable." - ), - 805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.", - 806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.", - 807: ( - "This error indicates that the MPS server is not ready to accept new MPS client requests." - " This error can be returned when the MPS server is in the process of recovering from a fatal failure." - ), - 808: "This error indicates that the hardware resources required to create MPS client have been exhausted.", - 809: "This error indicates the the hardware resources required to support device connections have been exhausted.", - 810: "This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched.", - 811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", - 812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", - 900: ("This error indicates that the operation is not permitted when the stream is capturing."), - 901: ( - "This error indicates that the current capture sequence on the stream" - " has been invalidated due to a previous error." - ), - 902: ( - "This error indicates that the operation would have resulted in a merge of two independent capture sequences." - ), - 903: "This error indicates that the capture was not initiated in this stream.", - 904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."), - 905: ( - "This error indicates that a dependency would have been created which" - " crosses the capture sequence boundary. Only implicit in-stream ordering" - " dependencies are allowed to cross the boundary." - ), - 906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."), - 907: ( - "This error indicates that the operation is not permitted on an event which" - " was last recorded in a capturing stream." - ), - 908: ( - "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED" - " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a" - " different thread." - ), - 909: "This error indicates that the timeout specified for the wait operation has lapsed.", - 910: ( - "This error indicates that the graph update was not performed because it included" - " changes which violated constraints specific to instantiated graph update." - ), - 911: ( - "This indicates that an async error has occurred in a device outside of CUDA." 
- " If CUDA was waiting for an external device's signal before consuming shared data," - " the external device signaled an error indicating that the data is not valid for" - " consumption. This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must be" - " terminated and relaunched." - ), - 912: "Indicates a kernel launch error due to cluster misconfiguration.", - 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), - 914: ("This error indicates one or more resources passed in are not valid resource types for the operation."), - 915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."), - 916: ("This error indicates that an error happened during the key rotation sequence."), - 917: ( - "This error indicates that the requested operation is not permitted because the" - " stream is in a detached state. This can occur if the green context associated" - " with the stream has been destroyed, limiting the stream's operational capabilities." - ), - 999: "This indicates that an unknown internal error has occurred.", -} -DRIVER_CU_RESULT_EXPLANATIONS = get_best_available_explanations(driver.CUresult, _FALLBACK_EXPLANATIONS) +def _load_fallback_explanations(): + from cuda.core._utils.driver_cu_result_explanations_frozen import _FALLBACK_EXPLANATIONS + + return _FALLBACK_EXPLANATIONS + + +DRIVER_CU_RESULT_EXPLANATIONS = get_best_available_explanations(driver.CUresult, _load_fallback_explanations) diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py new file mode 100644 index 0000000000..e396afaa79 --- /dev/null +++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py @@ -0,0 +1,350 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { + 0: ( + "The API call returned with no errors. In the case of query calls, this" + " also means that the operation being queried is complete (see" + " ::cuEventQuery() and ::cuStreamQuery())." + ), + 1: ( + "This indicates that one or more of the parameters passed to the API call" + " is not within an acceptable range of values." + ), + 2: ( + "The API call failed because it was unable to allocate enough memory or" + " other resources to perform the requested operation." + ), + 3: ( + "This indicates that the CUDA driver has not been initialized with" + " ::cuInit() or that initialization has failed." + ), + 4: "This indicates that the CUDA driver is in the process of shutting down.", + 5: ( + "This indicates profiler is not initialized for this run. This can" + " happen when the application is running with external profiling tools" + " like visual profiler." + ), + 6: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to attempt to enable/disable the profiling via ::cuProfilerStart or" + " ::cuProfilerStop without initialization." + ), + 7: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cuProfilerStart() when profiling is already enabled." + ), + 8: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cuProfilerStop() when profiling is already disabled." + ), + 34: ( + "This indicates that the CUDA driver that the application has loaded is a" + " stub library. Applications that run with the stub rather than a real" + " driver loaded will result in CUDA API returning this error." + ), + 36: ( + "This indicates that the API call requires a newer CUDA driver than the one" + " currently installed. Users should install an updated NVIDIA CUDA driver" + " to allow the API call to succeed." 
+ ), + 46: ( + "This indicates that requested CUDA device is unavailable at the current" + " time. Devices are often unavailable due to use of" + " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED." + ), + 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), + 101: ( + "This indicates that the device ordinal supplied by the user does not" + " correspond to a valid CUDA device or that the action requested is" + " invalid for the specified device." + ), + 102: "This error indicates that the Grid license is not applied.", + 200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."), + 201: ( + "This most frequently indicates that there is no context bound to the" + " current thread. This can also be returned if the context passed to an" + " API call is not a valid handle (such as a context that has had" + " ::cuCtxDestroy() invoked on it). This can also be returned if a user" + " mixes different API versions (i.e. 3010 context with 3020 API calls)." + " See ::cuCtxGetApiVersion() for more details." + " This can also be returned if the green context passed to an API call" + " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API." + ), + 202: ( + "This indicated that the context being supplied as a parameter to the" + " API call was already the active context." + " This error return is deprecated as of CUDA 3.2. It is no longer an" + " error to attempt to push the active context via ::cuCtxPushCurrent()." + ), + 205: "This indicates that a map or register operation has failed.", + 206: "This indicates that an unmap or unregister operation has failed.", + 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), + 208: "This indicates that the resource is already mapped.", + 209: ( + "This indicates that there is no kernel image available that is suitable" + " for the device. 
This can occur when a user specifies code generation" + " options for a particular CUDA source file that do not include the" + " corresponding device configuration." + ), + 210: "This indicates that a resource has already been acquired.", + 211: "This indicates that a resource is not mapped.", + 212: ("This indicates that a mapped resource is not available for access as an array."), + 213: ("This indicates that a mapped resource is not available for access as a pointer."), + 214: ("This indicates that an uncorrectable ECC error was detected during execution."), + 215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."), + 216: ( + "This indicates that the ::CUcontext passed to the API call can" + " only be bound to a single CPU thread at a time but is already" + " bound to a CPU thread." + ), + 217: ("This indicates that peer access is not supported across the given devices."), + 218: "This indicates that a PTX JIT compilation failed.", + 219: "This indicates an error with OpenGL or DirectX context.", + 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), + 221: "This indicates that the PTX JIT compiler library was not found.", + 222: "This indicates that the provided PTX was compiled with an unsupported toolchain.", + 223: "This indicates that the PTX JIT compilation was disabled.", + 224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."), + 225: ( + "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." + ), + 226: ( + "This indicates that an exception occurred on the device that is now" + " contained by the GPU's error containment capability. Common causes are -" + " a. Certain types of invalid accesses of peer GPU memory over nvlink" + " b. 
Certain classes of hardware errors"
+        " This leaves the process in an inconsistent state and any further CUDA"
+        " work will return the same error. To continue using CUDA, the process must"
+        " be terminated and relaunched."
+    ),
+    300: (
+        "This indicates that the device kernel source is invalid. This includes"
+        " compilation/linker errors encountered in device code or user error."
+    ),
+    301: "This indicates that the file specified was not found.",
+    302: "This indicates that a link to a shared object failed to resolve.",
+    303: "This indicates that initialization of a shared object failed.",
+    304: "This indicates that an OS call failed.",
+    400: (
+        "This indicates that a resource handle passed to the API call was not"
+        " valid. Resource handles are opaque types like ::CUstream and ::CUevent."
+    ),
+    401: (
+        "This indicates that a resource required by the API call is not in a"
+        " valid state to perform the requested operation."
+    ),
+    402: (
+        "This indicates an attempt was made to introspect an object in a way that"
+        " would discard semantically important information. This is either due to"
+        " the object using functionality newer than the API version used to"
+        " introspect it or omission of optional return arguments."
+    ),
+    500: (
+        "This indicates that a named symbol was not found. Examples of symbols"
+        " are global/constant variable names, driver function names, texture names,"
+        " and surface names."
+    ),
+    600: (
+        "This indicates that asynchronous operations issued previously have not"
+        " completed yet. This result is not actually an error, but must be indicated"
+        " differently than ::CUDA_SUCCESS (which indicates completion). Calls that"
+        " may return this value include ::cuEventQuery() and ::cuStreamQuery()."
+    ),
+    700: (
+        "While executing a kernel, the device encountered a"
+        " load or store instruction on an invalid memory address."
+ " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 701: ( + "This indicates that a launch did not occur because it did not have" + " appropriate resources. This error usually indicates that the user has" + " attempted to pass too many arguments to the device kernel, or the" + " kernel launch specifies too many threads for the kernel's register" + " count. Passing arguments of the wrong size (i.e. a 64-bit pointer" + " when a 32-bit int is expected) is equivalent to passing too many" + " arguments and can also result in this error." + ), + 702: ( + "This indicates that the device kernel took too long to execute. This can" + " only occur if timeouts are enabled - see the device attribute" + " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), + 704: ( + "This error indicates that a call to ::cuCtxEnablePeerAccess() is" + " trying to re-enable peer access to a context which has already" + " had peer access to it enabled." + ), + 705: ( + "This error indicates that ::cuCtxDisablePeerAccess() is" + " trying to disable peer access which has not been enabled yet" + " via ::cuCtxEnablePeerAccess()." + ), + 708: ("This error indicates that the primary context for the specified device has already been initialized."), + 709: ( + "This error indicates that the context current to the calling thread" + " has been destroyed using ::cuCtxDestroy, or is a primary context which" + " has not yet been initialized." + ), + 710: ( + "A device-side assert triggered during kernel execution. The context" + " cannot be used anymore, and must be destroyed. 
All existing device" + " memory allocations from this context are invalid and must be" + " reconstructed if the program is to continue using CUDA." + ), + 711: ( + "This error indicates that the hardware resources required to enable" + " peer access have been exhausted for one or more of the devices" + " passed to ::cuCtxEnablePeerAccess()." + ), + 712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."), + 713: ( + "This error indicates that the pointer passed to ::cuMemHostUnregister()" + " does not correspond to any currently registered memory region." + ), + 714: ( + "While executing a kernel, the device encountered a stack error." + " This can be due to stack corruption or exceeding the stack size limit." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 715: ( + "While executing a kernel, the device encountered an illegal instruction." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 716: ( + "While executing a kernel, the device encountered a load or store instruction" + " on a memory address which is not aligned." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 717: ( + "While executing a kernel, the device encountered an instruction" + " which can only operate on memory locations in certain address spaces" + " (global, shared, or local), but was supplied a memory address not" + " belonging to an allowed address space." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. 
To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 718: ( + "While executing a kernel, the device program counter wrapped its address space." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 719: ( + "An exception occurred on the device while executing a kernel. Common" + " causes include dereferencing an invalid device pointer and accessing" + " out of bounds shared memory. Less common cases can be system specific - more" + " information about these cases can be found in the system specific user guide." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 720: ( + "This error indicates that the number of blocks launched per grid for a kernel that was" + " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice" + " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor" + " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" + " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT." + ), + 721: ( + "An exception occurred on the device while exiting a kernel using tensor memory: the" + " tensor memory was not completely deallocated. This leaves the process in an inconsistent" + " state and any further CUDA work will return the same error. To continue using CUDA, the" + " process must be terminated and relaunched." + ), + 800: "This error indicates that the attempted operation is not permitted.", + 801: ("This error indicates that the attempted operation is not supported on the current system or device."), + 802: ( + "This error indicates that the system is not yet ready to start any CUDA" + " work. 
To continue using CUDA, verify the system configuration is in a"
+        " valid state and all required driver daemons are actively running."
+        " More information about this error can be found in the system specific"
+        " user guide."
+    ),
+    803: (
+        "This error indicates that there is a mismatch between the versions of"
+        " the display driver and the CUDA driver. Refer to the compatibility documentation"
+        " for supported versions."
+    ),
+    804: (
+        "This error indicates that the system was upgraded to run with forward compatibility"
+        " but the visible hardware detected by CUDA does not support this configuration."
+        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
+        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
+        " environment variable."
+    ),
+    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
+    806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.",
+    807: (
+        "This error indicates that the MPS server is not ready to accept new MPS client requests."
+        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
+    ),
+    808: "This error indicates that the hardware resources required to create MPS client have been exhausted.",
+    809: "This error indicates that the hardware resources required to support device connections have been exhausted.",
+    810: "This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched.", + 811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", + 812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", + 900: ("This error indicates that the operation is not permitted when the stream is capturing."), + 901: ( + "This error indicates that the current capture sequence on the stream" + " has been invalidated due to a previous error." + ), + 902: ( + "This error indicates that the operation would have resulted in a merge of two independent capture sequences." + ), + 903: "This error indicates that the capture was not initiated in this stream.", + 904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."), + 905: ( + "This error indicates that a dependency would have been created which" + " crosses the capture sequence boundary. Only implicit in-stream ordering" + " dependencies are allowed to cross the boundary." + ), + 906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."), + 907: ( + "This error indicates that the operation is not permitted on an event which" + " was last recorded in a capturing stream." + ), + 908: ( + "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED" + " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a" + " different thread." + ), + 909: "This error indicates that the timeout specified for the wait operation has lapsed.", + 910: ( + "This error indicates that the graph update was not performed because it included" + " changes which violated constraints specific to instantiated graph update." + ), + 911: ( + "This indicates that an async error has occurred in a device outside of CUDA." 
+        " If CUDA was waiting for an external device's signal before consuming shared data,"
+        " the external device signaled an error indicating that the data is not valid for"
+        " consumption. This leaves the process in an inconsistent state and any further CUDA"
+        " work will return the same error. To continue using CUDA, the process must be"
+        " terminated and relaunched."
+    ),
+    912: "Indicates a kernel launch error due to cluster misconfiguration.",
+    913: ("Indicates a function handle is not loaded when calling an API that requires a loaded function."),
+    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
+    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
+    916: ("This error indicates that an error happened during the key rotation sequence."),
+    917: (
+        "This error indicates that the requested operation is not permitted because the"
+        " stream is in a detached state. This can occur if the green context associated"
+        " with the stream has been destroyed, limiting the stream's operational capabilities."
+ ), + 999: "This indicates that an unknown internal error has occurred.", +} diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 3b42ae5406..a176de73d1 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -19,6 +19,7 @@ import importlib.metadata import re +from collections.abc import Callable from typing import Any _MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (12, 9, 6) @@ -26,6 +27,8 @@ _RST_INLINE_ROLE_RE = re.compile(r":(?:[a-z]+:)?[a-z]+:`([^`]+)`") _WORDWRAP_HYPHEN_AFTER_RE = re.compile(r"(?<=[0-9A-Za-z_])- (?=[0-9A-Za-z_])") _WORDWRAP_HYPHEN_BEFORE_RE = re.compile(r"(?<=[0-9A-Za-z_]) -(?=[0-9A-Za-z_])") +_ExplanationTable = dict[int, str | tuple[str, ...]] +_ExplanationTableLoader = Callable[[], _ExplanationTable] # ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). @@ -111,8 +114,9 @@ def get(self, code: int, default: str | None = None) -> str | None: def get_best_available_explanations( - enum_type: Any, fallback: dict[int, str | tuple[str, ...]] -) -> DocstringBackedExplanations | dict[int, str | tuple[str, ...]]: + enum_type: Any, + fallback: _ExplanationTable | _ExplanationTableLoader, +) -> DocstringBackedExplanations | _ExplanationTable: """Pick one explanation source per bindings version. Use enum-member ``__doc__`` only for bindings versions known to expose @@ -120,5 +124,7 @@ def get_best_available_explanations( 13.x mainline). Otherwise keep using the frozen 13.1.1 fallback tables. 
""" if not _binding_version_has_usable_enum_docstrings(_binding_version()): + if callable(fallback): + return fallback() return fallback return DocstringBackedExplanations(enum_type) diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py index 5f587a0350..ab5be10e2d 100644 --- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py +++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py @@ -4,540 +4,11 @@ from cuda.bindings import runtime from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.1.1 -_FALLBACK_EXPLANATIONS = { - 0: ( - "The API call returned with no errors. In the case of query calls, this" - " also means that the operation being queried is complete (see" - " ::cudaEventQuery() and ::cudaStreamQuery())." - ), - 1: ( - "This indicates that one or more of the parameters passed to the API call" - " is not within an acceptable range of values." - ), - 2: ( - "The API call failed because it was unable to allocate enough memory or" - " other resources to perform the requested operation." - ), - 3: ("The API call failed because the CUDA driver and runtime could not be initialized."), - 4: ( - "This indicates that a CUDA Runtime API call cannot be executed because" - " it is being called during process shut down, at a point in time after" - " CUDA driver has been unloaded." - ), - 5: ( - "This indicates profiler is not initialized for this run. This can" - " happen when the application is running with external profiling tools" - " like visual profiler." - ), - 6: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to attempt to enable/disable the profiling via ::cudaProfilerStart or" - " ::cudaProfilerStop without initialization." - ), - 7: ( - "This error return is deprecated as of CUDA 5.0. 
It is no longer an error" - " to call cudaProfilerStart() when profiling is already enabled." - ), - 8: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to call cudaProfilerStop() when profiling is already disabled." - ), - 9: ( - "This indicates that a kernel launch is requesting resources that can" - " never be satisfied by the current device. Requesting more shared memory" - " per block than the device supports will trigger this error, as will" - " requesting too many threads or blocks. See ::cudaDeviceProp for more" - " device limitations." - ), - 12: ( - "This indicates that one or more of the pitch-related parameters passed" - " to the API call is not within the acceptable range for pitch." - ), - 13: ("This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier."), - 16: ( - "This indicates that at least one host pointer passed to the API call is" - " not a valid host pointer." - " This error return is deprecated as of CUDA 10.1." - ), - 17: ( - "This indicates that at least one device pointer passed to the API call is" - " not a valid device pointer." - " This error return is deprecated as of CUDA 10.1." - ), - 18: ("This indicates that the texture passed to the API call is not a valid texture."), - 19: ( - "This indicates that the texture binding is not valid. This occurs if you" - " call ::cudaGetTextureAlignmentOffset() with an unbound texture." - ), - 20: ( - "This indicates that the channel descriptor passed to the API call is not" - " valid. This occurs if the format is not one of the formats specified by" - " ::cudaChannelFormatKind, or if one of the dimensions is invalid." - ), - 21: ( - "This indicates that the direction of the memcpy passed to the API call is" - " not one of the types specified by ::cudaMemcpyKind." - ), - 22: ( - "This indicated that the user has taken the address of a constant variable," - " which was forbidden up until the CUDA 3.1 release." 
- " This error return is deprecated as of CUDA 3.1. Variables in constant" - " memory may now have their address taken by the runtime via" - " ::cudaGetSymbolAddress()." - ), - 23: ( - "This indicated that a texture fetch was not able to be performed." - " This was previously used for device emulation of texture operations." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 24: ( - "This indicated that a texture was not bound for access." - " This was previously used for device emulation of texture operations." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 25: ( - "This indicated that a synchronization operation had failed." - " This was previously used for some device emulation functions." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 26: ( - "This indicates that a non-float texture was being accessed with linear" - " filtering. This is not supported by CUDA." - ), - 27: ( - "This indicates that an attempt was made to read an unsupported data type as a" - " normalized float. This is not supported by CUDA." - ), - 28: ( - "Mixing of device and device emulation code was not allowed." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 31: ( - "This indicates that the API call is not yet implemented. Production" - " releases of CUDA will never return this error." - " This error return is deprecated as of CUDA 4.1." - ), - 32: ( - "This indicated that an emulated device pointer exceeded the 32-bit address" - " range." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 34: ( - "This indicates that the CUDA driver that the application has loaded is a" - " stub library. 
Applications that run with the stub rather than a real" - " driver loaded will result in CUDA API returning this error." - ), - 35: ( - "This indicates that the installed NVIDIA CUDA driver is older than the" - " CUDA runtime library. This is not a supported configuration. Users should" - " install an updated NVIDIA display driver to allow the application to run." - ), - 36: ( - "This indicates that the API call requires a newer CUDA driver than the one" - " currently installed. Users should install an updated NVIDIA CUDA driver" - " to allow the API call to succeed." - ), - 37: ("This indicates that the surface passed to the API call is not a valid surface."), - 43: ( - "This indicates that multiple global or constant variables (across separate" - " CUDA source files in the application) share the same string name." - ), - 44: ( - "This indicates that multiple textures (across separate CUDA source" - " files in the application) share the same string name." - ), - 45: ( - "This indicates that multiple surfaces (across separate CUDA source" - " files in the application) share the same string name." - ), - 46: ( - "This indicates that all CUDA devices are busy or unavailable at the current" - " time. Devices are often busy/unavailable due to use of" - " ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long" - " running CUDA kernels have filled up the GPU and are blocking new work" - " from starting. They can also be unavailable due to memory constraints" - " on a device that already has active CUDA work being performed." - ), - 49: ( - "This indicates that the current context is not compatible with this" - " the CUDA Runtime. This can only occur if you are using CUDA" - " Runtime/Driver interoperability and have created an existing Driver" - " context using the driver API. 
The Driver context may be incompatible" - " either because the Driver context was created using an older version" - " of the API, because the Runtime API call expects a primary driver" - " context and the Driver context is not primary, or because the Driver" - ' context has been destroyed. Please see CUDART_DRIVER "Interactions' - ' with the CUDA Driver API" for more information.' - ), - 52: ( - "The device function being invoked (usually via ::cudaLaunchKernel()) was not" - " previously configured via the ::cudaConfigureCall() function." - ), - 53: ( - "This indicated that a previous kernel launch failed. This was previously" - " used for device emulation of kernel launches." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 65: ( - "This error indicates that a device runtime grid launch did not occur" - " because the depth of the child grid would exceed the maximum supported" - " number of nested grid launches." - ), - 66: ( - "This error indicates that a grid launch did not occur because the kernel" - " uses file-scoped textures which are unsupported by the device runtime." - " Kernels launched via the device runtime only support textures created with" - " the Texture Object API's." - ), - 67: ( - "This error indicates that a grid launch did not occur because the kernel" - " uses file-scoped surfaces which are unsupported by the device runtime." - " Kernels launched via the device runtime only support surfaces created with" - " the Surface Object API's." - ), - 68: ( - "This error indicates that a call to ::cudaDeviceSynchronize made from" - " the device runtime failed because the call was made at grid depth greater" - " than than either the default (2 levels of grids) or user specified device" - " limit ::cudaLimitDevRuntimeSyncDepth. 
To be able to synchronize on" - " launched grids at a greater depth successfully, the maximum nested" - " depth at which ::cudaDeviceSynchronize will be called must be specified" - " with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit" - " api before the host-side launch of a kernel using the device runtime." - " Keep in mind that additional levels of sync depth require the runtime" - " to reserve large amounts of device memory that cannot be used for" - " user allocations. Note that ::cudaDeviceSynchronize made from device" - " runtime is only supported on devices of compute capability < 9.0." - ), - 69: ( - "This error indicates that a device runtime grid launch failed because" - " the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount." - " For this launch to proceed successfully, ::cudaDeviceSetLimit must be" - " called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher" - " than the upper bound of outstanding launches that can be issued to the" - " device runtime. Keep in mind that raising the limit of pending device" - " runtime launches will require the runtime to reserve device memory that" - " cannot be used for user allocations." - ), - 98: ("The requested device function does not exist or is not compiled for the proper device architecture."), - 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), - 101: ( - "This indicates that the device ordinal supplied by the user does not" - " correspond to a valid CUDA device or that the action requested is" - " invalid for the specified device." - ), - 102: "This indicates that the device doesn't have a valid Grid License.", - 103: ( - "By default, the CUDA runtime may perform a minimal set of self-tests," - " as well as CUDA driver tests, to establish the validity of both." 
- " Introduced in CUDA 11.2, this error return indicates that at least one" - " of these tests has failed and the validity of either the runtime" - " or the driver could not be established." - ), - 127: "This indicates an internal startup failure in the CUDA runtime.", - 200: "This indicates that the device kernel image is invalid.", - 201: ( - "This most frequently indicates that there is no context bound to the" - " current thread. This can also be returned if the context passed to an" - " API call is not a valid handle (such as a context that has had" - " ::cuCtxDestroy() invoked on it). This can also be returned if a user" - " mixes different API versions (i.e. 3010 context with 3020 API calls)." - " See ::cuCtxGetApiVersion() for more details." - ), - 205: "This indicates that the buffer object could not be mapped.", - 206: "This indicates that the buffer object could not be unmapped.", - 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), - 208: "This indicates that the resource is already mapped.", - 209: ( - "This indicates that there is no kernel image available that is suitable" - " for the device. This can occur when a user specifies code generation" - " options for a particular CUDA source file that do not include the" - " corresponding device configuration." - ), - 210: "This indicates that a resource has already been acquired.", - 211: "This indicates that a resource is not mapped.", - 212: ("This indicates that a mapped resource is not available for access as an array."), - 213: ("This indicates that a mapped resource is not available for access as a pointer."), - 214: ("This indicates that an uncorrectable ECC error was detected during execution."), - 215: ("This indicates that the ::cudaLimit passed to the API call is not supported by the active device."), - 216: ( - "This indicates that a call tried to access an exclusive-thread device that" - " is already in use by a different thread." 
- ), - 217: ("This error indicates that P2P access is not supported across the given devices."), - 218: ( - "A PTX compilation failed. The runtime may fall back to compiling PTX if" - " an application does not contain a suitable binary for the current device." - ), - 219: "This indicates an error with the OpenGL or DirectX context.", - 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), - 221: ( - "This indicates that the PTX JIT compiler library was not found. The JIT Compiler" - " library is used for PTX compilation. The runtime may fall back to compiling PTX" - " if an application does not contain a suitable binary for the current device." - ), - 222: ( - "This indicates that the provided PTX was compiled with an unsupported toolchain." - " The most common reason for this, is the PTX was generated by a compiler newer" - " than what is supported by the CUDA driver and PTX JIT compiler." - ), - 223: ( - "This indicates that the JIT compilation was disabled. The JIT compilation compiles" - " PTX. The runtime may fall back to compiling PTX if an application does not contain" - " a suitable binary for the current device." - ), - 224: "This indicates that the provided execution affinity is not supported by the device.", - 225: ( - "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." - ), - 226: ( - "This indicates that an exception occurred on the device that is now" - " contained by the GPU's error containment capability. Common causes are -" - " a. Certain types of invalid accesses of peer GPU memory over nvlink" - " b. Certain classes of hardware errors" - " This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must" - " be terminated and relaunched." 
- ), - 300: "This indicates that the device kernel source is invalid.", - 301: "This indicates that the file specified was not found.", - 302: "This indicates that a link to a shared object failed to resolve.", - 303: "This indicates that initialization of a shared object failed.", - 304: "This error indicates that an OS call failed.", - 400: ( - "This indicates that a resource handle passed to the API call was not" - " valid. Resource handles are opaque types like ::cudaStream_t and" - " ::cudaEvent_t." - ), - 401: ( - "This indicates that a resource required by the API call is not in a" - " valid state to perform the requested operation." - ), - 402: ( - "This indicates an attempt was made to introspect an object in a way that" - " would discard semantically important information. This is either due to" - " the object using funtionality newer than the API version used to" - " introspect it or omission of optional return arguments." - ), - 500: ( - "This indicates that a named symbol was not found. Examples of symbols" - " are global/constant variable names, driver function names, texture names," - " and surface names." - ), - 600: ( - "This indicates that asynchronous operations issued previously have not" - " completed yet. This result is not actually an error, but must be indicated" - " differently than ::cudaSuccess (which indicates completion). Calls that" - " may return this value include ::cudaEventQuery() and ::cudaStreamQuery()." - ), - 700: ( - "The device encountered a load or store instruction on an invalid memory address." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 701: ( - "This indicates that a launch did not occur because it did not have" - " appropriate resources. 
Although this error is similar to" - " ::cudaErrorInvalidConfiguration, this error usually indicates that the" - " user has attempted to pass too many arguments to the device kernel, or the" - " kernel launch specifies too many threads for the kernel's register count." - ), - 702: ( - "This indicates that the device kernel took too long to execute. This can" - " only occur if timeouts are enabled - see the device attribute" - ' ::cudaDeviceAttr::cudaDevAttrKernelExecTimeout "cudaDevAttrKernelExecTimeout"' - " for more information." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), - 704: ( - "This error indicates that a call to ::cudaDeviceEnablePeerAccess() is" - " trying to re-enable peer addressing on from a context which has already" - " had peer addressing enabled." - ), - 705: ( - "This error indicates that ::cudaDeviceDisablePeerAccess() is trying to" - " disable peer addressing which has not been enabled yet via" - " ::cudaDeviceEnablePeerAccess()." - ), - 708: ( - "This indicates that the user has called ::cudaSetValidDevices()," - " ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice()," - " ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or" - " ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by" - " calling non-device management operations (allocating memory and" - " launching kernels are examples of non-device management operations)." - " This error can also be returned if using runtime/driver" - " interoperability and there is an existing ::CUcontext active on the" - " host thread." - ), - 709: ( - "This error indicates that the context current to the calling thread" - " has been destroyed using ::cuCtxDestroy, or is a primary context which" - " has not yet been initialized." 
- ), - 710: ( - "An assert triggered in device code during kernel execution. The device" - " cannot be used again. All existing allocations are invalid. To continue" - " using CUDA, the process must be terminated and relaunched." - ), - 711: ( - "This error indicates that the hardware resources required to enable" - " peer access have been exhausted for one or more of the devices" - " passed to ::cudaEnablePeerAccess()." - ), - 712: ("This error indicates that the memory range passed to ::cudaHostRegister() has already been registered."), - 713: ( - "This error indicates that the pointer passed to ::cudaHostUnregister()" - " does not correspond to any currently registered memory region." - ), - 714: ( - "Device encountered an error in the call stack during kernel execution," - " possibly due to stack corruption or exceeding the stack size limit." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 715: ( - "The device encountered an illegal instruction during kernel execution" - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 716: ( - "The device encountered a load or store instruction" - " on a memory address which is not aligned." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 717: ( - "While executing a kernel, the device encountered an instruction" - " which can only operate on memory locations in certain address spaces" - " (global, shared, or local), but was supplied a memory address not" - " belonging to an allowed address space." 
- " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 718: ( - "The device encountered an invalid program counter." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 719: ( - "An exception occurred on the device while executing a kernel. Common" - " causes include dereferencing an invalid device pointer and accessing" - " out of bounds shared memory. Less common cases can be system specific - more" - " information about these cases can be found in the system specific user guide." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 720: ( - "This error indicates that the number of blocks launched per grid for a kernel that was" - " launched via either ::cudaLaunchCooperativeKernel" - " exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor" - " or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" - " as specified by the device attribute ::cudaDevAttrMultiProcessorCount." - ), - 721: ( - "An exception occurred on the device while exiting a kernel using tensor memory: the" - " tensor memory was not completely deallocated. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA, the" - " process must be terminated and relaunched." 
- ), - 800: "This error indicates the attempted operation is not permitted.", - 801: ("This error indicates the attempted operation is not supported on the current system or device."), - 802: ( - "This error indicates that the system is not yet ready to start any CUDA" - " work. To continue using CUDA, verify the system configuration is in a" - " valid state and all required driver daemons are actively running." - " More information about this error can be found in the system specific" - " user guide." - ), - 803: ( - "This error indicates that there is a mismatch between the versions of" - " the display driver and the CUDA driver. Refer to the compatibility documentation" - " for supported versions." - ), - 804: ( - "This error indicates that the system was upgraded to run with forward compatibility" - " but the visible hardware detected by CUDA does not support this configuration." - " Refer to the compatibility documentation for the supported hardware matrix or ensure" - " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES" - " environment variable." - ), - 805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.", - 806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.", - 807: ( - "This error indicates that the MPS server is not ready to accept new MPS client requests." - " This error can be returned when the MPS server is in the process of recovering from a fatal failure." - ), - 808: "This error indicates that the hardware resources required to create MPS client have been exhausted.", - 809: "This error indicates the the hardware resources required to device connections have been exhausted.", - 810: "This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched.", - 811: "This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", - 812: "This error indicates, that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", - 900: "The operation is not permitted when the stream is capturing.", - 901: ("The current capture sequence on the stream has been invalidated due to a previous error."), - 902: ("The operation would have resulted in a merge of two independent capture sequences."), - 903: "The capture was not initiated in this stream.", - 904: ("The capture sequence contains a fork that was not joined to the primary stream."), - 905: ( - "A dependency would have been created which crosses the capture sequence" - " boundary. Only implicit in-stream ordering dependencies are allowed to" - " cross the boundary." - ), - 906: ( - "The operation would have resulted in a disallowed implicit dependency on" - " a current capture sequence from cudaStreamLegacy." - ), - 907: ("The operation is not permitted on an event which was last recorded in a capturing stream."), - 908: ( - "A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed" - " argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a" - " different thread." - ), - 909: "This indicates that the wait operation has timed out.", - 910: ( - "This error indicates that the graph update was not performed because it included" - " changes which violated constraints specific to instantiated graph update." - ), - 911: ( - "This indicates that an async error has occurred in a device outside of CUDA." - " If CUDA was waiting for an external device's signal before consuming shared data," - " the external device signaled an error indicating that the data is not valid for" - " consumption. 
This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must be" - " terminated and relaunched." - ), - 912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."), - 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), - 914: ("This error indicates one or more resources passed in are not valid resource types for the operation."), - 915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."), - 917: ( - "This error indicates that the requested operation is not permitted because the" - " stream is in a detached state. This can occur if the green context associated" - " with the stream has been destroyed, limiting the stream's operational capabilities." - ), - 999: "This indicates that an unknown internal error has occurred.", - 10000: ( - "Any unhandled CUDA driver error is added to this value and returned via" - " the runtime. Production releases of CUDA should not return such errors." - " This error return is deprecated as of CUDA 4.1." - ), -} -RUNTIME_CUDA_ERROR_EXPLANATIONS = get_best_available_explanations(runtime.cudaError_t, _FALLBACK_EXPLANATIONS) +def _load_fallback_explanations(): + from cuda.core._utils.runtime_cuda_error_explanations_frozen import _FALLBACK_EXPLANATIONS + + return _FALLBACK_EXPLANATIONS + + +RUNTIME_CUDA_ERROR_EXPLANATIONS = get_best_available_explanations(runtime.cudaError_t, _load_fallback_explanations) diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py new file mode 100644 index 0000000000..497b2ad20d --- /dev/null +++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py @@ -0,0 +1,538 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { + 0: ( + "The API call returned with no errors. In the case of query calls, this" + " also means that the operation being queried is complete (see" + " ::cudaEventQuery() and ::cudaStreamQuery())." + ), + 1: ( + "This indicates that one or more of the parameters passed to the API call" + " is not within an acceptable range of values." + ), + 2: ( + "The API call failed because it was unable to allocate enough memory or" + " other resources to perform the requested operation." + ), + 3: ("The API call failed because the CUDA driver and runtime could not be initialized."), + 4: ( + "This indicates that a CUDA Runtime API call cannot be executed because" + " it is being called during process shut down, at a point in time after" + " CUDA driver has been unloaded." + ), + 5: ( + "This indicates profiler is not initialized for this run. This can" + " happen when the application is running with external profiling tools" + " like visual profiler." + ), + 6: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to attempt to enable/disable the profiling via ::cudaProfilerStart or" + " ::cudaProfilerStop without initialization." + ), + 7: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cudaProfilerStart() when profiling is already enabled." + ), + 8: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cudaProfilerStop() when profiling is already disabled." + ), + 9: ( + "This indicates that a kernel launch is requesting resources that can" + " never be satisfied by the current device. Requesting more shared memory" + " per block than the device supports will trigger this error, as will" + " requesting too many threads or blocks. See ::cudaDeviceProp for more" + " device limitations." 
+ ), + 12: ( + "This indicates that one or more of the pitch-related parameters passed" + " to the API call is not within the acceptable range for pitch." + ), + 13: ("This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier."), + 16: ( + "This indicates that at least one host pointer passed to the API call is" + " not a valid host pointer." + " This error return is deprecated as of CUDA 10.1." + ), + 17: ( + "This indicates that at least one device pointer passed to the API call is" + " not a valid device pointer." + " This error return is deprecated as of CUDA 10.1." + ), + 18: ("This indicates that the texture passed to the API call is not a valid texture."), + 19: ( + "This indicates that the texture binding is not valid. This occurs if you" + " call ::cudaGetTextureAlignmentOffset() with an unbound texture." + ), + 20: ( + "This indicates that the channel descriptor passed to the API call is not" + " valid. This occurs if the format is not one of the formats specified by" + " ::cudaChannelFormatKind, or if one of the dimensions is invalid." + ), + 21: ( + "This indicates that the direction of the memcpy passed to the API call is" + " not one of the types specified by ::cudaMemcpyKind." + ), + 22: ( + "This indicated that the user has taken the address of a constant variable," + " which was forbidden up until the CUDA 3.1 release." + " This error return is deprecated as of CUDA 3.1. Variables in constant" + " memory may now have their address taken by the runtime via" + " ::cudaGetSymbolAddress()." + ), + 23: ( + "This indicated that a texture fetch was not able to be performed." + " This was previously used for device emulation of texture operations." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 24: ( + "This indicated that a texture was not bound for access." + " This was previously used for device emulation of texture operations." 
+ " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 25: ( + "This indicated that a synchronization operation had failed." + " This was previously used for some device emulation functions." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 26: ( + "This indicates that a non-float texture was being accessed with linear" + " filtering. This is not supported by CUDA." + ), + 27: ( + "This indicates that an attempt was made to read an unsupported data type as a" + " normalized float. This is not supported by CUDA." + ), + 28: ( + "Mixing of device and device emulation code was not allowed." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 31: ( + "This indicates that the API call is not yet implemented. Production" + " releases of CUDA will never return this error." + " This error return is deprecated as of CUDA 4.1." + ), + 32: ( + "This indicated that an emulated device pointer exceeded the 32-bit address" + " range." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 34: ( + "This indicates that the CUDA driver that the application has loaded is a" + " stub library. Applications that run with the stub rather than a real" + " driver loaded will result in CUDA API returning this error." + ), + 35: ( + "This indicates that the installed NVIDIA CUDA driver is older than the" + " CUDA runtime library. This is not a supported configuration. Users should" + " install an updated NVIDIA display driver to allow the application to run." + ), + 36: ( + "This indicates that the API call requires a newer CUDA driver than the one" + " currently installed. Users should install an updated NVIDIA CUDA driver" + " to allow the API call to succeed." 
+ ), + 37: ("This indicates that the surface passed to the API call is not a valid surface."), + 43: ( + "This indicates that multiple global or constant variables (across separate" + " CUDA source files in the application) share the same string name." + ), + 44: ( + "This indicates that multiple textures (across separate CUDA source" + " files in the application) share the same string name." + ), + 45: ( + "This indicates that multiple surfaces (across separate CUDA source" + " files in the application) share the same string name." + ), + 46: ( + "This indicates that all CUDA devices are busy or unavailable at the current" + " time. Devices are often busy/unavailable due to use of" + " ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long" + " running CUDA kernels have filled up the GPU and are blocking new work" + " from starting. They can also be unavailable due to memory constraints" + " on a device that already has active CUDA work being performed." + ), + 49: ( + "This indicates that the current context is not compatible with this" + " the CUDA Runtime. This can only occur if you are using CUDA" + " Runtime/Driver interoperability and have created an existing Driver" + " context using the driver API. The Driver context may be incompatible" + " either because the Driver context was created using an older version" + " of the API, because the Runtime API call expects a primary driver" + " context and the Driver context is not primary, or because the Driver" + ' context has been destroyed. Please see CUDART_DRIVER "Interactions' + ' with the CUDA Driver API" for more information.' + ), + 52: ( + "The device function being invoked (usually via ::cudaLaunchKernel()) was not" + " previously configured via the ::cudaConfigureCall() function." + ), + 53: ( + "This indicated that a previous kernel launch failed. This was previously" + " used for device emulation of kernel launches." + " This error return is deprecated as of CUDA 3.1. 
Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 65: ( + "This error indicates that a device runtime grid launch did not occur" + " because the depth of the child grid would exceed the maximum supported" + " number of nested grid launches." + ), + 66: ( + "This error indicates that a grid launch did not occur because the kernel" + " uses file-scoped textures which are unsupported by the device runtime." + " Kernels launched via the device runtime only support textures created with" + " the Texture Object API's." + ), + 67: ( + "This error indicates that a grid launch did not occur because the kernel" + " uses file-scoped surfaces which are unsupported by the device runtime." + " Kernels launched via the device runtime only support surfaces created with" + " the Surface Object API's." + ), + 68: ( + "This error indicates that a call to ::cudaDeviceSynchronize made from" + " the device runtime failed because the call was made at grid depth greater" + " than than either the default (2 levels of grids) or user specified device" + " limit ::cudaLimitDevRuntimeSyncDepth. To be able to synchronize on" + " launched grids at a greater depth successfully, the maximum nested" + " depth at which ::cudaDeviceSynchronize will be called must be specified" + " with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit" + " api before the host-side launch of a kernel using the device runtime." + " Keep in mind that additional levels of sync depth require the runtime" + " to reserve large amounts of device memory that cannot be used for" + " user allocations. Note that ::cudaDeviceSynchronize made from device" + " runtime is only supported on devices of compute capability < 9.0." + ), + 69: ( + "This error indicates that a device runtime grid launch failed because" + " the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount." 
+ " For this launch to proceed successfully, ::cudaDeviceSetLimit must be" + " called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher" + " than the upper bound of outstanding launches that can be issued to the" + " device runtime. Keep in mind that raising the limit of pending device" + " runtime launches will require the runtime to reserve device memory that" + " cannot be used for user allocations." + ), + 98: ("The requested device function does not exist or is not compiled for the proper device architecture."), + 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), + 101: ( + "This indicates that the device ordinal supplied by the user does not" + " correspond to a valid CUDA device or that the action requested is" + " invalid for the specified device." + ), + 102: "This indicates that the device doesn't have a valid Grid License.", + 103: ( + "By default, the CUDA runtime may perform a minimal set of self-tests," + " as well as CUDA driver tests, to establish the validity of both." + " Introduced in CUDA 11.2, this error return indicates that at least one" + " of these tests has failed and the validity of either the runtime" + " or the driver could not be established." + ), + 127: "This indicates an internal startup failure in the CUDA runtime.", + 200: "This indicates that the device kernel image is invalid.", + 201: ( + "This most frequently indicates that there is no context bound to the" + " current thread. This can also be returned if the context passed to an" + " API call is not a valid handle (such as a context that has had" + " ::cuCtxDestroy() invoked on it). This can also be returned if a user" + " mixes different API versions (i.e. 3010 context with 3020 API calls)." + " See ::cuCtxGetApiVersion() for more details." 
+ ), + 205: "This indicates that the buffer object could not be mapped.", + 206: "This indicates that the buffer object could not be unmapped.", + 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), + 208: "This indicates that the resource is already mapped.", + 209: ( + "This indicates that there is no kernel image available that is suitable" + " for the device. This can occur when a user specifies code generation" + " options for a particular CUDA source file that do not include the" + " corresponding device configuration." + ), + 210: "This indicates that a resource has already been acquired.", + 211: "This indicates that a resource is not mapped.", + 212: ("This indicates that a mapped resource is not available for access as an array."), + 213: ("This indicates that a mapped resource is not available for access as a pointer."), + 214: ("This indicates that an uncorrectable ECC error was detected during execution."), + 215: ("This indicates that the ::cudaLimit passed to the API call is not supported by the active device."), + 216: ( + "This indicates that a call tried to access an exclusive-thread device that" + " is already in use by a different thread." + ), + 217: ("This error indicates that P2P access is not supported across the given devices."), + 218: ( + "A PTX compilation failed. The runtime may fall back to compiling PTX if" + " an application does not contain a suitable binary for the current device." + ), + 219: "This indicates an error with the OpenGL or DirectX context.", + 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), + 221: ( + "This indicates that the PTX JIT compiler library was not found. The JIT Compiler" + " library is used for PTX compilation. The runtime may fall back to compiling PTX" + " if an application does not contain a suitable binary for the current device." 
+ ), + 222: ( + "This indicates that the provided PTX was compiled with an unsupported toolchain." + " The most common reason for this, is the PTX was generated by a compiler newer" + " than what is supported by the CUDA driver and PTX JIT compiler." + ), + 223: ( + "This indicates that the JIT compilation was disabled. The JIT compilation compiles" + " PTX. The runtime may fall back to compiling PTX if an application does not contain" + " a suitable binary for the current device." + ), + 224: "This indicates that the provided execution affinity is not supported by the device.", + 225: ( + "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." + ), + 226: ( + "This indicates that an exception occurred on the device that is now" + " contained by the GPU's error containment capability. Common causes are -" + " a. Certain types of invalid accesses of peer GPU memory over nvlink" + " b. Certain classes of hardware errors" + " This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must" + " be terminated and relaunched." + ), + 300: "This indicates that the device kernel source is invalid.", + 301: "This indicates that the file specified was not found.", + 302: "This indicates that a link to a shared object failed to resolve.", + 303: "This indicates that initialization of a shared object failed.", + 304: "This error indicates that an OS call failed.", + 400: ( + "This indicates that a resource handle passed to the API call was not" + " valid. Resource handles are opaque types like ::cudaStream_t and" + " ::cudaEvent_t." + ), + 401: ( + "This indicates that a resource required by the API call is not in a" + " valid state to perform the requested operation." + ), + 402: ( + "This indicates an attempt was made to introspect an object in a way that" + " would discard semantically important information. 
This is either due to" + " the object using funtionality newer than the API version used to" + " introspect it or omission of optional return arguments." + ), + 500: ( + "This indicates that a named symbol was not found. Examples of symbols" + " are global/constant variable names, driver function names, texture names," + " and surface names." + ), + 600: ( + "This indicates that asynchronous operations issued previously have not" + " completed yet. This result is not actually an error, but must be indicated" + " differently than ::cudaSuccess (which indicates completion). Calls that" + " may return this value include ::cudaEventQuery() and ::cudaStreamQuery()." + ), + 700: ( + "The device encountered a load or store instruction on an invalid memory address." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 701: ( + "This indicates that a launch did not occur because it did not have" + " appropriate resources. Although this error is similar to" + " ::cudaErrorInvalidConfiguration, this error usually indicates that the" + " user has attempted to pass too many arguments to the device kernel, or the" + " kernel launch specifies too many threads for the kernel's register count." + ), + 702: ( + "This indicates that the device kernel took too long to execute. This can" + " only occur if timeouts are enabled - see the device attribute" + ' ::cudaDeviceAttr::cudaDevAttrKernelExecTimeout "cudaDevAttrKernelExecTimeout"' + " for more information." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." 
+ ), + 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), + 704: ( + "This error indicates that a call to ::cudaDeviceEnablePeerAccess() is" + " trying to re-enable peer addressing on from a context which has already" + " had peer addressing enabled." + ), + 705: ( + "This error indicates that ::cudaDeviceDisablePeerAccess() is trying to" + " disable peer addressing which has not been enabled yet via" + " ::cudaDeviceEnablePeerAccess()." + ), + 708: ( + "This indicates that the user has called ::cudaSetValidDevices()," + " ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice()," + " ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or" + " ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by" + " calling non-device management operations (allocating memory and" + " launching kernels are examples of non-device management operations)." + " This error can also be returned if using runtime/driver" + " interoperability and there is an existing ::CUcontext active on the" + " host thread." + ), + 709: ( + "This error indicates that the context current to the calling thread" + " has been destroyed using ::cuCtxDestroy, or is a primary context which" + " has not yet been initialized." + ), + 710: ( + "An assert triggered in device code during kernel execution. The device" + " cannot be used again. All existing allocations are invalid. To continue" + " using CUDA, the process must be terminated and relaunched." + ), + 711: ( + "This error indicates that the hardware resources required to enable" + " peer access have been exhausted for one or more of the devices" + " passed to ::cudaEnablePeerAccess()." + ), + 712: ("This error indicates that the memory range passed to ::cudaHostRegister() has already been registered."), + 713: ( + "This error indicates that the pointer passed to ::cudaHostUnregister()" + " does not correspond to any currently registered memory region." 
+ ), + 714: ( + "Device encountered an error in the call stack during kernel execution," + " possibly due to stack corruption or exceeding the stack size limit." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 715: ( + "The device encountered an illegal instruction during kernel execution" + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 716: ( + "The device encountered a load or store instruction" + " on a memory address which is not aligned." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 717: ( + "While executing a kernel, the device encountered an instruction" + " which can only operate on memory locations in certain address spaces" + " (global, shared, or local), but was supplied a memory address not" + " belonging to an allowed address space." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 718: ( + "The device encountered an invalid program counter." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 719: ( + "An exception occurred on the device while executing a kernel. Common" + " causes include dereferencing an invalid device pointer and accessing" + " out of bounds shared memory. Less common cases can be system specific - more" + " information about these cases can be found in the system specific user guide." 
+ " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 720: ( + "This error indicates that the number of blocks launched per grid for a kernel that was" + " launched via either ::cudaLaunchCooperativeKernel" + " exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor" + " or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" + " as specified by the device attribute ::cudaDevAttrMultiProcessorCount." + ), + 721: ( + "An exception occurred on the device while exiting a kernel using tensor memory: the" + " tensor memory was not completely deallocated. This leaves the process in an inconsistent" + " state and any further CUDA work will return the same error. To continue using CUDA, the" + " process must be terminated and relaunched." + ), + 800: "This error indicates the attempted operation is not permitted.", + 801: ("This error indicates the attempted operation is not supported on the current system or device."), + 802: ( + "This error indicates that the system is not yet ready to start any CUDA" + " work. To continue using CUDA, verify the system configuration is in a" + " valid state and all required driver daemons are actively running." + " More information about this error can be found in the system specific" + " user guide." + ), + 803: ( + "This error indicates that there is a mismatch between the versions of" + " the display driver and the CUDA driver. Refer to the compatibility documentation" + " for supported versions." + ), + 804: ( + "This error indicates that the system was upgraded to run with forward compatibility" + " but the visible hardware detected by CUDA does not support this configuration." 
+ " Refer to the compatibility documentation for the supported hardware matrix or ensure" + " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES" + " environment variable." + ), + 805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.", + 806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.", + 807: ( + "This error indicates that the MPS server is not ready to accept new MPS client requests." + " This error can be returned when the MPS server is in the process of recovering from a fatal failure." + ), + 808: "This error indicates that the hardware resources required to create MPS client have been exhausted.", + 809: "This error indicates the the hardware resources required to device connections have been exhausted.", + 810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.", + 811: "This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", + 812: "This error indicates, that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", + 900: "The operation is not permitted when the stream is capturing.", + 901: ("The current capture sequence on the stream has been invalidated due to a previous error."), + 902: ("The operation would have resulted in a merge of two independent capture sequences."), + 903: "The capture was not initiated in this stream.", + 904: ("The capture sequence contains a fork that was not joined to the primary stream."), + 905: ( + "A dependency would have been created which crosses the capture sequence" + " boundary. Only implicit in-stream ordering dependencies are allowed to" + " cross the boundary." 
+ ), + 906: ( + "The operation would have resulted in a disallowed implicit dependency on" + " a current capture sequence from cudaStreamLegacy." + ), + 907: ("The operation is not permitted on an event which was last recorded in a capturing stream."), + 908: ( + "A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed" + " argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a" + " different thread." + ), + 909: "This indicates that the wait operation has timed out.", + 910: ( + "This error indicates that the graph update was not performed because it included" + " changes which violated constraints specific to instantiated graph update." + ), + 911: ( + "This indicates that an async error has occurred in a device outside of CUDA." + " If CUDA was waiting for an external device's signal before consuming shared data," + " the external device signaled an error indicating that the data is not valid for" + " consumption. This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must be" + " terminated and relaunched." + ), + 912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."), + 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), + 914: ("This error indicates one or more resources passed in are not valid resource types for the operation."), + 915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."), + 917: ( + "This error indicates that the requested operation is not permitted because the" + " stream is in a detached state. This can occur if the green context associated" + " with the stream has been destroyed, limiting the stream's operational capabilities." 
+ ), + 999: "This indicates that an unknown internal error has occurred.", + 10000: ( + "Any unhandled CUDA driver error is added to this value and returned via" + " the runtime. Production releases of CUDA should not return such errors." + " This error return is deprecated as of CUDA 4.1." + ), +} diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index d31e40ee47..d46355aefd 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import importlib +import sys + import pytest from cuda.core._utils import enum_explanations_helpers @@ -124,3 +127,44 @@ def test_get_best_available_explanations_switches_by_version(monkeypatch, versio assert expl.get(7) == "clean me" else: assert expl is fallback + + +def test_get_best_available_explanations_calls_loader_before_docstrings(monkeypatch): + fallback = {7: "fallback text"} + calls = [] + + def load_fallback(): + calls.append("loaded") + return fallback + + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: (13, 1, 1)) + expl = enum_explanations_helpers.get_best_available_explanations( + _FakeEnumType({7: _FakeEnumMember("clean me")}), + load_fallback, + ) + assert expl is fallback + assert calls == ["loaded"] + + +def test_driver_explanations_module_skips_fallback_import_when_docstrings_available(monkeypatch): + import cuda.core._utils.driver_cu_result_explanations as driver_explanations + + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: (13, 2, 0)) + sys.modules.pop("cuda.core._utils.driver_cu_result_explanations_frozen", None) + + importlib.reload(driver_explanations) + + assert "cuda.core._utils.driver_cu_result_explanations_frozen" not in sys.modules + assert isinstance(driver_explanations.DRIVER_CU_RESULT_EXPLANATIONS, 
DocstringBackedExplanations) + + +def test_runtime_explanations_module_skips_fallback_import_when_docstrings_available(monkeypatch): + import cuda.core._utils.runtime_cuda_error_explanations as runtime_explanations + + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: (13, 2, 0)) + sys.modules.pop("cuda.core._utils.runtime_cuda_error_explanations_frozen", None) + + importlib.reload(runtime_explanations) + + assert "cuda.core._utils.runtime_cuda_error_explanations_frozen" not in sys.modules + assert isinstance(runtime_explanations.RUNTIME_CUDA_ERROR_EXPLANATIONS, DocstringBackedExplanations) From 27aae00d7e4529b1aa372d53b57fe698d58bfb7c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 6 Apr 2026 12:55:39 -0700 Subject: [PATCH 9/9] cuda_core: pin cleanup-sensitive enum doc examples Add a small set of real enum-doc cleanup examples that assert today's exact cleaned output for representative live bindings cases. Mark unexpected drift as xfail so future upstream doc changes trigger manual review without causing a hard test failure. Made-with: Cursor --- cuda_core/tests/test_cuda_utils.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 7dc3703749..1357ca3a12 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -19,6 +19,15 @@ def _skip_if_bindings_pre_enum_docstrings(): pytest.skip("cuda-bindings version does not expose usable enum __doc__ strings") +def _assert_cleanup_example_matches_or_xfail(actual, expected): + # Pin a few real cleanup-sensitive enum docs. If one starts failing, review + # the raw ``__doc__`` and today's cleaned output: either update the expected + # text to match an acceptable upstream change, or fix the cleanup logic. 
+ if actual != expected: + pytest.xfail("please review this failure") + assert actual == expected + + def test_check_driver_error(): num_unexpected = 0 for error in driver.CUresult: @@ -73,6 +82,69 @@ def test_runtime_error_enum_has_non_empty_docstring(): assert doc.strip() != "" +# These use real enum members rather than synthetic strings, to pin a few +# representative cleanup-sensitive docs end to end. Together with the helper +# unit tests, this gives a harder assurance that today's live bindings output +# is rendered into the user-facing text we expect. Unexpected changes are +# marked as xfail so they prompt manual review of the drift, without causing +# a hard test failure. +@pytest.mark.parametrize( + ("explanations", "error", "expected"), + [ + pytest.param( + cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS, + driver.CUresult.CUDA_ERROR_NOT_INITIALIZED, + "This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has failed.", + id="driver_not_initialized_role_cleanup", + ), + pytest.param( + cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS, + driver.CUresult.CUDA_ERROR_INVALID_CONTEXT, + ( + "This most frequently indicates that there is no context bound to the current thread. " + "This can also be returned if the context passed to an API call is not a valid handle " + "(such as a context that has had cuCtxDestroy() invoked on it). This can also be " + "returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). " + "See cuCtxGetApiVersion() for more details. This can also be returned if the green " + "context passed to an API call was not converted to a CUcontext using cuCtxFromGreenCtx API." + ), + id="driver_invalid_context_multiple_roles", + ), + pytest.param( + cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS, + runtime.cudaError_t.cudaErrorLaunchTimeout, + ( + "This indicates that the device kernel took too long to execute. 
This can only occur " + "if timeouts are enabled - see the device attribute cudaDevAttrKernelExecTimeout for " + "more information. This leaves the process in an inconsistent state and any further " + "CUDA work will return the same error. To continue using CUDA, the process must be " + "terminated and relaunched." + ), + id="runtime_launch_timeout_role_cleanup", + ), + pytest.param( + cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS, + runtime.cudaError_t.cudaErrorIncompatibleDriverContext, + ( + "This indicates that the current context is not compatible with this the CUDA Runtime. " + "This can only occur if you are using CUDA Runtime/Driver interoperability and have " + "created an existing Driver context using the driver API. The Driver context may be " + "incompatible either because the Driver context was created using an older version of " + "the API, because the Runtime API call expects a primary driver context and the Driver " + "context is not primary, or because the Driver context has been destroyed. Please see " + '"Interactions with the CUDA Driver API" for more information.' + ), + id="runtime_incompatible_driver_context_codegen_bug", + ), + ], +) +def test_enum_doc_cleanup_examples_are_reviewed_on_change(explanations, error, expected): + _skip_if_bindings_pre_enum_docstrings() + + actual = explanations.get(int(error)) + _assert_cleanup_example_matches_or_xfail(actual, expected) + + def test_check_driver_error_attaches_explanation(): error = driver.CUresult.CUDA_ERROR_INVALID_VALUE name_err, name = driver.cuGetErrorName(error)