From 4cbe3f3b5d2bb85c0957a242bfe4161bc7d78866 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 4 Apr 2026 21:59:22 -0700 Subject: [PATCH 1/9] cuda_core: derive error enum explanations from bindings docstrings Use cleaned driver/runtime enum __doc__ text from cuda-bindings 13.2.0+ as the primary source for CUDA error explanations in cuda_core, while freezing the 13.1.1 explanation tables as fallback for older bindings. Centralize the version-gated selection and docstring cleanup helpers, update the driver/runtime explanation modules to use them, add tests that verify representative enums expose __doc__ and that cuda_utils attaches the explanation text, and remove the obsolete enum-reformat toolshed helper script. Made-with: Cursor --- .../_utils/driver_cu_result_explanations.py | 29 ++-- .../core/_utils/enum_explanations_helpers.py | 127 ++++++++++++++++++ .../_utils/runtime_cuda_error_explanations.py | 34 ++--- cuda_core/tests/test_cuda_utils.py | 85 +++++++----- .../test_utils_enum_explanations_helpers.py | 127 ++++++++++++++++++ toolshed/reformat_cuda_enums_as_py.py | 112 --------------- 6 files changed, 333 insertions(+), 181 deletions(-) create mode 100644 cuda_core/cuda/core/_utils/enum_explanations_helpers.py create mode 100644 cuda_core/tests/test_utils_enum_explanations_helpers.py delete mode 100755 toolshed/reformat_cuda_enums_as_py.py diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py index 0b085520a6..5184e72650 100644 --- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py +++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py @@ -1,13 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# To regenerate the dictionary below run: -# ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/cuda.h -# Replace the dictionary below with the output. -# Also update the CUDA Toolkit version number below. +from cuda.bindings import driver +from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.2.0 -DRIVER_CU_RESULT_EXPLANATIONS = { +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { 0: ( "The API call returned with no errors. In the case of query calls, this" " also means that the operation being queried is complete (see" @@ -334,15 +332,12 @@ " changes which violated constraints specific to instantiated graph update." ), 911: ( - "This indicates that an error has occurred in a device outside of GPU. It can be a" - " synchronous error w.r.t. CUDA API or an asynchronous error from the external device." - " In case of asynchronous error, it means that if cuda was waiting for an external device's" - " signal before consuming shared data, the external device signaled an error indicating that" - " the data is not valid for consumption. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA," - " the process must be terminated and relaunched." - " In case of synchronous error, it means that one or more external devices" - " have encountered an error and cannot complete the operation." + "This indicates that an async error has occurred in a device outside of CUDA." + " If CUDA was waiting for an external device's signal before consuming shared data," + " the external device signaled an error indicating that the data is not valid for" + " consumption. This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must be" + " terminated and relaunched." 
), 912: "Indicates a kernel launch error due to cluster misconfiguration.", 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), @@ -356,3 +351,5 @@ ), 999: "This indicates that an unknown internal error has occurred.", } + +DRIVER_CU_RESULT_EXPLANATIONS = get_best_available_explanations(driver.CUresult, _FALLBACK_EXPLANATIONS) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py new file mode 100644 index 0000000000..8a4e8240c2 --- /dev/null +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -0,0 +1,127 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +"""Internal support for error-enum explanations. + +``cuda_core`` keeps frozen 13.1.1 fallback tables for older ``cuda-bindings`` +releases. Starting with ``cuda-bindings`` 13.2.0, driver/runtime error enums +carry usable ``__doc__`` text. This module decides which source to use and +normalizes generated docstrings so user-facing ``CUDAError`` messages stay +close to the long-form explanation prose. + +The cleanup rules here were derived while validating docstring-vs-dict parity +in PR #1805. Keep them narrow and remove them when codegen / fallback support is +no longer needed. +""" + +from __future__ import annotations + +import importlib.metadata +import re +from typing import Any + +_MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) + + +# ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). 
+def _binding_version() -> tuple[int, int, int]: + """Return the installed ``cuda-bindings`` version, or a conservative old value.""" + try: + parts = importlib.metadata.version("cuda-bindings").split(".")[:3] + except importlib.metadata.PackageNotFoundError: + return (0, 0, 0) # For very old versions of cuda-python + return tuple(int(v) for v in parts) + + +def _strip_doxygen_double_colon_prefixes(s: str) -> str: + """Remove Doxygen-style ``::`` before CUDA identifiers (not C++ ``Foo::Bar`` scope). + + The frozen fallback tables come from CUDA header comments and therefore use + Doxygen ``::name`` references. Generated enum ``__doc__`` text uses Sphinx + roles instead, so parity checks need a small amount of normalization. + """ + prev = None + while prev != s: + prev = s + s = re.sub(r"(? str: + """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text. + + This is a narrow workaround for wrapped forms such as ``non- linear`` that + otherwise differ from the single-line fallback prose. + """ + prev = None + while prev != s: + prev = s + s = re.sub(r"([a-z])- ([a-z])", r"\1-\2", s) + s = re.sub(r"([a-z]) -([a-z])", r"\1-\2", s) + return s + + +def clean_enum_member_docstring(doc: str | None) -> str | None: + """Turn an enum member ``__doc__`` into plain text. + + The generated enum docstrings are already close to the fallback explanation + prose, but not byte-identical: they may contain Sphinx inline roles, line + wrapping, or a small known codegen defect. Normalize only those differences + so the text is suitable for user-facing error messages. + """ + if doc is None: + return None + s = doc + # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed + # in cuda-bindings code generation. Do not use a raw string for the needle: + # r"\n..." would not match the real newline present in __doc__. 
+ s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') + s = re.sub( + r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`", + lambda m: re.sub(r"^~?\.", "", m.group(1)), + s, + ) + s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s) + s = re.sub(r"\*([^*]+)\*", r"\1", s) + s = re.sub(r"\s+", " ", s).strip() + s = _fix_hyphenation_wordwrap_spacing(s) + return s + + +class DocstringBackedExplanations: + """``dict.get``-like lookup over enum-member ``__doc__`` strings. + + Once the bindings-version gate says docstrings are available, use them + exclusively. Missing docstrings should surface as ``None`` / ``default`` + rather than silently mixing in frozen fallback prose. + """ + + __slots__ = ("_enum_type",) + + def __init__(self, enum_type: Any) -> None: + self._enum_type = enum_type + + def get(self, code: int, default: str | None = None) -> str | None: + try: + member = self._enum_type(code) + except ValueError: + return default + + raw_doc = member.__doc__ + if raw_doc is None: + return default + + return clean_enum_member_docstring(raw_doc) + + +def get_best_available_explanations( + enum_type: Any, fallback: dict[int, str | tuple[str, ...]] +) -> DocstringBackedExplanations | dict[int, str | tuple[str, ...]]: + """Pick one explanation source per bindings version. + + ``cuda-bindings`` < 13.2.0: use the frozen 13.1.1 fallback tables. + ``cuda-bindings`` >= 13.2.0: use enum-member ``__doc__`` exclusively. + """ + if _binding_version() < _MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS: + return fallback + return DocstringBackedExplanations(enum_type) diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py index 4421d50480..5f587a0350 100644 --- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py +++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py @@ -1,13 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# To regenerate the dictionary below run: -# ../../../../../toolshed/reformat_cuda_enums_as_py.py /usr/local/cuda/include/driver_types.h -# Replace the dictionary below with the output. -# Also update the CUDA Toolkit version number below. +from cuda.bindings import runtime +from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.2.0 -RUNTIME_CUDA_ERROR_EXPLANATIONS = { +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { 0: ( "The API call returned with no errors. In the case of query calls, this" " also means that the operation being queried is complete (see" @@ -52,11 +50,6 @@ " requesting too many threads or blocks. See ::cudaDeviceProp for more" " device limitations." ), - 10: ( - "This indicates that the driver is newer than the runtime version" - " and returned graph node parameter information that the runtime" - " does not understand and is unable to translate." - ), 12: ( "This indicates that one or more of the pitch-related parameters passed" " to the API call is not within the acceptable range for pitch." @@ -523,15 +516,12 @@ " changes which violated constraints specific to instantiated graph update." ), 911: ( - "This indicates that an error has occurred in a device outside of GPU. It can be a" - " synchronous error w.r.t. CUDA API or an asynchronous error from the external device." - " In case of asynchronous error, it means that if cuda was waiting for an external device's" - " signal before consuming shared data, the external device signaled an error indicating that" - " the data is not valid for consumption. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA," - " the process must be terminated and relaunched." 
- " In case of synchronous error, it means that one or more external devices" - " have encountered an error and cannot complete the operation." + "This indicates that an async error has occurred in a device outside of CUDA." + " If CUDA was waiting for an external device's signal before consuming shared data," + " the external device signaled an error indicating that the data is not valid for" + " consumption. This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must be" + " terminated and relaunched." ), 912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."), 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), @@ -549,3 +539,5 @@ " This error return is deprecated as of CUDA 4.1." ), } + +RUNTIME_CUDA_ERROR_EXPLANATIONS = get_best_available_explanations(runtime.cudaError_t, _FALLBACK_EXPLANATIONS) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index f218182766..238a2df047 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -11,40 +11,11 @@ from cuda.core._utils.clear_error_support import assert_type_str_or_bytes_like, raise_code_path_meant_to_be_unreachable -def test_driver_cu_result_explanations_health(): - expl_dict = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS - - # Ensure all CUresult enums are in expl_dict - known_codes = set() - for error in driver.CUresult: - code = int(error) - assert code in expl_dict - known_codes.add(code) - - from cuda.core._utils.version import binding_version - - if binding_version() >= (13, 0, 0): - # Ensure expl_dict has no codes not known as a CUresult enum - extra_expl = sorted(set(expl_dict.keys()) - known_codes) - assert not extra_expl - - -def test_runtime_cuda_error_explanations_health(): - expl_dict = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS - - # Ensure all 
cudaError_t enums are in expl_dict - known_codes = set() - for error in runtime.cudaError_t: - code = int(error) - assert code in expl_dict - known_codes.add(code) - +def _skip_if_bindings_pre_enum_docstrings(): from cuda.core._utils.version import binding_version - if binding_version() >= (13, 0, 0): - # Ensure expl_dict has no codes not known as a cudaError_t enum - extra_expl = sorted(set(expl_dict.keys()) - known_codes) - assert not extra_expl + if binding_version() < (13, 2, 0): + pytest.skip("cuda-bindings < 13.2.0 may not expose enum __doc__ strings") def test_check_driver_error(): @@ -85,6 +56,56 @@ def test_check_runtime_error(): assert num_unexpected < len(driver.CUresult) * 0.5 +def test_driver_error_enum_has_non_empty_docstring(): + _skip_if_bindings_pre_enum_docstrings() + + doc = driver.CUresult.CUDA_ERROR_INVALID_VALUE.__doc__ + assert doc is not None + assert doc.strip() != "" + + +def test_runtime_error_enum_has_non_empty_docstring(): + _skip_if_bindings_pre_enum_docstrings() + + doc = runtime.cudaError_t.cudaErrorInvalidValue.__doc__ + assert doc is not None + assert doc.strip() != "" + + +def test_check_driver_error_attaches_explanation(): + error = driver.CUresult.CUDA_ERROR_INVALID_VALUE + name_err, name = driver.cuGetErrorName(error) + assert name_err == driver.CUresult.CUDA_SUCCESS + desc_err, desc = driver.cuGetErrorString(error) + assert desc_err == driver.CUresult.CUDA_SUCCESS + expl = cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS.get(int(error)) + assert expl is not None + assert expl != desc.decode() + + with pytest.raises(cuda_utils.CUDAError) as e: + cuda_utils._check_driver_error(error) + + assert str(e.value) == f"{name.decode()}: {expl}" + assert str(e.value) != f"{name.decode()}: {desc.decode()}" + + +def test_check_runtime_error_attaches_explanation(): + error = runtime.cudaError_t.cudaErrorInvalidValue + name_err, name = runtime.cudaGetErrorName(error) + assert name_err == runtime.cudaError_t.cudaSuccess + desc_err, desc = 
runtime.cudaGetErrorString(error) + assert desc_err == runtime.cudaError_t.cudaSuccess + expl = cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS.get(int(error)) + assert expl is not None + assert expl != desc.decode() + + with pytest.raises(cuda_utils.CUDAError) as e: + cuda_utils._check_runtime_error(error) + + assert str(e.value) == f"{name.decode()}: {expl}" + assert str(e.value) != f"{name.decode()}: {desc.decode()}" + + def test_precondition(): def checker(*args, what=""): if args[0] < 0: diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py new file mode 100644 index 0000000000..c924ecb2cb --- /dev/null +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -0,0 +1,127 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import pytest + +from cuda.core._utils.enum_explanations_helpers import ( + DocstringBackedExplanations, + _strip_doxygen_double_colon_prefixes, + clean_enum_member_docstring, +) + + +class _FakeEnumMember: + def __init__(self, doc): + self.__doc__ = doc + + +class _FakeEnumType: + def __init__(self, members): + self._members = members + + def __call__(self, code): + try: + return self._members[code] + except KeyError as e: + raise ValueError(code) from e + + +@pytest.mark.parametrize( + ("raw", "expected"), + [ + pytest.param("a\nb c", "a b c", id="collapse_whitespace"), + pytest.param(" x \n ", "x", id="strip_padding"), + pytest.param( + "see\n:py:obj:`~.cuInit()` or :py:obj:`cuCtxDestroy()`", + "see cuInit() or cuCtxDestroy()", + id="sphinx_py_obj_roles", + ), + pytest.param( + "x :py:func:`~.cudaMalloc()` y", + "x cudaMalloc() y", + id="sphinx_py_func_role", + ), + pytest.param("**Note:** text", "Note: text", id="strip_bold"), + pytest.param("*Note* text", "Note text", id="strip_italic"), + pytest.param("[Deprecated]\n", "[Deprecated]", 
id="deprecated_line"), + pytest.param("non- linear", "non-linear", id="hyphen_space_after"), + pytest.param("word -word", "word-word", id="hyphen_space_before"), + pytest.param( + 'Please see\n:py:obj:`~.Interactions`with the CUDA Driver API" for more information.', + 'Please see "Interactions with the CUDA Driver API" for more information.', + id="codegen_broken_interactions_role", + ), + ], +) +def test_clean_enum_member_docstring_examples(raw, expected): + assert clean_enum_member_docstring(raw) == expected + + +def test_clean_enum_member_docstring_none_input(): + assert clean_enum_member_docstring(None) is None + + +@pytest.mark.parametrize( + ("raw", "expected"), + [ + pytest.param("see ::CUDA_SUCCESS", "see CUDA_SUCCESS", id="type_ref"), + pytest.param("Foo::Bar unchanged", "Foo::Bar unchanged", id="cpp_scope_preserved"), + pytest.param("::cuInit() and ::CUstream", "cuInit() and CUstream", id="multiple_prefixes"), + ], +) +def test_strip_doxygen_double_colon_prefixes(raw, expected): + assert _strip_doxygen_double_colon_prefixes(raw) == expected + + +def test_docstring_backed_get_returns_default_for_non_enum_code(): + lut = DocstringBackedExplanations(_FakeEnumType({})) + assert lut.get(-1) is None + assert lut.get(-1, default="sentinel") == "sentinel" + + +def test_docstring_backed_get_returns_default_for_missing_docstring(): + lut = DocstringBackedExplanations(_FakeEnumType({7: _FakeEnumMember(None)})) + assert lut.get(7) is None + assert lut.get(7, default="sentinel") == "sentinel" + + +def test_docstring_backed_get_returns_default_for_unknown_code(): + lut = DocstringBackedExplanations(_FakeEnumType({})) + assert lut.get(99, default="sentinel") == "sentinel" + + +def test_docstring_backed_get_returns_default_for_missing_docstring_without_fallback(): + lut = DocstringBackedExplanations(_FakeEnumType({7: _FakeEnumMember(None)})) + assert lut.get(7, default="sentinel") == "sentinel" + + +def 
test_get_best_available_explanations_uses_fallback_before_13_2(monkeypatch): + import cuda.core._utils.enum_explanations_helpers as cleanup + + fallback = {7: "fallback text"} + monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 1, 1)) + assert cleanup.get_best_available_explanations(_FakeEnumType({7: _FakeEnumMember("doc")}), fallback) is fallback + + +def test_get_best_available_explanations_prefers_docstrings_from_13_2(monkeypatch): + import cuda.core._utils.enum_explanations_helpers as cleanup + + fallback = {7: "fallback text"} + monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 2, 0)) + expl = cleanup.get_best_available_explanations( + _FakeEnumType({7: _FakeEnumMember("clean me")}), + fallback, + ) + assert isinstance(expl, DocstringBackedExplanations) + assert expl.get(7) == "clean me" + + +def test_driver_cu_result_explanations_get_matches_clean_docstring(): + pytest.importorskip("cuda.bindings") + from cuda.bindings import driver + from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS + + e = driver.CUresult.CUDA_SUCCESS + code = int(e) + assert DRIVER_CU_RESULT_EXPLANATIONS.get(code) == clean_enum_member_docstring(e.__doc__) diff --git a/toolshed/reformat_cuda_enums_as_py.py b/toolshed/reformat_cuda_enums_as_py.py deleted file mode 100755 index 2b80447fd1..0000000000 --- a/toolshed/reformat_cuda_enums_as_py.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python3 - -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 - -import sys -from pathlib import Path - - -def extract_enum_block(header_file_lines): - line_iter = iter(header_file_lines) - for line in line_iter: - if line == "typedef enum cudaError_enum {": - closing_line = "} CUresult;" - python_dict_name = "DRIVER_CU_RESULT_EXPLANATIONS" - break - if line == "enum __device_builtin__ cudaError": - line = next(line_iter) - assert line == "{", line - closing_line = "};" - python_dict_name = "RUNTIME_CUDA_ERROR_EXPLANATIONS" - break - else: - raise RuntimeError("Opening line not found.") - block = [] - for line in line_iter: - if line == closing_line: - break - block.append(line) - else: - raise RuntimeError("Closing line not found.") - return python_dict_name, block - - -def parse_enum_doc_and_value_pairs(enum_block): - entries = [] - comment_lines = [] - inside_comment = False - - for line in enum_block: - stripped = line.strip() - if not stripped: - continue - - if stripped.startswith("/**"): - inside_comment = True - comment = stripped[3:].lstrip() - if comment: - comment_lines = [comment] - elif inside_comment: - if stripped.endswith("*/"): - comment = stripped[:-2].strip() - if comment: - comment_lines.append(comment) - inside_comment = False - else: - comment_lines.append(stripped.lstrip("*").strip()) - elif stripped: - assert stripped.count(",") <= 1, line - stripped = stripped.replace(",", "") - flds = stripped.split(" = ") - assert len(flds) == 2, line - try: - val = int(flds[1].strip()) - except Exception as e: - raise RuntimeError(f"Unexpected {line=!r}") from e - entries.append((int(val), comment_lines)) - comment_lines = [] - - return entries - - -def emit_python_dict(python_dict_name, entries): - print(f"{python_dict_name} = {{") - for val, lines in entries: - py_lines = [] - continuation_space = "" - for line in lines: - if line == r"\deprecated": - continue - mod_line = line.replace("\\ref ", "") - assert "\\" not in mod_line, line - mod_line = mod_line.replace('"', 
'\\"') - py_lines.append(f'"{continuation_space}{mod_line}"') - continuation_space = " " - assert py_lines, lines - if len(py_lines) == 1: - print(f" {val}: {py_lines[0]},") - else: - print(f" {val}: (") - for py_line in py_lines: - print(f" {py_line}") - print(" ),") - print("}") - - -def run(args): - if len(args) != 1: - print( - "Usage: reformat_cuda_enums_as_py.py /path/to/cuda.h|driver_types.h", - file=sys.stderr, - ) - sys.exit(1) - - header_file_text = Path(sys.argv[1]).read_text().splitlines() - python_dict_name, enum_block = extract_enum_block(header_file_text) - entries = parse_enum_doc_and_value_pairs(enum_block) - emit_python_dict(python_dict_name, entries) - - -if __name__ == "__main__": - run(sys.argv[1:]) From 8b0c526f1020c1fbda17c60f5a69b090c6b65dfc Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 09:08:25 -0700 Subject: [PATCH 2/9] cuda_core: recognize 12.9.6 enum docstrings Treat the 12.9.6 backport line as docstring-capable and reuse the same version predicate in tests so error explanations follow the bindings releases that already expose usable enum docs. Made-with: Cursor --- .../core/_utils/enum_explanations_helpers.py | 25 ++++++++---- cuda_core/tests/test_cuda_utils.py | 5 ++- .../test_utils_enum_explanations_helpers.py | 39 ++++++++++++++----- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 8a4e8240c2..414c111fce 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -4,9 +4,10 @@ """Internal support for error-enum explanations. ``cuda_core`` keeps frozen 13.1.1 fallback tables for older ``cuda-bindings`` -releases. Starting with ``cuda-bindings`` 13.2.0, driver/runtime error enums -carry usable ``__doc__`` text. 
This module decides which source to use and -normalizes generated docstrings so user-facing ``CUDAError`` messages stay +releases. Driver/runtime error enums carry usable ``__doc__`` text starting in +the 12.x backport line at ``cuda-bindings`` 12.9.6, and in the mainline 13.x +series at ``cuda-bindings`` 13.2.0. This module decides which source to use +and normalizes generated docstrings so user-facing ``CUDAError`` messages stay close to the long-form explanation prose. The cleanup rules here were derived while validating docstring-vs-dict parity @@ -20,7 +21,8 @@ import re from typing import Any -_MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) +_MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (12, 9, 6) +_MIN_13X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) # ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). @@ -33,6 +35,14 @@ def _binding_version() -> tuple[int, int, int]: return tuple(int(v) for v in parts) +def _binding_version_has_usable_enum_docstrings(version: tuple[int, int, int]) -> bool: + """Whether released bindings are known to carry usable error-enum ``__doc__`` text.""" + return ( + _MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS <= version < (13, 0, 0) + or version >= _MIN_13X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS + ) + + def _strip_doxygen_double_colon_prefixes(s: str) -> str: """Remove Doxygen-style ``::`` before CUDA identifiers (not C++ ``Foo::Bar`` scope). @@ -119,9 +129,10 @@ def get_best_available_explanations( ) -> DocstringBackedExplanations | dict[int, str | tuple[str, ...]]: """Pick one explanation source per bindings version. - ``cuda-bindings`` < 13.2.0: use the frozen 13.1.1 fallback tables. - ``cuda-bindings`` >= 13.2.0: use enum-member ``__doc__`` exclusively. + Use enum-member ``__doc__`` only for bindings versions known to expose + usable per-member text (12.9.6+ in the 12.x backport line, 13.2.0+ in the + 13.x mainline). Otherwise keep using the frozen 13.1.1 fallback tables. 
""" - if _binding_version() < _MIN_BINDING_VERSION_FOR_ENUM_DOCSTRINGS: + if not _binding_version_has_usable_enum_docstrings(_binding_version()): return fallback return DocstringBackedExplanations(enum_type) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 238a2df047..7dc3703749 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -12,10 +12,11 @@ def _skip_if_bindings_pre_enum_docstrings(): + from cuda.core._utils.enum_explanations_helpers import _binding_version_has_usable_enum_docstrings from cuda.core._utils.version import binding_version - if binding_version() < (13, 2, 0): - pytest.skip("cuda-bindings < 13.2.0 may not expose enum __doc__ strings") + if not _binding_version_has_usable_enum_docstrings(binding_version()): + pytest.skip("cuda-bindings version does not expose usable enum __doc__ strings") def test_check_driver_error(): diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index c924ecb2cb..e4d7faf987 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -6,6 +6,7 @@ from cuda.core._utils.enum_explanations_helpers import ( DocstringBackedExplanations, + _binding_version_has_usable_enum_docstrings, _strip_doxygen_double_colon_prefixes, clean_enum_member_docstring, ) @@ -96,25 +97,43 @@ def test_docstring_backed_get_returns_default_for_missing_docstring_without_fall assert lut.get(7, default="sentinel") == "sentinel" -def test_get_best_available_explanations_uses_fallback_before_13_2(monkeypatch): - import cuda.core._utils.enum_explanations_helpers as cleanup - - fallback = {7: "fallback text"} - monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 1, 1)) - assert cleanup.get_best_available_explanations(_FakeEnumType({7: _FakeEnumMember("doc")}), fallback) is fallback +@pytest.mark.parametrize( + ("version", 
"expected"), + [ + pytest.param((12, 9, 5), False, id="before_12_9_6"), + pytest.param((12, 9, 6), True, id="from_12_9_6"), + pytest.param((13, 0, 0), False, id="13_0_mainline_gap"), + pytest.param((13, 1, 1), False, id="13_1_1"), + pytest.param((13, 2, 0), True, id="from_13_2_0"), + ], +) +def test_binding_version_has_usable_enum_docstrings(version, expected): + assert _binding_version_has_usable_enum_docstrings(version) is expected -def test_get_best_available_explanations_prefers_docstrings_from_13_2(monkeypatch): +@pytest.mark.parametrize( + ("version", "expects_docstrings"), + [ + pytest.param((12, 9, 5), False, id="before_12_9_6"), + pytest.param((12, 9, 6), True, id="from_12_9_6"), + pytest.param((13, 0, 0), False, id="13_0_mainline_gap"), + pytest.param((13, 2, 0), True, id="from_13_2_0"), + ], +) +def test_get_best_available_explanations_switches_by_version(monkeypatch, version, expects_docstrings): import cuda.core._utils.enum_explanations_helpers as cleanup fallback = {7: "fallback text"} - monkeypatch.setattr(cleanup, "_binding_version", lambda: (13, 2, 0)) + monkeypatch.setattr(cleanup, "_binding_version", lambda: version) expl = cleanup.get_best_available_explanations( _FakeEnumType({7: _FakeEnumMember("clean me")}), fallback, ) - assert isinstance(expl, DocstringBackedExplanations) - assert expl.get(7) == "clean me" + if expects_docstrings: + assert isinstance(expl, DocstringBackedExplanations) + assert expl.get(7) == "clean me" + else: + assert expl is fallback def test_driver_cu_result_explanations_get_matches_clean_docstring(): From 4fde3d6d62cb652542260d3f7c6db75e225a80a8 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 11:49:34 -0700 Subject: [PATCH 3/9] cuda_core: drop unused enum doc cleanup helper Remove the old Doxygen ``::`` normalization path now that error explanations no longer depend on dict-vs-docstring parity checks. 
This keeps the helper focused on the cleanup rules that still affect user-facing CUDAError messages. Made-with: Cursor --- .../core/_utils/enum_explanations_helpers.py | 32 ++++++------------- .../test_utils_enum_explanations_helpers.py | 13 -------- 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 414c111fce..29f48ce843 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -8,11 +8,11 @@ the 12.x backport line at ``cuda-bindings`` 12.9.6, and in the mainline 13.x series at ``cuda-bindings`` 13.2.0. This module decides which source to use and normalizes generated docstrings so user-facing ``CUDAError`` messages stay -close to the long-form explanation prose. +presentable. -The cleanup rules here were derived while validating docstring-vs-dict parity -in PR #1805. Keep them narrow and remove them when codegen / fallback support is -no longer needed. +The cleanup rules here were derived while validating generated enum docstrings +in PR #1805. Keep them narrow and remove them when codegen quirks or fallback +support are no longer needed. """ from __future__ import annotations @@ -43,25 +43,11 @@ def _binding_version_has_usable_enum_docstrings(version: tuple[int, int, int]) - ) -def _strip_doxygen_double_colon_prefixes(s: str) -> str: - """Remove Doxygen-style ``::`` before CUDA identifiers (not C++ ``Foo::Bar`` scope). - - The frozen fallback tables come from CUDA header comments and therefore use - Doxygen ``::name`` references. Generated enum ``__doc__`` text uses Sphinx - roles instead, so parity checks need a small amount of normalization. - """ - prev = None - while prev != s: - prev = s - s = re.sub(r"(? str: """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text. 
This is a narrow workaround for wrapped forms such as ``non- linear`` that - otherwise differ from the single-line fallback prose. + would otherwise look awkward in user-facing messages. """ prev = None while prev != s: @@ -74,10 +60,10 @@ def _fix_hyphenation_wordwrap_spacing(s: str) -> str: def clean_enum_member_docstring(doc: str | None) -> str | None: """Turn an enum member ``__doc__`` into plain text. - The generated enum docstrings are already close to the fallback explanation - prose, but not byte-identical: they may contain Sphinx inline roles, line - wrapping, or a small known codegen defect. Normalize only those differences - so the text is suitable for user-facing error messages. + The generated enum docstrings are already close to user-facing prose, but + they may contain Sphinx inline roles, line wrapping, or a small known + codegen defect. Normalize only those differences so the text is suitable + for error messages. """ if doc is None: return None diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index e4d7faf987..2c65285322 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -7,7 +7,6 @@ from cuda.core._utils.enum_explanations_helpers import ( DocstringBackedExplanations, _binding_version_has_usable_enum_docstrings, - _strip_doxygen_double_colon_prefixes, clean_enum_member_docstring, ) @@ -63,18 +62,6 @@ def test_clean_enum_member_docstring_none_input(): assert clean_enum_member_docstring(None) is None -@pytest.mark.parametrize( - ("raw", "expected"), - [ - pytest.param("see ::CUDA_SUCCESS", "see CUDA_SUCCESS", id="type_ref"), - pytest.param("Foo::Bar unchanged", "Foo::Bar unchanged", id="cpp_scope_preserved"), - pytest.param("::cuInit() and ::CUstream", "cuInit() and CUstream", id="multiple_prefixes"), - ], -) -def test_strip_doxygen_double_colon_prefixes(raw, expected): - assert 
_strip_doxygen_double_colon_prefixes(raw) == expected - - def test_docstring_backed_get_returns_default_for_non_enum_code(): lut = DocstringBackedExplanations(_FakeEnumType({})) assert lut.get(-1) is None From 74479647918172aa1b68b11be78a8dea163133eb Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 12:02:57 -0700 Subject: [PATCH 4/9] cuda_core: clarify enum explanation helper docs Clarify that DocstringBackedExplanations is a compatibility shim for the existing ``.get(int(error))`` lookup shape, and trim a low-value implementation note from the docstring cleanup workaround comment. Made-with: Cursor --- cuda_core/cuda/core/_utils/enum_explanations_helpers.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 29f48ce843..c58c1c9aee 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -69,8 +69,7 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: return None s = doc # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed - # in cuda-bindings code generation. Do not use a raw string for the needle: - # r"\n..." would not match the real newline present in __doc__. + # in cuda-bindings code generation. s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') s = re.sub( r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`", @@ -85,11 +84,9 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: class DocstringBackedExplanations: - """``dict.get``-like lookup over enum-member ``__doc__`` strings. + """Compatibility shim exposing enum-member ``__doc__`` text via ``dict.get``. - Once the bindings-version gate says docstrings are available, use them - exclusively. 
Missing docstrings should surface as ``None`` / ``default`` - rather than silently mixing in frozen fallback prose. + Keeps the existing ``.get(int(error))`` lookup shape used by ``cuda_utils.pyx``. """ __slots__ = ("_enum_type",) From cddb5652c1ab72ff88882363d96aa0907a8dd609 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 5 Apr 2026 12:29:27 -0700 Subject: [PATCH 5/9] cuda_core: trim enum explanation helper tests Remove redundant helper coverage now that DocstringBackedExplanations.get() and clean_enum_member_docstring() are already exercised elsewhere, and simplify the remaining test module imports. Made-with: Cursor --- .../test_utils_enum_explanations_helpers.py | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index 2c65285322..da70ef6e58 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -4,6 +4,7 @@ import pytest +from cuda.core._utils import enum_explanations_helpers from cuda.core._utils.enum_explanations_helpers import ( DocstringBackedExplanations, _binding_version_has_usable_enum_docstrings, @@ -74,16 +75,6 @@ def test_docstring_backed_get_returns_default_for_missing_docstring(): assert lut.get(7, default="sentinel") == "sentinel" -def test_docstring_backed_get_returns_default_for_unknown_code(): - lut = DocstringBackedExplanations(_FakeEnumType({})) - assert lut.get(99, default="sentinel") == "sentinel" - - -def test_docstring_backed_get_returns_default_for_missing_docstring_without_fallback(): - lut = DocstringBackedExplanations(_FakeEnumType({7: _FakeEnumMember(None)})) - assert lut.get(7, default="sentinel") == "sentinel" - - @pytest.mark.parametrize( ("version", "expected"), [ @@ -108,11 +99,9 @@ def test_binding_version_has_usable_enum_docstrings(version, expected): ], ) def 
test_get_best_available_explanations_switches_by_version(monkeypatch, version, expects_docstrings): - import cuda.core._utils.enum_explanations_helpers as cleanup - fallback = {7: "fallback text"} - monkeypatch.setattr(cleanup, "_binding_version", lambda: version) - expl = cleanup.get_best_available_explanations( + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: version) + expl = enum_explanations_helpers.get_best_available_explanations( _FakeEnumType({7: _FakeEnumMember("clean me")}), fallback, ) @@ -121,13 +110,3 @@ def test_get_best_available_explanations_switches_by_version(monkeypatch, versio assert expl.get(7) == "clean me" else: assert expl is fallback - - -def test_driver_cu_result_explanations_get_matches_clean_docstring(): - pytest.importorskip("cuda.bindings") - from cuda.bindings import driver - from cuda.core._utils.driver_cu_result_explanations import DRIVER_CU_RESULT_EXPLANATIONS - - e = driver.CUresult.CUDA_SUCCESS - code = int(e) - assert DRIVER_CU_RESULT_EXPLANATIONS.get(code) == clean_enum_member_docstring(e.__doc__) From f70c09963e1c4e866d7392eee66e14f17fcd35b1 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 6 Apr 2026 11:41:37 -0700 Subject: [PATCH 6/9] cuda_core: generalize enum doc cleanup regexes Broaden the inline-role cleanup to accept generic RST roles and widen the word-wrap hyphen fix beyond lowercase-only cases. Keep the current 13.2.x output unchanged while expanding unit coverage for the newly supported forms. 
Made-with: Cursor --- .../core/_utils/enum_explanations_helpers.py | 17 ++++++++--------- .../test_utils_enum_explanations_helpers.py | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index c58c1c9aee..27b5dc1c74 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -23,6 +23,9 @@ _MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (12, 9, 6) _MIN_13X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (13, 2, 0) +_RST_INLINE_ROLE_RE = re.compile(r":(?:[a-z]+:)?[a-z]+:`([^`]+)`") +_WORDWRAP_HYPHEN_AFTER_RE = re.compile(r"(?<=[0-9A-Za-z_])- (?=[0-9A-Za-z_])") +_WORDWRAP_HYPHEN_BEFORE_RE = re.compile(r"(?<=[0-9A-Za-z_]) -(?=[0-9A-Za-z_])") # ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). @@ -46,14 +49,14 @@ def _binding_version_has_usable_enum_docstrings(version: tuple[int, int, int]) - def _fix_hyphenation_wordwrap_spacing(s: str) -> str: """Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text. - This is a narrow workaround for wrapped forms such as ``non- linear`` that - would otherwise look awkward in user-facing messages. + This targets asymmetric wrap artifacts such as ``non- linear`` or + ``GPU- Direct`` while leaving intentional ``a - b`` separators alone. """ prev = None while prev != s: prev = s - s = re.sub(r"([a-z])- ([a-z])", r"\1-\2", s) - s = re.sub(r"([a-z]) -([a-z])", r"\1-\2", s) + s = _WORDWRAP_HYPHEN_AFTER_RE.sub("-", s) + s = _WORDWRAP_HYPHEN_BEFORE_RE.sub("-", s) return s @@ -71,11 +74,7 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed # in cuda-bindings code generation. 
s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') - s = re.sub( - r":(?:py:)?(?:obj|func|meth|class|mod|data|const|exc):`([^`]+)`", - lambda m: re.sub(r"^~?\.", "", m.group(1)), - s, - ) + s = _RST_INLINE_ROLE_RE.sub(lambda m: re.sub(r"^~?\.", "", m.group(1)), s) s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s) s = re.sub(r"\*([^*]+)\*", r"\1", s) s = re.sub(r"\s+", " ", s).strip() diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index da70ef6e58..d31e40ee47 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -36,18 +36,32 @@ def __call__(self, code): pytest.param( "see\n:py:obj:`~.cuInit()` or :py:obj:`cuCtxDestroy()`", "see cuInit() or cuCtxDestroy()", - id="sphinx_py_obj_roles", + id="rst_py_domain_role", ), pytest.param( "x :py:func:`~.cudaMalloc()` y", "x cudaMalloc() y", - id="sphinx_py_func_role", + id="rst_py_role", ), + pytest.param( + "x :c:func:`cuLaunchKernel` y", + "x cuLaunchKernel y", + id="rst_non_py_domain_role", + ), + pytest.param("x :term:`device` y", "x device y", id="rst_role_without_domain"), pytest.param("**Note:** text", "Note: text", id="strip_bold"), pytest.param("*Note* text", "Note text", id="strip_italic"), pytest.param("[Deprecated]\n", "[Deprecated]", id="deprecated_line"), pytest.param("non- linear", "non-linear", id="hyphen_space_after"), pytest.param("word -word", "word-word", id="hyphen_space_before"), + pytest.param("GPU- Direct", "GPU-Direct", id="hyphen_space_after_uppercase"), + pytest.param("peer -GPU", "peer-GPU", id="hyphen_space_before_uppercase"), + pytest.param("L2- cache", "L2-cache", id="hyphen_space_after_digit"), + pytest.param( + "Common causes are - a. bad access", + "Common causes are - a. 
bad access", + id="preserve_dash_separator", + ), pytest.param( 'Please see\n:py:obj:`~.Interactions`with the CUDA Driver API" for more information.', 'Please see "Interactions with the CUDA Driver API" for more information.', From c4b5862a8693ff82eca5b7dd0272fc8894da0cb2 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 6 Apr 2026 11:41:54 -0700 Subject: [PATCH 7/9] cuda_core: document inline enum cleanup regexes Add terse comments for the remaining inline regex substitutions so the docstring cleanup steps are easier to follow without changing behavior. Made-with: Cursor --- cuda_core/cuda/core/_utils/enum_explanations_helpers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 27b5dc1c74..3b42ae5406 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -74,9 +74,13 @@ def clean_enum_member_docstring(doc: str | None) -> str | None: # Known codegen bug on cudaErrorIncompatibleDriverContext. Remove once fixed # in cuda-bindings code generation. s = s.replace("\n:py:obj:`~.Interactions`", ' "Interactions ') + # Drop a leading "~." or "." after removing the surrounding RST inline role. s = _RST_INLINE_ROLE_RE.sub(lambda m: re.sub(r"^~?\.", "", m.group(1)), s) + # Strip simple bold emphasis markers. s = re.sub(r"\*\*([^*]+)\*\*", r"\1", s) + # Strip simple italic emphasis markers. s = re.sub(r"\*([^*]+)\*", r"\1", s) + # Collapse wrapped lines and repeated spaces. s = re.sub(r"\s+", " ", s).strip() s = _fix_hyphenation_wordwrap_spacing(s) return s From 70258546444afe54b2632a39d68e7ec7f601c996 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 6 Apr 2026 12:20:42 -0700 Subject: [PATCH 8/9] cuda_core: lazily import frozen enum explanation tables Keep cuda_utils.pyx unchanged while moving the large 13.1.1 explanation tables into frozen-only modules that are imported only for older bindings. Use loader-aware selection in enum_explanations_helpers.py and add tests that prove docstring-capable bindings skip the frozen-module imports. Made-with: Cursor --- .../_utils/driver_cu_result_explanations.py | 355 +----------- .../driver_cu_result_explanations_frozen.py | 350 +++++++++++ .../core/_utils/enum_explanations_helpers.py | 10 +- .../_utils/runtime_cuda_error_explanations.py | 543 +----------------- .../runtime_cuda_error_explanations_frozen.py | 538 +++++++++++++++++ .../test_utils_enum_explanations_helpers.py | 44 ++ 6 files changed, 954 insertions(+), 886 deletions(-) create mode 100644 cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py create mode 100644 cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py index 5184e72650..f4894d7563 100644 --- a/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py +++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations.py @@ -4,352 +4,11 @@ from cuda.bindings import driver from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.1.1 -_FALLBACK_EXPLANATIONS = { - 0: ( - "The API call returned with no errors. In the case of query calls, this" - " also means that the operation being queried is complete (see" - " ::cuEventQuery() and ::cuStreamQuery())." - ), - 1: ( - "This indicates that one or more of the parameters passed to the API call" - " is not within an acceptable range of values." 
- ), - 2: ( - "The API call failed because it was unable to allocate enough memory or" - " other resources to perform the requested operation." - ), - 3: ( - "This indicates that the CUDA driver has not been initialized with" - " ::cuInit() or that initialization has failed." - ), - 4: "This indicates that the CUDA driver is in the process of shutting down.", - 5: ( - "This indicates profiler is not initialized for this run. This can" - " happen when the application is running with external profiling tools" - " like visual profiler." - ), - 6: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to attempt to enable/disable the profiling via ::cuProfilerStart or" - " ::cuProfilerStop without initialization." - ), - 7: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to call cuProfilerStart() when profiling is already enabled." - ), - 8: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to call cuProfilerStop() when profiling is already disabled." - ), - 34: ( - "This indicates that the CUDA driver that the application has loaded is a" - " stub library. Applications that run with the stub rather than a real" - " driver loaded will result in CUDA API returning this error." - ), - 36: ( - "This indicates that the API call requires a newer CUDA driver than the one" - " currently installed. Users should install an updated NVIDIA CUDA driver" - " to allow the API call to succeed." - ), - 46: ( - "This indicates that requested CUDA device is unavailable at the current" - " time. Devices are often unavailable due to use of" - " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED." 
- ), - 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), - 101: ( - "This indicates that the device ordinal supplied by the user does not" - " correspond to a valid CUDA device or that the action requested is" - " invalid for the specified device." - ), - 102: "This error indicates that the Grid license is not applied.", - 200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."), - 201: ( - "This most frequently indicates that there is no context bound to the" - " current thread. This can also be returned if the context passed to an" - " API call is not a valid handle (such as a context that has had" - " ::cuCtxDestroy() invoked on it). This can also be returned if a user" - " mixes different API versions (i.e. 3010 context with 3020 API calls)." - " See ::cuCtxGetApiVersion() for more details." - " This can also be returned if the green context passed to an API call" - " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API." - ), - 202: ( - "This indicated that the context being supplied as a parameter to the" - " API call was already the active context." - " This error return is deprecated as of CUDA 3.2. It is no longer an" - " error to attempt to push the active context via ::cuCtxPushCurrent()." - ), - 205: "This indicates that a map or register operation has failed.", - 206: "This indicates that an unmap or unregister operation has failed.", - 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), - 208: "This indicates that the resource is already mapped.", - 209: ( - "This indicates that there is no kernel image available that is suitable" - " for the device. This can occur when a user specifies code generation" - " options for a particular CUDA source file that do not include the" - " corresponding device configuration." 
- ), - 210: "This indicates that a resource has already been acquired.", - 211: "This indicates that a resource is not mapped.", - 212: ("This indicates that a mapped resource is not available for access as an array."), - 213: ("This indicates that a mapped resource is not available for access as a pointer."), - 214: ("This indicates that an uncorrectable ECC error was detected during execution."), - 215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."), - 216: ( - "This indicates that the ::CUcontext passed to the API call can" - " only be bound to a single CPU thread at a time but is already" - " bound to a CPU thread." - ), - 217: ("This indicates that peer access is not supported across the given devices."), - 218: "This indicates that a PTX JIT compilation failed.", - 219: "This indicates an error with OpenGL or DirectX context.", - 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), - 221: "This indicates that the PTX JIT compiler library was not found.", - 222: "This indicates that the provided PTX was compiled with an unsupported toolchain.", - 223: "This indicates that the PTX JIT compilation was disabled.", - 224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."), - 225: ( - "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." - ), - 226: ( - "This indicates that an exception occurred on the device that is now" - " contained by the GPU's error containment capability. Common causes are -" - " a. Certain types of invalid accesses of peer GPU memory over nvlink" - " b. Certain classes of hardware errors" - " This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must" - " be terminated and relaunched." 
- ), - 300: ( - "This indicates that the device kernel source is invalid. This includes" - " compilation/linker errors encountered in device code or user error." - ), - 301: "This indicates that the file specified was not found.", - 302: "This indicates that a link to a shared object failed to resolve.", - 303: "This indicates that initialization of a shared object failed.", - 304: "This indicates that an OS call failed.", - 400: ( - "This indicates that a resource handle passed to the API call was not" - " valid. Resource handles are opaque types like ::CUstream and ::CUevent." - ), - 401: ( - "This indicates that a resource required by the API call is not in a" - " valid state to perform the requested operation." - ), - 402: ( - "This indicates an attempt was made to introspect an object in a way that" - " would discard semantically important information. This is either due to" - " the object using funtionality newer than the API version used to" - " introspect it or omission of optional return arguments." - ), - 500: ( - "This indicates that a named symbol was not found. Examples of symbols" - " are global/constant variable names, driver function names, texture names," - " and surface names." - ), - 600: ( - "This indicates that asynchronous operations issued previously have not" - " completed yet. This result is not actually an error, but must be indicated" - " differently than ::CUDA_SUCCESS (which indicates completion). Calls that" - " may return this value include ::cuEventQuery() and ::cuStreamQuery()." - ), - 700: ( - "While executing a kernel, the device encountered a" - " load or store instruction on an invalid memory address." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 701: ( - "This indicates that a launch did not occur because it did not have" - " appropriate resources. 
This error usually indicates that the user has" - " attempted to pass too many arguments to the device kernel, or the" - " kernel launch specifies too many threads for the kernel's register" - " count. Passing arguments of the wrong size (i.e. a 64-bit pointer" - " when a 32-bit int is expected) is equivalent to passing too many" - " arguments and can also result in this error." - ), - 702: ( - "This indicates that the device kernel took too long to execute. This can" - " only occur if timeouts are enabled - see the device attribute" - " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), - 704: ( - "This error indicates that a call to ::cuCtxEnablePeerAccess() is" - " trying to re-enable peer access to a context which has already" - " had peer access to it enabled." - ), - 705: ( - "This error indicates that ::cuCtxDisablePeerAccess() is" - " trying to disable peer access which has not been enabled yet" - " via ::cuCtxEnablePeerAccess()." - ), - 708: ("This error indicates that the primary context for the specified device has already been initialized."), - 709: ( - "This error indicates that the context current to the calling thread" - " has been destroyed using ::cuCtxDestroy, or is a primary context which" - " has not yet been initialized." - ), - 710: ( - "A device-side assert triggered during kernel execution. The context" - " cannot be used anymore, and must be destroyed. All existing device" - " memory allocations from this context are invalid and must be" - " reconstructed if the program is to continue using CUDA." 
- ), - 711: ( - "This error indicates that the hardware resources required to enable" - " peer access have been exhausted for one or more of the devices" - " passed to ::cuCtxEnablePeerAccess()." - ), - 712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."), - 713: ( - "This error indicates that the pointer passed to ::cuMemHostUnregister()" - " does not correspond to any currently registered memory region." - ), - 714: ( - "While executing a kernel, the device encountered a stack error." - " This can be due to stack corruption or exceeding the stack size limit." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 715: ( - "While executing a kernel, the device encountered an illegal instruction." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 716: ( - "While executing a kernel, the device encountered a load or store instruction" - " on a memory address which is not aligned." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 717: ( - "While executing a kernel, the device encountered an instruction" - " which can only operate on memory locations in certain address spaces" - " (global, shared, or local), but was supplied a memory address not" - " belonging to an allowed address space." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 718: ( - "While executing a kernel, the device program counter wrapped its address space." 
- " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 719: ( - "An exception occurred on the device while executing a kernel. Common" - " causes include dereferencing an invalid device pointer and accessing" - " out of bounds shared memory. Less common cases can be system specific - more" - " information about these cases can be found in the system specific user guide." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 720: ( - "This error indicates that the number of blocks launched per grid for a kernel that was" - " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice" - " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor" - " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" - " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT." - ), - 721: ( - "An exception occurred on the device while exiting a kernel using tensor memory: the" - " tensor memory was not completely deallocated. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA, the" - " process must be terminated and relaunched." - ), - 800: "This error indicates that the attempted operation is not permitted.", - 801: ("This error indicates that the attempted operation is not supported on the current system or device."), - 802: ( - "This error indicates that the system is not yet ready to start any CUDA" - " work. To continue using CUDA, verify the system configuration is in a" - " valid state and all required driver daemons are actively running." 
- " More information about this error can be found in the system specific" - " user guide." - ), - 803: ( - "This error indicates that there is a mismatch between the versions of" - " the display driver and the CUDA driver. Refer to the compatibility documentation" - " for supported versions." - ), - 804: ( - "This error indicates that the system was upgraded to run with forward compatibility" - " but the visible hardware detected by CUDA does not support this configuration." - " Refer to the compatibility documentation for the supported hardware matrix or ensure" - " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES" - " environment variable." - ), - 805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.", - 806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.", - 807: ( - "This error indicates that the MPS server is not ready to accept new MPS client requests." - " This error can be returned when the MPS server is in the process of recovering from a fatal failure." - ), - 808: "This error indicates that the hardware resources required to create MPS client have been exhausted.", - 809: "This error indicates the the hardware resources required to support device connections have been exhausted.", - 810: "This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched.", - 811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", - 812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", - 900: ("This error indicates that the operation is not permitted when the stream is capturing."), - 901: ( - "This error indicates that the current capture sequence on the stream" - " has been invalidated due to a previous error." - ), - 902: ( - "This error indicates that the operation would have resulted in a merge of two independent capture sequences." - ), - 903: "This error indicates that the capture was not initiated in this stream.", - 904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."), - 905: ( - "This error indicates that a dependency would have been created which" - " crosses the capture sequence boundary. Only implicit in-stream ordering" - " dependencies are allowed to cross the boundary." - ), - 906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."), - 907: ( - "This error indicates that the operation is not permitted on an event which" - " was last recorded in a capturing stream." - ), - 908: ( - "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED" - " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a" - " different thread." - ), - 909: "This error indicates that the timeout specified for the wait operation has lapsed.", - 910: ( - "This error indicates that the graph update was not performed because it included" - " changes which violated constraints specific to instantiated graph update." - ), - 911: ( - "This indicates that an async error has occurred in a device outside of CUDA." 
- " If CUDA was waiting for an external device's signal before consuming shared data," - " the external device signaled an error indicating that the data is not valid for" - " consumption. This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must be" - " terminated and relaunched." - ), - 912: "Indicates a kernel launch error due to cluster misconfiguration.", - 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), - 914: ("This error indicates one or more resources passed in are not valid resource types for the operation."), - 915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."), - 916: ("This error indicates that an error happened during the key rotation sequence."), - 917: ( - "This error indicates that the requested operation is not permitted because the" - " stream is in a detached state. This can occur if the green context associated" - " with the stream has been destroyed, limiting the stream's operational capabilities." - ), - 999: "This indicates that an unknown internal error has occurred.", -} -DRIVER_CU_RESULT_EXPLANATIONS = get_best_available_explanations(driver.CUresult, _FALLBACK_EXPLANATIONS) +def _load_fallback_explanations(): + from cuda.core._utils.driver_cu_result_explanations_frozen import _FALLBACK_EXPLANATIONS + + return _FALLBACK_EXPLANATIONS + + +DRIVER_CU_RESULT_EXPLANATIONS = get_best_available_explanations(driver.CUresult, _load_fallback_explanations) diff --git a/cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py b/cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py new file mode 100644 index 0000000000..e396afaa79 --- /dev/null +++ b/cuda_core/cuda/core/_utils/driver_cu_result_explanations_frozen.py @@ -0,0 +1,350 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { + 0: ( + "The API call returned with no errors. In the case of query calls, this" + " also means that the operation being queried is complete (see" + " ::cuEventQuery() and ::cuStreamQuery())." + ), + 1: ( + "This indicates that one or more of the parameters passed to the API call" + " is not within an acceptable range of values." + ), + 2: ( + "The API call failed because it was unable to allocate enough memory or" + " other resources to perform the requested operation." + ), + 3: ( + "This indicates that the CUDA driver has not been initialized with" + " ::cuInit() or that initialization has failed." + ), + 4: "This indicates that the CUDA driver is in the process of shutting down.", + 5: ( + "This indicates profiler is not initialized for this run. This can" + " happen when the application is running with external profiling tools" + " like visual profiler." + ), + 6: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to attempt to enable/disable the profiling via ::cuProfilerStart or" + " ::cuProfilerStop without initialization." + ), + 7: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cuProfilerStart() when profiling is already enabled." + ), + 8: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cuProfilerStop() when profiling is already disabled." + ), + 34: ( + "This indicates that the CUDA driver that the application has loaded is a" + " stub library. Applications that run with the stub rather than a real" + " driver loaded will result in CUDA API returning this error." + ), + 36: ( + "This indicates that the API call requires a newer CUDA driver than the one" + " currently installed. Users should install an updated NVIDIA CUDA driver" + " to allow the API call to succeed." 
+ ), + 46: ( + "This indicates that requested CUDA device is unavailable at the current" + " time. Devices are often unavailable due to use of" + " ::CU_COMPUTEMODE_EXCLUSIVE_PROCESS or ::CU_COMPUTEMODE_PROHIBITED." + ), + 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), + 101: ( + "This indicates that the device ordinal supplied by the user does not" + " correspond to a valid CUDA device or that the action requested is" + " invalid for the specified device." + ), + 102: "This error indicates that the Grid license is not applied.", + 200: ("This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module."), + 201: ( + "This most frequently indicates that there is no context bound to the" + " current thread. This can also be returned if the context passed to an" + " API call is not a valid handle (such as a context that has had" + " ::cuCtxDestroy() invoked on it). This can also be returned if a user" + " mixes different API versions (i.e. 3010 context with 3020 API calls)." + " See ::cuCtxGetApiVersion() for more details." + " This can also be returned if the green context passed to an API call" + " was not converted to a ::CUcontext using ::cuCtxFromGreenCtx API." + ), + 202: ( + "This indicated that the context being supplied as a parameter to the" + " API call was already the active context." + " This error return is deprecated as of CUDA 3.2. It is no longer an" + " error to attempt to push the active context via ::cuCtxPushCurrent()." + ), + 205: "This indicates that a map or register operation has failed.", + 206: "This indicates that an unmap or unregister operation has failed.", + 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), + 208: "This indicates that the resource is already mapped.", + 209: ( + "This indicates that there is no kernel image available that is suitable" + " for the device. 
This can occur when a user specifies code generation" + " options for a particular CUDA source file that do not include the" + " corresponding device configuration." + ), + 210: "This indicates that a resource has already been acquired.", + 211: "This indicates that a resource is not mapped.", + 212: ("This indicates that a mapped resource is not available for access as an array."), + 213: ("This indicates that a mapped resource is not available for access as a pointer."), + 214: ("This indicates that an uncorrectable ECC error was detected during execution."), + 215: ("This indicates that the ::CUlimit passed to the API call is not supported by the active device."), + 216: ( + "This indicates that the ::CUcontext passed to the API call can" + " only be bound to a single CPU thread at a time but is already" + " bound to a CPU thread." + ), + 217: ("This indicates that peer access is not supported across the given devices."), + 218: "This indicates that a PTX JIT compilation failed.", + 219: "This indicates an error with OpenGL or DirectX context.", + 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), + 221: "This indicates that the PTX JIT compiler library was not found.", + 222: "This indicates that the provided PTX was compiled with an unsupported toolchain.", + 223: "This indicates that the PTX JIT compilation was disabled.", + 224: ("This indicates that the ::CUexecAffinityType passed to the API call is not supported by the active device."), + 225: ( + "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." + ), + 226: ( + "This indicates that an exception occurred on the device that is now" + " contained by the GPU's error containment capability. Common causes are -" + " a. Certain types of invalid accesses of peer GPU memory over nvlink" + " b. 
Certain classes of hardware errors"
+        " This leaves the process in an inconsistent state and any further CUDA"
+        " work will return the same error. To continue using CUDA, the process must"
+        " be terminated and relaunched."
+    ),
+    300: (
+        "This indicates that the device kernel source is invalid. This includes"
+        " compilation/linker errors encountered in device code or user error."
+    ),
+    301: "This indicates that the file specified was not found.",
+    302: "This indicates that a link to a shared object failed to resolve.",
+    303: "This indicates that initialization of a shared object failed.",
+    304: "This indicates that an OS call failed.",
+    400: (
+        "This indicates that a resource handle passed to the API call was not"
+        " valid. Resource handles are opaque types like ::CUstream and ::CUevent."
+    ),
+    401: (
+        "This indicates that a resource required by the API call is not in a"
+        " valid state to perform the requested operation."
+    ),
+    402: (
+        "This indicates an attempt was made to introspect an object in a way that"
+        " would discard semantically important information. This is either due to"
+        " the object using functionality newer than the API version used to"
+        " introspect it or omission of optional return arguments."
+    ),
+    500: (
+        "This indicates that a named symbol was not found. Examples of symbols"
+        " are global/constant variable names, driver function names, texture names,"
+        " and surface names."
+    ),
+    600: (
+        "This indicates that asynchronous operations issued previously have not"
+        " completed yet. This result is not actually an error, but must be indicated"
+        " differently than ::CUDA_SUCCESS (which indicates completion). Calls that"
+        " may return this value include ::cuEventQuery() and ::cuStreamQuery()."
+    ),
+    700: (
+        "While executing a kernel, the device encountered a"
+        " load or store instruction on an invalid memory address."
+ " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 701: ( + "This indicates that a launch did not occur because it did not have" + " appropriate resources. This error usually indicates that the user has" + " attempted to pass too many arguments to the device kernel, or the" + " kernel launch specifies too many threads for the kernel's register" + " count. Passing arguments of the wrong size (i.e. a 64-bit pointer" + " when a 32-bit int is expected) is equivalent to passing too many" + " arguments and can also result in this error." + ), + 702: ( + "This indicates that the device kernel took too long to execute. This can" + " only occur if timeouts are enabled - see the device attribute" + " ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), + 704: ( + "This error indicates that a call to ::cuCtxEnablePeerAccess() is" + " trying to re-enable peer access to a context which has already" + " had peer access to it enabled." + ), + 705: ( + "This error indicates that ::cuCtxDisablePeerAccess() is" + " trying to disable peer access which has not been enabled yet" + " via ::cuCtxEnablePeerAccess()." + ), + 708: ("This error indicates that the primary context for the specified device has already been initialized."), + 709: ( + "This error indicates that the context current to the calling thread" + " has been destroyed using ::cuCtxDestroy, or is a primary context which" + " has not yet been initialized." + ), + 710: ( + "A device-side assert triggered during kernel execution. The context" + " cannot be used anymore, and must be destroyed. 
All existing device" + " memory allocations from this context are invalid and must be" + " reconstructed if the program is to continue using CUDA." + ), + 711: ( + "This error indicates that the hardware resources required to enable" + " peer access have been exhausted for one or more of the devices" + " passed to ::cuCtxEnablePeerAccess()." + ), + 712: ("This error indicates that the memory range passed to ::cuMemHostRegister() has already been registered."), + 713: ( + "This error indicates that the pointer passed to ::cuMemHostUnregister()" + " does not correspond to any currently registered memory region." + ), + 714: ( + "While executing a kernel, the device encountered a stack error." + " This can be due to stack corruption or exceeding the stack size limit." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 715: ( + "While executing a kernel, the device encountered an illegal instruction." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 716: ( + "While executing a kernel, the device encountered a load or store instruction" + " on a memory address which is not aligned." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 717: ( + "While executing a kernel, the device encountered an instruction" + " which can only operate on memory locations in certain address spaces" + " (global, shared, or local), but was supplied a memory address not" + " belonging to an allowed address space." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. 
To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 718: ( + "While executing a kernel, the device program counter wrapped its address space." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 719: ( + "An exception occurred on the device while executing a kernel. Common" + " causes include dereferencing an invalid device pointer and accessing" + " out of bounds shared memory. Less common cases can be system specific - more" + " information about these cases can be found in the system specific user guide." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 720: ( + "This error indicates that the number of blocks launched per grid for a kernel that was" + " launched via either ::cuLaunchCooperativeKernel or ::cuLaunchCooperativeKernelMultiDevice" + " exceeds the maximum number of blocks as allowed by ::cuOccupancyMaxActiveBlocksPerMultiprocessor" + " or ::cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" + " as specified by the device attribute ::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT." + ), + 721: ( + "An exception occurred on the device while exiting a kernel using tensor memory: the" + " tensor memory was not completely deallocated. This leaves the process in an inconsistent" + " state and any further CUDA work will return the same error. To continue using CUDA, the" + " process must be terminated and relaunched." + ), + 800: "This error indicates that the attempted operation is not permitted.", + 801: ("This error indicates that the attempted operation is not supported on the current system or device."), + 802: ( + "This error indicates that the system is not yet ready to start any CUDA" + " work. 
To continue using CUDA, verify the system configuration is in a"
+        " valid state and all required driver daemons are actively running."
+        " More information about this error can be found in the system specific"
+        " user guide."
+    ),
+    803: (
+        "This error indicates that there is a mismatch between the versions of"
+        " the display driver and the CUDA driver. Refer to the compatibility documentation"
+        " for supported versions."
+    ),
+    804: (
+        "This error indicates that the system was upgraded to run with forward compatibility"
+        " but the visible hardware detected by CUDA does not support this configuration."
+        " Refer to the compatibility documentation for the supported hardware matrix or ensure"
+        " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES"
+        " environment variable."
+    ),
+    805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.",
+    806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.",
+    807: (
+        "This error indicates that the MPS server is not ready to accept new MPS client requests."
+        " This error can be returned when the MPS server is in the process of recovering from a fatal failure."
+    ),
+    808: "This error indicates that the hardware resources required to create MPS client have been exhausted.",
+    809: "This error indicates that the hardware resources required to support device connections have been exhausted.",
+    810: "This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched.", + 811: "This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", + 812: "This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", + 900: ("This error indicates that the operation is not permitted when the stream is capturing."), + 901: ( + "This error indicates that the current capture sequence on the stream" + " has been invalidated due to a previous error." + ), + 902: ( + "This error indicates that the operation would have resulted in a merge of two independent capture sequences." + ), + 903: "This error indicates that the capture was not initiated in this stream.", + 904: ("This error indicates that the capture sequence contains a fork that was not joined to the primary stream."), + 905: ( + "This error indicates that a dependency would have been created which" + " crosses the capture sequence boundary. Only implicit in-stream ordering" + " dependencies are allowed to cross the boundary." + ), + 906: ("This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy."), + 907: ( + "This error indicates that the operation is not permitted on an event which" + " was last recorded in a capturing stream." + ), + 908: ( + "A stream capture sequence not initiated with the ::CU_STREAM_CAPTURE_MODE_RELAXED" + " argument to ::cuStreamBeginCapture was passed to ::cuStreamEndCapture in a" + " different thread." + ), + 909: "This error indicates that the timeout specified for the wait operation has lapsed.", + 910: ( + "This error indicates that the graph update was not performed because it included" + " changes which violated constraints specific to instantiated graph update." + ), + 911: ( + "This indicates that an async error has occurred in a device outside of CUDA." 
+        " If CUDA was waiting for an external device's signal before consuming shared data,"
+        " the external device signaled an error indicating that the data is not valid for"
+        " consumption. This leaves the process in an inconsistent state and any further CUDA"
+        " work will return the same error. To continue using CUDA, the process must be"
+        " terminated and relaunched."
+    ),
+    912: "Indicates a kernel launch error due to cluster misconfiguration.",
+    913: ("Indicates a function handle is not loaded when calling an API that requires a loaded function."),
+    914: ("This error indicates one or more resources passed in are not valid resource types for the operation."),
+    915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."),
+    916: ("This error indicates that an error happened during the key rotation sequence."),
+    917: (
+        "This error indicates that the requested operation is not permitted because the"
+        " stream is in a detached state. This can occur if the green context associated"
+        " with the stream has been destroyed, limiting the stream's operational capabilities."
+ ), + 999: "This indicates that an unknown internal error has occurred.", +} diff --git a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py index 3b42ae5406..a176de73d1 100644 --- a/cuda_core/cuda/core/_utils/enum_explanations_helpers.py +++ b/cuda_core/cuda/core/_utils/enum_explanations_helpers.py @@ -19,6 +19,7 @@ import importlib.metadata import re +from collections.abc import Callable from typing import Any _MIN_12X_BINDING_VERSION_FOR_ENUM_DOCSTRINGS = (12, 9, 6) @@ -26,6 +27,8 @@ _RST_INLINE_ROLE_RE = re.compile(r":(?:[a-z]+:)?[a-z]+:`([^`]+)`") _WORDWRAP_HYPHEN_AFTER_RE = re.compile(r"(?<=[0-9A-Za-z_])- (?=[0-9A-Za-z_])") _WORDWRAP_HYPHEN_BEFORE_RE = re.compile(r"(?<=[0-9A-Za-z_]) -(?=[0-9A-Za-z_])") +_ExplanationTable = dict[int, str | tuple[str, ...]] +_ExplanationTableLoader = Callable[[], _ExplanationTable] # ``version.pyx`` cannot be reused here (circular import via ``cuda_utils``). @@ -111,8 +114,9 @@ def get(self, code: int, default: str | None = None) -> str | None: def get_best_available_explanations( - enum_type: Any, fallback: dict[int, str | tuple[str, ...]] -) -> DocstringBackedExplanations | dict[int, str | tuple[str, ...]]: + enum_type: Any, + fallback: _ExplanationTable | _ExplanationTableLoader, +) -> DocstringBackedExplanations | _ExplanationTable: """Pick one explanation source per bindings version. Use enum-member ``__doc__`` only for bindings versions known to expose @@ -120,5 +124,7 @@ def get_best_available_explanations( 13.x mainline). Otherwise keep using the frozen 13.1.1 fallback tables. 
""" if not _binding_version_has_usable_enum_docstrings(_binding_version()): + if callable(fallback): + return fallback() return fallback return DocstringBackedExplanations(enum_type) diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py index 5f587a0350..ab5be10e2d 100644 --- a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py +++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations.py @@ -4,540 +4,11 @@ from cuda.bindings import runtime from cuda.core._utils.enum_explanations_helpers import get_best_available_explanations -# CUDA Toolkit v13.1.1 -_FALLBACK_EXPLANATIONS = { - 0: ( - "The API call returned with no errors. In the case of query calls, this" - " also means that the operation being queried is complete (see" - " ::cudaEventQuery() and ::cudaStreamQuery())." - ), - 1: ( - "This indicates that one or more of the parameters passed to the API call" - " is not within an acceptable range of values." - ), - 2: ( - "The API call failed because it was unable to allocate enough memory or" - " other resources to perform the requested operation." - ), - 3: ("The API call failed because the CUDA driver and runtime could not be initialized."), - 4: ( - "This indicates that a CUDA Runtime API call cannot be executed because" - " it is being called during process shut down, at a point in time after" - " CUDA driver has been unloaded." - ), - 5: ( - "This indicates profiler is not initialized for this run. This can" - " happen when the application is running with external profiling tools" - " like visual profiler." - ), - 6: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to attempt to enable/disable the profiling via ::cudaProfilerStart or" - " ::cudaProfilerStop without initialization." - ), - 7: ( - "This error return is deprecated as of CUDA 5.0. 
It is no longer an error" - " to call cudaProfilerStart() when profiling is already enabled." - ), - 8: ( - "This error return is deprecated as of CUDA 5.0. It is no longer an error" - " to call cudaProfilerStop() when profiling is already disabled." - ), - 9: ( - "This indicates that a kernel launch is requesting resources that can" - " never be satisfied by the current device. Requesting more shared memory" - " per block than the device supports will trigger this error, as will" - " requesting too many threads or blocks. See ::cudaDeviceProp for more" - " device limitations." - ), - 12: ( - "This indicates that one or more of the pitch-related parameters passed" - " to the API call is not within the acceptable range for pitch." - ), - 13: ("This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier."), - 16: ( - "This indicates that at least one host pointer passed to the API call is" - " not a valid host pointer." - " This error return is deprecated as of CUDA 10.1." - ), - 17: ( - "This indicates that at least one device pointer passed to the API call is" - " not a valid device pointer." - " This error return is deprecated as of CUDA 10.1." - ), - 18: ("This indicates that the texture passed to the API call is not a valid texture."), - 19: ( - "This indicates that the texture binding is not valid. This occurs if you" - " call ::cudaGetTextureAlignmentOffset() with an unbound texture." - ), - 20: ( - "This indicates that the channel descriptor passed to the API call is not" - " valid. This occurs if the format is not one of the formats specified by" - " ::cudaChannelFormatKind, or if one of the dimensions is invalid." - ), - 21: ( - "This indicates that the direction of the memcpy passed to the API call is" - " not one of the types specified by ::cudaMemcpyKind." - ), - 22: ( - "This indicated that the user has taken the address of a constant variable," - " which was forbidden up until the CUDA 3.1 release." 
- " This error return is deprecated as of CUDA 3.1. Variables in constant" - " memory may now have their address taken by the runtime via" - " ::cudaGetSymbolAddress()." - ), - 23: ( - "This indicated that a texture fetch was not able to be performed." - " This was previously used for device emulation of texture operations." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 24: ( - "This indicated that a texture was not bound for access." - " This was previously used for device emulation of texture operations." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 25: ( - "This indicated that a synchronization operation had failed." - " This was previously used for some device emulation functions." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 26: ( - "This indicates that a non-float texture was being accessed with linear" - " filtering. This is not supported by CUDA." - ), - 27: ( - "This indicates that an attempt was made to read an unsupported data type as a" - " normalized float. This is not supported by CUDA." - ), - 28: ( - "Mixing of device and device emulation code was not allowed." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 31: ( - "This indicates that the API call is not yet implemented. Production" - " releases of CUDA will never return this error." - " This error return is deprecated as of CUDA 4.1." - ), - 32: ( - "This indicated that an emulated device pointer exceeded the 32-bit address" - " range." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 34: ( - "This indicates that the CUDA driver that the application has loaded is a" - " stub library. 
Applications that run with the stub rather than a real" - " driver loaded will result in CUDA API returning this error." - ), - 35: ( - "This indicates that the installed NVIDIA CUDA driver is older than the" - " CUDA runtime library. This is not a supported configuration. Users should" - " install an updated NVIDIA display driver to allow the application to run." - ), - 36: ( - "This indicates that the API call requires a newer CUDA driver than the one" - " currently installed. Users should install an updated NVIDIA CUDA driver" - " to allow the API call to succeed." - ), - 37: ("This indicates that the surface passed to the API call is not a valid surface."), - 43: ( - "This indicates that multiple global or constant variables (across separate" - " CUDA source files in the application) share the same string name." - ), - 44: ( - "This indicates that multiple textures (across separate CUDA source" - " files in the application) share the same string name." - ), - 45: ( - "This indicates that multiple surfaces (across separate CUDA source" - " files in the application) share the same string name." - ), - 46: ( - "This indicates that all CUDA devices are busy or unavailable at the current" - " time. Devices are often busy/unavailable due to use of" - " ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long" - " running CUDA kernels have filled up the GPU and are blocking new work" - " from starting. They can also be unavailable due to memory constraints" - " on a device that already has active CUDA work being performed." - ), - 49: ( - "This indicates that the current context is not compatible with this" - " the CUDA Runtime. This can only occur if you are using CUDA" - " Runtime/Driver interoperability and have created an existing Driver" - " context using the driver API. 
The Driver context may be incompatible" - " either because the Driver context was created using an older version" - " of the API, because the Runtime API call expects a primary driver" - " context and the Driver context is not primary, or because the Driver" - ' context has been destroyed. Please see CUDART_DRIVER "Interactions' - ' with the CUDA Driver API" for more information.' - ), - 52: ( - "The device function being invoked (usually via ::cudaLaunchKernel()) was not" - " previously configured via the ::cudaConfigureCall() function." - ), - 53: ( - "This indicated that a previous kernel launch failed. This was previously" - " used for device emulation of kernel launches." - " This error return is deprecated as of CUDA 3.1. Device emulation mode was" - " removed with the CUDA 3.1 release." - ), - 65: ( - "This error indicates that a device runtime grid launch did not occur" - " because the depth of the child grid would exceed the maximum supported" - " number of nested grid launches." - ), - 66: ( - "This error indicates that a grid launch did not occur because the kernel" - " uses file-scoped textures which are unsupported by the device runtime." - " Kernels launched via the device runtime only support textures created with" - " the Texture Object API's." - ), - 67: ( - "This error indicates that a grid launch did not occur because the kernel" - " uses file-scoped surfaces which are unsupported by the device runtime." - " Kernels launched via the device runtime only support surfaces created with" - " the Surface Object API's." - ), - 68: ( - "This error indicates that a call to ::cudaDeviceSynchronize made from" - " the device runtime failed because the call was made at grid depth greater" - " than than either the default (2 levels of grids) or user specified device" - " limit ::cudaLimitDevRuntimeSyncDepth. 
To be able to synchronize on" - " launched grids at a greater depth successfully, the maximum nested" - " depth at which ::cudaDeviceSynchronize will be called must be specified" - " with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit" - " api before the host-side launch of a kernel using the device runtime." - " Keep in mind that additional levels of sync depth require the runtime" - " to reserve large amounts of device memory that cannot be used for" - " user allocations. Note that ::cudaDeviceSynchronize made from device" - " runtime is only supported on devices of compute capability < 9.0." - ), - 69: ( - "This error indicates that a device runtime grid launch failed because" - " the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount." - " For this launch to proceed successfully, ::cudaDeviceSetLimit must be" - " called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher" - " than the upper bound of outstanding launches that can be issued to the" - " device runtime. Keep in mind that raising the limit of pending device" - " runtime launches will require the runtime to reserve device memory that" - " cannot be used for user allocations." - ), - 98: ("The requested device function does not exist or is not compiled for the proper device architecture."), - 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), - 101: ( - "This indicates that the device ordinal supplied by the user does not" - " correspond to a valid CUDA device or that the action requested is" - " invalid for the specified device." - ), - 102: "This indicates that the device doesn't have a valid Grid License.", - 103: ( - "By default, the CUDA runtime may perform a minimal set of self-tests," - " as well as CUDA driver tests, to establish the validity of both." 
- " Introduced in CUDA 11.2, this error return indicates that at least one" - " of these tests has failed and the validity of either the runtime" - " or the driver could not be established." - ), - 127: "This indicates an internal startup failure in the CUDA runtime.", - 200: "This indicates that the device kernel image is invalid.", - 201: ( - "This most frequently indicates that there is no context bound to the" - " current thread. This can also be returned if the context passed to an" - " API call is not a valid handle (such as a context that has had" - " ::cuCtxDestroy() invoked on it). This can also be returned if a user" - " mixes different API versions (i.e. 3010 context with 3020 API calls)." - " See ::cuCtxGetApiVersion() for more details." - ), - 205: "This indicates that the buffer object could not be mapped.", - 206: "This indicates that the buffer object could not be unmapped.", - 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), - 208: "This indicates that the resource is already mapped.", - 209: ( - "This indicates that there is no kernel image available that is suitable" - " for the device. This can occur when a user specifies code generation" - " options for a particular CUDA source file that do not include the" - " corresponding device configuration." - ), - 210: "This indicates that a resource has already been acquired.", - 211: "This indicates that a resource is not mapped.", - 212: ("This indicates that a mapped resource is not available for access as an array."), - 213: ("This indicates that a mapped resource is not available for access as a pointer."), - 214: ("This indicates that an uncorrectable ECC error was detected during execution."), - 215: ("This indicates that the ::cudaLimit passed to the API call is not supported by the active device."), - 216: ( - "This indicates that a call tried to access an exclusive-thread device that" - " is already in use by a different thread." 
- ), - 217: ("This error indicates that P2P access is not supported across the given devices."), - 218: ( - "A PTX compilation failed. The runtime may fall back to compiling PTX if" - " an application does not contain a suitable binary for the current device." - ), - 219: "This indicates an error with the OpenGL or DirectX context.", - 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), - 221: ( - "This indicates that the PTX JIT compiler library was not found. The JIT Compiler" - " library is used for PTX compilation. The runtime may fall back to compiling PTX" - " if an application does not contain a suitable binary for the current device." - ), - 222: ( - "This indicates that the provided PTX was compiled with an unsupported toolchain." - " The most common reason for this, is the PTX was generated by a compiler newer" - " than what is supported by the CUDA driver and PTX JIT compiler." - ), - 223: ( - "This indicates that the JIT compilation was disabled. The JIT compilation compiles" - " PTX. The runtime may fall back to compiling PTX if an application does not contain" - " a suitable binary for the current device." - ), - 224: "This indicates that the provided execution affinity is not supported by the device.", - 225: ( - "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." - ), - 226: ( - "This indicates that an exception occurred on the device that is now" - " contained by the GPU's error containment capability. Common causes are -" - " a. Certain types of invalid accesses of peer GPU memory over nvlink" - " b. Certain classes of hardware errors" - " This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must" - " be terminated and relaunched." 
- ), - 300: "This indicates that the device kernel source is invalid.", - 301: "This indicates that the file specified was not found.", - 302: "This indicates that a link to a shared object failed to resolve.", - 303: "This indicates that initialization of a shared object failed.", - 304: "This error indicates that an OS call failed.", - 400: ( - "This indicates that a resource handle passed to the API call was not" - " valid. Resource handles are opaque types like ::cudaStream_t and" - " ::cudaEvent_t." - ), - 401: ( - "This indicates that a resource required by the API call is not in a" - " valid state to perform the requested operation." - ), - 402: ( - "This indicates an attempt was made to introspect an object in a way that" - " would discard semantically important information. This is either due to" - " the object using funtionality newer than the API version used to" - " introspect it or omission of optional return arguments." - ), - 500: ( - "This indicates that a named symbol was not found. Examples of symbols" - " are global/constant variable names, driver function names, texture names," - " and surface names." - ), - 600: ( - "This indicates that asynchronous operations issued previously have not" - " completed yet. This result is not actually an error, but must be indicated" - " differently than ::cudaSuccess (which indicates completion). Calls that" - " may return this value include ::cudaEventQuery() and ::cudaStreamQuery()." - ), - 700: ( - "The device encountered a load or store instruction on an invalid memory address." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 701: ( - "This indicates that a launch did not occur because it did not have" - " appropriate resources. 
Although this error is similar to" - " ::cudaErrorInvalidConfiguration, this error usually indicates that the" - " user has attempted to pass too many arguments to the device kernel, or the" - " kernel launch specifies too many threads for the kernel's register count." - ), - 702: ( - "This indicates that the device kernel took too long to execute. This can" - " only occur if timeouts are enabled - see the device attribute" - ' ::cudaDeviceAttr::cudaDevAttrKernelExecTimeout "cudaDevAttrKernelExecTimeout"' - " for more information." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), - 704: ( - "This error indicates that a call to ::cudaDeviceEnablePeerAccess() is" - " trying to re-enable peer addressing on from a context which has already" - " had peer addressing enabled." - ), - 705: ( - "This error indicates that ::cudaDeviceDisablePeerAccess() is trying to" - " disable peer addressing which has not been enabled yet via" - " ::cudaDeviceEnablePeerAccess()." - ), - 708: ( - "This indicates that the user has called ::cudaSetValidDevices()," - " ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice()," - " ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or" - " ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by" - " calling non-device management operations (allocating memory and" - " launching kernels are examples of non-device management operations)." - " This error can also be returned if using runtime/driver" - " interoperability and there is an existing ::CUcontext active on the" - " host thread." - ), - 709: ( - "This error indicates that the context current to the calling thread" - " has been destroyed using ::cuCtxDestroy, or is a primary context which" - " has not yet been initialized." 
- ), - 710: ( - "An assert triggered in device code during kernel execution. The device" - " cannot be used again. All existing allocations are invalid. To continue" - " using CUDA, the process must be terminated and relaunched." - ), - 711: ( - "This error indicates that the hardware resources required to enable" - " peer access have been exhausted for one or more of the devices" - " passed to ::cudaEnablePeerAccess()." - ), - 712: ("This error indicates that the memory range passed to ::cudaHostRegister() has already been registered."), - 713: ( - "This error indicates that the pointer passed to ::cudaHostUnregister()" - " does not correspond to any currently registered memory region." - ), - 714: ( - "Device encountered an error in the call stack during kernel execution," - " possibly due to stack corruption or exceeding the stack size limit." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 715: ( - "The device encountered an illegal instruction during kernel execution" - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 716: ( - "The device encountered a load or store instruction" - " on a memory address which is not aligned." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 717: ( - "While executing a kernel, the device encountered an instruction" - " which can only operate on memory locations in certain address spaces" - " (global, shared, or local), but was supplied a memory address not" - " belonging to an allowed address space." 
- " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 718: ( - "The device encountered an invalid program counter." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 719: ( - "An exception occurred on the device while executing a kernel. Common" - " causes include dereferencing an invalid device pointer and accessing" - " out of bounds shared memory. Less common cases can be system specific - more" - " information about these cases can be found in the system specific user guide." - " This leaves the process in an inconsistent state and any further CUDA work" - " will return the same error. To continue using CUDA, the process must be terminated" - " and relaunched." - ), - 720: ( - "This error indicates that the number of blocks launched per grid for a kernel that was" - " launched via either ::cudaLaunchCooperativeKernel" - " exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor" - " or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" - " as specified by the device attribute ::cudaDevAttrMultiProcessorCount." - ), - 721: ( - "An exception occurred on the device while exiting a kernel using tensor memory: the" - " tensor memory was not completely deallocated. This leaves the process in an inconsistent" - " state and any further CUDA work will return the same error. To continue using CUDA, the" - " process must be terminated and relaunched." 
- ), - 800: "This error indicates the attempted operation is not permitted.", - 801: ("This error indicates the attempted operation is not supported on the current system or device."), - 802: ( - "This error indicates that the system is not yet ready to start any CUDA" - " work. To continue using CUDA, verify the system configuration is in a" - " valid state and all required driver daemons are actively running." - " More information about this error can be found in the system specific" - " user guide." - ), - 803: ( - "This error indicates that there is a mismatch between the versions of" - " the display driver and the CUDA driver. Refer to the compatibility documentation" - " for supported versions." - ), - 804: ( - "This error indicates that the system was upgraded to run with forward compatibility" - " but the visible hardware detected by CUDA does not support this configuration." - " Refer to the compatibility documentation for the supported hardware matrix or ensure" - " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES" - " environment variable." - ), - 805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.", - 806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.", - 807: ( - "This error indicates that the MPS server is not ready to accept new MPS client requests." - " This error can be returned when the MPS server is in the process of recovering from a fatal failure." - ), - 808: "This error indicates that the hardware resources required to create MPS client have been exhausted.", - 809: "This error indicates the the hardware resources required to device connections have been exhausted.", - 810: "This error indicates that the MPS client has been terminated by the server. 
To continue using CUDA, the process must be terminated and relaunched.", - 811: "This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", - 812: "This error indicates, that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", - 900: "The operation is not permitted when the stream is capturing.", - 901: ("The current capture sequence on the stream has been invalidated due to a previous error."), - 902: ("The operation would have resulted in a merge of two independent capture sequences."), - 903: "The capture was not initiated in this stream.", - 904: ("The capture sequence contains a fork that was not joined to the primary stream."), - 905: ( - "A dependency would have been created which crosses the capture sequence" - " boundary. Only implicit in-stream ordering dependencies are allowed to" - " cross the boundary." - ), - 906: ( - "The operation would have resulted in a disallowed implicit dependency on" - " a current capture sequence from cudaStreamLegacy." - ), - 907: ("The operation is not permitted on an event which was last recorded in a capturing stream."), - 908: ( - "A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed" - " argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a" - " different thread." - ), - 909: "This indicates that the wait operation has timed out.", - 910: ( - "This error indicates that the graph update was not performed because it included" - " changes which violated constraints specific to instantiated graph update." - ), - 911: ( - "This indicates that an async error has occurred in a device outside of CUDA." - " If CUDA was waiting for an external device's signal before consuming shared data," - " the external device signaled an error indicating that the data is not valid for" - " consumption. 
This leaves the process in an inconsistent state and any further CUDA" - " work will return the same error. To continue using CUDA, the process must be" - " terminated and relaunched." - ), - 912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."), - 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), - 914: ("This error indicates one or more resources passed in are not valid resource types for the operation."), - 915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."), - 917: ( - "This error indicates that the requested operation is not permitted because the" - " stream is in a detached state. This can occur if the green context associated" - " with the stream has been destroyed, limiting the stream's operational capabilities." - ), - 999: "This indicates that an unknown internal error has occurred.", - 10000: ( - "Any unhandled CUDA driver error is added to this value and returned via" - " the runtime. Production releases of CUDA should not return such errors." - " This error return is deprecated as of CUDA 4.1." - ), -} -RUNTIME_CUDA_ERROR_EXPLANATIONS = get_best_available_explanations(runtime.cudaError_t, _FALLBACK_EXPLANATIONS) +def _load_fallback_explanations(): + from cuda.core._utils.runtime_cuda_error_explanations_frozen import _FALLBACK_EXPLANATIONS + + return _FALLBACK_EXPLANATIONS + + +RUNTIME_CUDA_ERROR_EXPLANATIONS = get_best_available_explanations(runtime.cudaError_t, _load_fallback_explanations) diff --git a/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py new file mode 100644 index 0000000000..497b2ad20d --- /dev/null +++ b/cuda_core/cuda/core/_utils/runtime_cuda_error_explanations_frozen.py @@ -0,0 +1,538 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +# CUDA Toolkit v13.1.1 +_FALLBACK_EXPLANATIONS = { + 0: ( + "The API call returned with no errors. In the case of query calls, this" + " also means that the operation being queried is complete (see" + " ::cudaEventQuery() and ::cudaStreamQuery())." + ), + 1: ( + "This indicates that one or more of the parameters passed to the API call" + " is not within an acceptable range of values." + ), + 2: ( + "The API call failed because it was unable to allocate enough memory or" + " other resources to perform the requested operation." + ), + 3: ("The API call failed because the CUDA driver and runtime could not be initialized."), + 4: ( + "This indicates that a CUDA Runtime API call cannot be executed because" + " it is being called during process shut down, at a point in time after" + " CUDA driver has been unloaded." + ), + 5: ( + "This indicates profiler is not initialized for this run. This can" + " happen when the application is running with external profiling tools" + " like visual profiler." + ), + 6: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to attempt to enable/disable the profiling via ::cudaProfilerStart or" + " ::cudaProfilerStop without initialization." + ), + 7: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cudaProfilerStart() when profiling is already enabled." + ), + 8: ( + "This error return is deprecated as of CUDA 5.0. It is no longer an error" + " to call cudaProfilerStop() when profiling is already disabled." + ), + 9: ( + "This indicates that a kernel launch is requesting resources that can" + " never be satisfied by the current device. Requesting more shared memory" + " per block than the device supports will trigger this error, as will" + " requesting too many threads or blocks. See ::cudaDeviceProp for more" + " device limitations." 
+ ), + 12: ( + "This indicates that one or more of the pitch-related parameters passed" + " to the API call is not within the acceptable range for pitch." + ), + 13: ("This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier."), + 16: ( + "This indicates that at least one host pointer passed to the API call is" + " not a valid host pointer." + " This error return is deprecated as of CUDA 10.1." + ), + 17: ( + "This indicates that at least one device pointer passed to the API call is" + " not a valid device pointer." + " This error return is deprecated as of CUDA 10.1." + ), + 18: ("This indicates that the texture passed to the API call is not a valid texture."), + 19: ( + "This indicates that the texture binding is not valid. This occurs if you" + " call ::cudaGetTextureAlignmentOffset() with an unbound texture." + ), + 20: ( + "This indicates that the channel descriptor passed to the API call is not" + " valid. This occurs if the format is not one of the formats specified by" + " ::cudaChannelFormatKind, or if one of the dimensions is invalid." + ), + 21: ( + "This indicates that the direction of the memcpy passed to the API call is" + " not one of the types specified by ::cudaMemcpyKind." + ), + 22: ( + "This indicated that the user has taken the address of a constant variable," + " which was forbidden up until the CUDA 3.1 release." + " This error return is deprecated as of CUDA 3.1. Variables in constant" + " memory may now have their address taken by the runtime via" + " ::cudaGetSymbolAddress()." + ), + 23: ( + "This indicated that a texture fetch was not able to be performed." + " This was previously used for device emulation of texture operations." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 24: ( + "This indicated that a texture was not bound for access." + " This was previously used for device emulation of texture operations." 
+ " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 25: ( + "This indicated that a synchronization operation had failed." + " This was previously used for some device emulation functions." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 26: ( + "This indicates that a non-float texture was being accessed with linear" + " filtering. This is not supported by CUDA." + ), + 27: ( + "This indicates that an attempt was made to read an unsupported data type as a" + " normalized float. This is not supported by CUDA." + ), + 28: ( + "Mixing of device and device emulation code was not allowed." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 31: ( + "This indicates that the API call is not yet implemented. Production" + " releases of CUDA will never return this error." + " This error return is deprecated as of CUDA 4.1." + ), + 32: ( + "This indicated that an emulated device pointer exceeded the 32-bit address" + " range." + " This error return is deprecated as of CUDA 3.1. Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 34: ( + "This indicates that the CUDA driver that the application has loaded is a" + " stub library. Applications that run with the stub rather than a real" + " driver loaded will result in CUDA API returning this error." + ), + 35: ( + "This indicates that the installed NVIDIA CUDA driver is older than the" + " CUDA runtime library. This is not a supported configuration. Users should" + " install an updated NVIDIA display driver to allow the application to run." + ), + 36: ( + "This indicates that the API call requires a newer CUDA driver than the one" + " currently installed. Users should install an updated NVIDIA CUDA driver" + " to allow the API call to succeed." 
+ ), + 37: ("This indicates that the surface passed to the API call is not a valid surface."), + 43: ( + "This indicates that multiple global or constant variables (across separate" + " CUDA source files in the application) share the same string name." + ), + 44: ( + "This indicates that multiple textures (across separate CUDA source" + " files in the application) share the same string name." + ), + 45: ( + "This indicates that multiple surfaces (across separate CUDA source" + " files in the application) share the same string name." + ), + 46: ( + "This indicates that all CUDA devices are busy or unavailable at the current" + " time. Devices are often busy/unavailable due to use of" + " ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long" + " running CUDA kernels have filled up the GPU and are blocking new work" + " from starting. They can also be unavailable due to memory constraints" + " on a device that already has active CUDA work being performed." + ), + 49: ( + "This indicates that the current context is not compatible with this" + " the CUDA Runtime. This can only occur if you are using CUDA" + " Runtime/Driver interoperability and have created an existing Driver" + " context using the driver API. The Driver context may be incompatible" + " either because the Driver context was created using an older version" + " of the API, because the Runtime API call expects a primary driver" + " context and the Driver context is not primary, or because the Driver" + ' context has been destroyed. Please see CUDART_DRIVER "Interactions' + ' with the CUDA Driver API" for more information.' + ), + 52: ( + "The device function being invoked (usually via ::cudaLaunchKernel()) was not" + " previously configured via the ::cudaConfigureCall() function." + ), + 53: ( + "This indicated that a previous kernel launch failed. This was previously" + " used for device emulation of kernel launches." + " This error return is deprecated as of CUDA 3.1. 
Device emulation mode was" + " removed with the CUDA 3.1 release." + ), + 65: ( + "This error indicates that a device runtime grid launch did not occur" + " because the depth of the child grid would exceed the maximum supported" + " number of nested grid launches." + ), + 66: ( + "This error indicates that a grid launch did not occur because the kernel" + " uses file-scoped textures which are unsupported by the device runtime." + " Kernels launched via the device runtime only support textures created with" + " the Texture Object API's." + ), + 67: ( + "This error indicates that a grid launch did not occur because the kernel" + " uses file-scoped surfaces which are unsupported by the device runtime." + " Kernels launched via the device runtime only support surfaces created with" + " the Surface Object API's." + ), + 68: ( + "This error indicates that a call to ::cudaDeviceSynchronize made from" + " the device runtime failed because the call was made at grid depth greater" + " than than either the default (2 levels of grids) or user specified device" + " limit ::cudaLimitDevRuntimeSyncDepth. To be able to synchronize on" + " launched grids at a greater depth successfully, the maximum nested" + " depth at which ::cudaDeviceSynchronize will be called must be specified" + " with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit" + " api before the host-side launch of a kernel using the device runtime." + " Keep in mind that additional levels of sync depth require the runtime" + " to reserve large amounts of device memory that cannot be used for" + " user allocations. Note that ::cudaDeviceSynchronize made from device" + " runtime is only supported on devices of compute capability < 9.0." + ), + 69: ( + "This error indicates that a device runtime grid launch failed because" + " the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount." 
+ " For this launch to proceed successfully, ::cudaDeviceSetLimit must be" + " called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher" + " than the upper bound of outstanding launches that can be issued to the" + " device runtime. Keep in mind that raising the limit of pending device" + " runtime launches will require the runtime to reserve device memory that" + " cannot be used for user allocations." + ), + 98: ("The requested device function does not exist or is not compiled for the proper device architecture."), + 100: ("This indicates that no CUDA-capable devices were detected by the installed CUDA driver."), + 101: ( + "This indicates that the device ordinal supplied by the user does not" + " correspond to a valid CUDA device or that the action requested is" + " invalid for the specified device." + ), + 102: "This indicates that the device doesn't have a valid Grid License.", + 103: ( + "By default, the CUDA runtime may perform a minimal set of self-tests," + " as well as CUDA driver tests, to establish the validity of both." + " Introduced in CUDA 11.2, this error return indicates that at least one" + " of these tests has failed and the validity of either the runtime" + " or the driver could not be established." + ), + 127: "This indicates an internal startup failure in the CUDA runtime.", + 200: "This indicates that the device kernel image is invalid.", + 201: ( + "This most frequently indicates that there is no context bound to the" + " current thread. This can also be returned if the context passed to an" + " API call is not a valid handle (such as a context that has had" + " ::cuCtxDestroy() invoked on it). This can also be returned if a user" + " mixes different API versions (i.e. 3010 context with 3020 API calls)." + " See ::cuCtxGetApiVersion() for more details." 
+ ), + 205: "This indicates that the buffer object could not be mapped.", + 206: "This indicates that the buffer object could not be unmapped.", + 207: ("This indicates that the specified array is currently mapped and thus cannot be destroyed."), + 208: "This indicates that the resource is already mapped.", + 209: ( + "This indicates that there is no kernel image available that is suitable" + " for the device. This can occur when a user specifies code generation" + " options for a particular CUDA source file that do not include the" + " corresponding device configuration." + ), + 210: "This indicates that a resource has already been acquired.", + 211: "This indicates that a resource is not mapped.", + 212: ("This indicates that a mapped resource is not available for access as an array."), + 213: ("This indicates that a mapped resource is not available for access as a pointer."), + 214: ("This indicates that an uncorrectable ECC error was detected during execution."), + 215: ("This indicates that the ::cudaLimit passed to the API call is not supported by the active device."), + 216: ( + "This indicates that a call tried to access an exclusive-thread device that" + " is already in use by a different thread." + ), + 217: ("This error indicates that P2P access is not supported across the given devices."), + 218: ( + "A PTX compilation failed. The runtime may fall back to compiling PTX if" + " an application does not contain a suitable binary for the current device." + ), + 219: "This indicates an error with the OpenGL or DirectX context.", + 220: ("This indicates that an uncorrectable NVLink error was detected during the execution."), + 221: ( + "This indicates that the PTX JIT compiler library was not found. The JIT Compiler" + " library is used for PTX compilation. The runtime may fall back to compiling PTX" + " if an application does not contain a suitable binary for the current device." 
+ ), + 222: ( + "This indicates that the provided PTX was compiled with an unsupported toolchain." + " The most common reason for this, is the PTX was generated by a compiler newer" + " than what is supported by the CUDA driver and PTX JIT compiler." + ), + 223: ( + "This indicates that the JIT compilation was disabled. The JIT compilation compiles" + " PTX. The runtime may fall back to compiling PTX if an application does not contain" + " a suitable binary for the current device." + ), + 224: "This indicates that the provided execution affinity is not supported by the device.", + 225: ( + "This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize." + ), + 226: ( + "This indicates that an exception occurred on the device that is now" + " contained by the GPU's error containment capability. Common causes are -" + " a. Certain types of invalid accesses of peer GPU memory over nvlink" + " b. Certain classes of hardware errors" + " This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must" + " be terminated and relaunched." + ), + 300: "This indicates that the device kernel source is invalid.", + 301: "This indicates that the file specified was not found.", + 302: "This indicates that a link to a shared object failed to resolve.", + 303: "This indicates that initialization of a shared object failed.", + 304: "This error indicates that an OS call failed.", + 400: ( + "This indicates that a resource handle passed to the API call was not" + " valid. Resource handles are opaque types like ::cudaStream_t and" + " ::cudaEvent_t." + ), + 401: ( + "This indicates that a resource required by the API call is not in a" + " valid state to perform the requested operation." + ), + 402: ( + "This indicates an attempt was made to introspect an object in a way that" + " would discard semantically important information. 
This is either due to" + " the object using funtionality newer than the API version used to" + " introspect it or omission of optional return arguments." + ), + 500: ( + "This indicates that a named symbol was not found. Examples of symbols" + " are global/constant variable names, driver function names, texture names," + " and surface names." + ), + 600: ( + "This indicates that asynchronous operations issued previously have not" + " completed yet. This result is not actually an error, but must be indicated" + " differently than ::cudaSuccess (which indicates completion). Calls that" + " may return this value include ::cudaEventQuery() and ::cudaStreamQuery()." + ), + 700: ( + "The device encountered a load or store instruction on an invalid memory address." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 701: ( + "This indicates that a launch did not occur because it did not have" + " appropriate resources. Although this error is similar to" + " ::cudaErrorInvalidConfiguration, this error usually indicates that the" + " user has attempted to pass too many arguments to the device kernel, or the" + " kernel launch specifies too many threads for the kernel's register count." + ), + 702: ( + "This indicates that the device kernel took too long to execute. This can" + " only occur if timeouts are enabled - see the device attribute" + ' ::cudaDeviceAttr::cudaDevAttrKernelExecTimeout "cudaDevAttrKernelExecTimeout"' + " for more information." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." 
+ ), + 703: ("This error indicates a kernel launch that uses an incompatible texturing mode."), + 704: ( + "This error indicates that a call to ::cudaDeviceEnablePeerAccess() is" + " trying to re-enable peer addressing on from a context which has already" + " had peer addressing enabled." + ), + 705: ( + "This error indicates that ::cudaDeviceDisablePeerAccess() is trying to" + " disable peer addressing which has not been enabled yet via" + " ::cudaDeviceEnablePeerAccess()." + ), + 708: ( + "This indicates that the user has called ::cudaSetValidDevices()," + " ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice()," + " ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or" + " ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by" + " calling non-device management operations (allocating memory and" + " launching kernels are examples of non-device management operations)." + " This error can also be returned if using runtime/driver" + " interoperability and there is an existing ::CUcontext active on the" + " host thread." + ), + 709: ( + "This error indicates that the context current to the calling thread" + " has been destroyed using ::cuCtxDestroy, or is a primary context which" + " has not yet been initialized." + ), + 710: ( + "An assert triggered in device code during kernel execution. The device" + " cannot be used again. All existing allocations are invalid. To continue" + " using CUDA, the process must be terminated and relaunched." + ), + 711: ( + "This error indicates that the hardware resources required to enable" + " peer access have been exhausted for one or more of the devices" + " passed to ::cudaEnablePeerAccess()." + ), + 712: ("This error indicates that the memory range passed to ::cudaHostRegister() has already been registered."), + 713: ( + "This error indicates that the pointer passed to ::cudaHostUnregister()" + " does not correspond to any currently registered memory region." 
+ ), + 714: ( + "Device encountered an error in the call stack during kernel execution," + " possibly due to stack corruption or exceeding the stack size limit." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 715: ( + "The device encountered an illegal instruction during kernel execution" + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 716: ( + "The device encountered a load or store instruction" + " on a memory address which is not aligned." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 717: ( + "While executing a kernel, the device encountered an instruction" + " which can only operate on memory locations in certain address spaces" + " (global, shared, or local), but was supplied a memory address not" + " belonging to an allowed address space." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 718: ( + "The device encountered an invalid program counter." + " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 719: ( + "An exception occurred on the device while executing a kernel. Common" + " causes include dereferencing an invalid device pointer and accessing" + " out of bounds shared memory. Less common cases can be system specific - more" + " information about these cases can be found in the system specific user guide." 
+ " This leaves the process in an inconsistent state and any further CUDA work" + " will return the same error. To continue using CUDA, the process must be terminated" + " and relaunched." + ), + 720: ( + "This error indicates that the number of blocks launched per grid for a kernel that was" + " launched via either ::cudaLaunchCooperativeKernel" + " exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor" + " or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors" + " as specified by the device attribute ::cudaDevAttrMultiProcessorCount." + ), + 721: ( + "An exception occurred on the device while exiting a kernel using tensor memory: the" + " tensor memory was not completely deallocated. This leaves the process in an inconsistent" + " state and any further CUDA work will return the same error. To continue using CUDA, the" + " process must be terminated and relaunched." + ), + 800: "This error indicates the attempted operation is not permitted.", + 801: ("This error indicates the attempted operation is not supported on the current system or device."), + 802: ( + "This error indicates that the system is not yet ready to start any CUDA" + " work. To continue using CUDA, verify the system configuration is in a" + " valid state and all required driver daemons are actively running." + " More information about this error can be found in the system specific" + " user guide." + ), + 803: ( + "This error indicates that there is a mismatch between the versions of" + " the display driver and the CUDA driver. Refer to the compatibility documentation" + " for supported versions." + ), + 804: ( + "This error indicates that the system was upgraded to run with forward compatibility" + " but the visible hardware detected by CUDA does not support this configuration." 
+ " Refer to the compatibility documentation for the supported hardware matrix or ensure" + " that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES" + " environment variable." + ), + 805: "This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.", + 806: "This error indicates that the remote procedural call between the MPS server and the MPS client failed.", + 807: ( + "This error indicates that the MPS server is not ready to accept new MPS client requests." + " This error can be returned when the MPS server is in the process of recovering from a fatal failure." + ), + 808: "This error indicates that the hardware resources required to create MPS client have been exhausted.", + 809: "This error indicates the the hardware resources required to device connections have been exhausted.", + 810: "This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.", + 811: "This error indicates, that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.", + 812: "This error indicates, that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.", + 900: "The operation is not permitted when the stream is capturing.", + 901: ("The current capture sequence on the stream has been invalidated due to a previous error."), + 902: ("The operation would have resulted in a merge of two independent capture sequences."), + 903: "The capture was not initiated in this stream.", + 904: ("The capture sequence contains a fork that was not joined to the primary stream."), + 905: ( + "A dependency would have been created which crosses the capture sequence" + " boundary. Only implicit in-stream ordering dependencies are allowed to" + " cross the boundary." 
+ ), + 906: ( + "The operation would have resulted in a disallowed implicit dependency on" + " a current capture sequence from cudaStreamLegacy." + ), + 907: ("The operation is not permitted on an event which was last recorded in a capturing stream."), + 908: ( + "A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed" + " argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a" + " different thread." + ), + 909: "This indicates that the wait operation has timed out.", + 910: ( + "This error indicates that the graph update was not performed because it included" + " changes which violated constraints specific to instantiated graph update." + ), + 911: ( + "This indicates that an async error has occurred in a device outside of CUDA." + " If CUDA was waiting for an external device's signal before consuming shared data," + " the external device signaled an error indicating that the data is not valid for" + " consumption. This leaves the process in an inconsistent state and any further CUDA" + " work will return the same error. To continue using CUDA, the process must be" + " terminated and relaunched." + ), + 912: ("This indicates that a kernel launch error has occurred due to cluster misconfiguration."), + 913: ("Indiciates a function handle is not loaded when calling an API that requires a loaded function."), + 914: ("This error indicates one or more resources passed in are not valid resource types for the operation."), + 915: ("This error indicates one or more resources are insufficient or non-applicable for the operation."), + 917: ( + "This error indicates that the requested operation is not permitted because the" + " stream is in a detached state. This can occur if the green context associated" + " with the stream has been destroyed, limiting the stream's operational capabilities." 
+ ), + 999: "This indicates that an unknown internal error has occurred.", + 10000: ( + "Any unhandled CUDA driver error is added to this value and returned via" + " the runtime. Production releases of CUDA should not return such errors." + " This error return is deprecated as of CUDA 4.1." + ), +} diff --git a/cuda_core/tests/test_utils_enum_explanations_helpers.py b/cuda_core/tests/test_utils_enum_explanations_helpers.py index d31e40ee47..d46355aefd 100644 --- a/cuda_core/tests/test_utils_enum_explanations_helpers.py +++ b/cuda_core/tests/test_utils_enum_explanations_helpers.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import importlib +import sys + import pytest from cuda.core._utils import enum_explanations_helpers @@ -124,3 +127,44 @@ def test_get_best_available_explanations_switches_by_version(monkeypatch, versio assert expl.get(7) == "clean me" else: assert expl is fallback + + +def test_get_best_available_explanations_calls_loader_before_docstrings(monkeypatch): + fallback = {7: "fallback text"} + calls = [] + + def load_fallback(): + calls.append("loaded") + return fallback + + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: (13, 1, 1)) + expl = enum_explanations_helpers.get_best_available_explanations( + _FakeEnumType({7: _FakeEnumMember("clean me")}), + load_fallback, + ) + assert expl is fallback + assert calls == ["loaded"] + + +def test_driver_explanations_module_skips_fallback_import_when_docstrings_available(monkeypatch): + import cuda.core._utils.driver_cu_result_explanations as driver_explanations + + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: (13, 2, 0)) + sys.modules.pop("cuda.core._utils.driver_cu_result_explanations_frozen", None) + + importlib.reload(driver_explanations) + + assert "cuda.core._utils.driver_cu_result_explanations_frozen" not in sys.modules + assert isinstance(driver_explanations.DRIVER_CU_RESULT_EXPLANATIONS, 
DocstringBackedExplanations) + + +def test_runtime_explanations_module_skips_fallback_import_when_docstrings_available(monkeypatch): + import cuda.core._utils.runtime_cuda_error_explanations as runtime_explanations + + monkeypatch.setattr(enum_explanations_helpers, "_binding_version", lambda: (13, 2, 0)) + sys.modules.pop("cuda.core._utils.runtime_cuda_error_explanations_frozen", None) + + importlib.reload(runtime_explanations) + + assert "cuda.core._utils.runtime_cuda_error_explanations_frozen" not in sys.modules + assert isinstance(runtime_explanations.RUNTIME_CUDA_ERROR_EXPLANATIONS, DocstringBackedExplanations) From 27aae00d7e4529b1aa372d53b57fe698d58bfb7c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 6 Apr 2026 12:55:39 -0700 Subject: [PATCH 9/9] cuda_core: pin cleanup-sensitive enum doc examples Add a small set of real enum-doc cleanup examples that assert today's exact cleaned output for representative live bindings cases. Mark unexpected drift as xfail so future upstream doc changes trigger manual review without causing a hard test failure. Made-with: Cursor --- cuda_core/tests/test_cuda_utils.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 7dc3703749..1357ca3a12 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -19,6 +19,15 @@ def _skip_if_bindings_pre_enum_docstrings(): pytest.skip("cuda-bindings version does not expose usable enum __doc__ strings") +def _assert_cleanup_example_matches_or_xfail(actual, expected): + # Pin a few real cleanup-sensitive enum docs. If one starts failing, review + # the raw ``__doc__`` and today's cleaned output: either update the expected + # text to match an acceptable upstream change, or fix the cleanup logic. 
+ if actual != expected: + pytest.xfail("please review this failure") + assert actual == expected + + def test_check_driver_error(): num_unexpected = 0 for error in driver.CUresult: @@ -73,6 +82,69 @@ def test_runtime_error_enum_has_non_empty_docstring(): assert doc.strip() != "" +# These use real enum members rather than synthetic strings, to pin a few +# representative cleanup-sensitive docs end to end. Together with the helper +# unit tests, this gives a harder assurance that today's live bindings output +# is rendered into the user-facing text we expect. Unexpected changes are +# marked as xfail so they prompt manual review of the drift, without causing +# a hard test failure. +@pytest.mark.parametrize( + ("explanations", "error", "expected"), + [ + pytest.param( + cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS, + driver.CUresult.CUDA_ERROR_NOT_INITIALIZED, + "This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has failed.", + id="driver_not_initialized_role_cleanup", + ), + pytest.param( + cuda_utils.DRIVER_CU_RESULT_EXPLANATIONS, + driver.CUresult.CUDA_ERROR_INVALID_CONTEXT, + ( + "This most frequently indicates that there is no context bound to the current thread. " + "This can also be returned if the context passed to an API call is not a valid handle " + "(such as a context that has had cuCtxDestroy() invoked on it). This can also be " + "returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). " + "See cuCtxGetApiVersion() for more details. This can also be returned if the green " + "context passed to an API call was not converted to a CUcontext using cuCtxFromGreenCtx API." + ), + id="driver_invalid_context_multiple_roles", + ), + pytest.param( + cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS, + runtime.cudaError_t.cudaErrorLaunchTimeout, + ( + "This indicates that the device kernel took too long to execute. 
This can only occur " + "if timeouts are enabled - see the device attribute cudaDevAttrKernelExecTimeout for " + "more information. This leaves the process in an inconsistent state and any further " + "CUDA work will return the same error. To continue using CUDA, the process must be " + "terminated and relaunched." + ), + id="runtime_launch_timeout_role_cleanup", + ), + pytest.param( + cuda_utils.RUNTIME_CUDA_ERROR_EXPLANATIONS, + runtime.cudaError_t.cudaErrorIncompatibleDriverContext, + ( + "This indicates that the current context is not compatible with this the CUDA Runtime. " + "This can only occur if you are using CUDA Runtime/Driver interoperability and have " + "created an existing Driver context using the driver API. The Driver context may be " + "incompatible either because the Driver context was created using an older version of " + "the API, because the Runtime API call expects a primary driver context and the Driver " + "context is not primary, or because the Driver context has been destroyed. Please see " + '"Interactions with the CUDA Driver API" for more information.' + ), + id="runtime_incompatible_driver_context_codegen_bug", + ), + ], +) +def test_enum_doc_cleanup_examples_are_reviewed_on_change(explanations, error, expected): + _skip_if_bindings_pre_enum_docstrings() + + actual = explanations.get(int(error)) + _assert_cleanup_example_matches_or_xfail(actual, expected) + + def test_check_driver_error_attaches_explanation(): error = driver.CUresult.CUDA_ERROR_INVALID_VALUE name_err, name = driver.cuGetErrorName(error)