Skip to content

Commit 27c73e2

Browse files
committed
refactor(pathfinder): replace spawned child runner with subprocess entrypoint
Move the remaining load-isolation tests onto the same dedicated subprocess pattern as the canary probe so `cuda_pathfinder` only has one subprocess model to maintain. This removes the generic multiprocessing helper while preserving the existing real-loading coverage and failure reporting. Made-with: Cursor
1 parent 15cfd84 commit 27c73e2

File tree

8 files changed

+200
-212
lines changed

8 files changed

+200
-212
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env python
2+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from __future__ import annotations
6+
7+
import json
8+
import os
9+
import sys
10+
import traceback
11+
from collections.abc import Sequence
12+
13+
DYNAMIC_LIB_NOT_FOUND_MARKER = "CHILD_LOAD_NVIDIA_DYNAMIC_LIB_HELPER_DYNAMIC_LIB_NOT_FOUND_ERROR:"
14+
15+
16+
def _validate_abs_path(abs_path: str) -> None:
    """Check that ``abs_path`` is a non-empty, absolute path to an existing file.

    Args:
        abs_path: Path reported for a loaded library.

    Raises:
        AssertionError: If ``abs_path`` is empty, is not absolute, or does not
            refer to an existing file.
    """
    # Raise explicitly instead of using ``assert`` statements: this helper runs
    # in a separately launched interpreter, and ``assert`` statements are
    # removed when that interpreter has optimizations enabled (``-O`` or
    # ``PYTHONOPTIMIZE``), which would silently skip the validation.
    if not abs_path:
        raise AssertionError(f"empty path: {abs_path=!r}")
    if not os.path.isabs(abs_path):
        raise AssertionError(f"not absolute: {abs_path=!r}")
    if not os.path.isfile(abs_path):
        raise AssertionError(f"not a file: {abs_path=!r}")
20+
21+
22+
def _load_nvidia_dynamic_lib_for_test(libname: str) -> str:
    """Load ``libname`` through the public API and cross-check the results.

    The library is loaded three times, in this order: once via
    ``load_nvidia_dynamic_lib`` (expected to be the first load in this
    process), once more via the same function (expected to return the very
    same object), and once via ``_load_lib_no_cache``.

    Args:
        libname: Name of the library to load.

    Returns:
        The ``abs_path`` reported by the first load.

    Raises:
        RuntimeError: If any of the consistency checks between the three
            loads fails.
        AssertionError: If ``found_via`` of the first load is ``None``; path
            validation is delegated to ``_validate_abs_path``.
    """
    # Imports are deferred into the function so that interpreter startup in
    # the subprocess stays focused on the code under test rather than on the
    # parent test module.
    from cuda.pathfinder import load_nvidia_dynamic_lib
    from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _load_lib_no_cache
    from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
        SUPPORTED_LINUX_SONAMES,
        SUPPORTED_WINDOWS_DLLS,
    )
    from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

    # Step 1: first load in this process; nothing else may have loaded it yet.
    loaded_dl_fresh = load_nvidia_dynamic_lib(libname)
    if loaded_dl_fresh.was_already_loaded_from_elsewhere:
        raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")
    _validate_abs_path(loaded_dl_fresh.abs_path)
    assert loaded_dl_fresh.found_via is not None

    # Step 2: a repeated call must hand back the identical object.
    loaded_dl_from_cache = load_nvidia_dynamic_lib(libname)
    if loaded_dl_from_cache is not loaded_dl_fresh:
        raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")

    # Step 3: the no-cache entry point must resolve to the same file. For
    # names listed in the platform's supported table it must also report the
    # library as already loaded.
    loaded_dl_no_cache = _load_lib_no_cache(libname)
    if IS_WINDOWS:
        supported_libs = SUPPORTED_WINDOWS_DLLS
    else:
        supported_libs = SUPPORTED_LINUX_SONAMES
    if not loaded_dl_no_cache.was_already_loaded_from_elsewhere and libname in supported_libs:
        raise RuntimeError("not loaded_dl_no_cache.was_already_loaded_from_elsewhere")
    same_file = os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path)
    if not same_file:
        raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})")
    _validate_abs_path(loaded_dl_no_cache.abs_path)

    return loaded_dl_fresh.abs_path
52+
53+
54+
def probe_load_nvidia_dynamic_lib_and_print_json(libname: str) -> None:
    """Run the load checks for ``libname`` and report the outcome on stdout.

    On success, one line containing the JSON-encoded absolute path of the
    loaded library is written. If the library cannot be found,
    ``DYNAMIC_LIB_NOT_FOUND_MARKER`` is written on a line of its own, followed
    by the traceback, and the function returns normally. Any other exception
    propagates to the caller.

    Args:
        libname: Name of the library to load.
    """
    from cuda.pathfinder import DynamicLibNotFoundError

    try:
        abs_path = _load_nvidia_dynamic_lib_for_test(libname)
    except DynamicLibNotFoundError:
        # Not-found is a reportable outcome rather than a crash: the marker
        # and the traceback both go to stdout.
        sys.stdout.write(DYNAMIC_LIB_NOT_FOUND_MARKER + "\n")
        traceback.print_exc(file=sys.stdout)
    else:
        sys.stdout.write(json.dumps(abs_path) + "\n")
64+
65+
66+
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: probe exactly one library name.

    Args:
        argv: Arguments to use instead of ``sys.argv[1:]``; ``None`` means
            read them from ``sys.argv``.

    Returns:
        ``0`` after the probe has run.

    Raises:
        SystemExit: With a usage message when the argument count is not one.
    """
    cli_args = sys.argv[1:] if argv is None else list(argv)
    if len(cli_args) == 1:
        probe_load_nvidia_dynamic_lib_and_print_json(cli_args[0])
        return 0
    raise SystemExit("Usage: python -m cuda.pathfinder._testing.load_nvidia_dynamic_lib_subprocess <libname>")


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())

cuda_pathfinder/cuda/pathfinder/_utils/spawned_process_runner.py

Lines changed: 0 additions & 131 deletions
This file was deleted.
Lines changed: 34 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,50 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4-
# This helper is factored out so spawned child processes only import this
5-
# lightweight module. That avoids re-importing the test module (and
6-
# repeating its potentially expensive setup) in every child process.
4+
from __future__ import annotations
75

8-
import json
9-
import os
6+
import subprocess
107
import sys
11-
import traceback
8+
from pathlib import Path
129

10+
from cuda.pathfinder._testing.load_nvidia_dynamic_lib_subprocess import DYNAMIC_LIB_NOT_FOUND_MARKER
1311

14-
def build_child_process_failed_for_libname_message(libname, result):
12+
LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODULE = "cuda.pathfinder._testing.load_nvidia_dynamic_lib_subprocess"
13+
LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD = Path(__file__).resolve().parents[1]
14+
PROCESS_TIMED_OUT = -9
15+
16+
17+
def build_child_process_failed_for_libname_message(libname: str, result: subprocess.CompletedProcess[str]) -> str:
    """Format a failure report for a child process that was run for ``libname``.

    Args:
        libname: Library name the child process was asked to load.
        result: Completed child process; its ``returncode``, ``stdout`` and
            ``stderr`` are embedded in the report.

    Returns:
        A multi-line message with the exit code followed by the child's stdout
        and stderr, each wrapped in explicit begin/end markers so that empty
        or unterminated output remains unambiguous.
    """
    headline = f"Child process failed for {libname=!r} with exit code {result.returncode}\n"
    stdout_section = f"--- stdout-from-child-process ---\n{result.stdout}<end-of-stdout-from-child-process>\n"
    stderr_section = f"--- stderr-from-child-process ---\n{result.stderr}<end-of-stderr-from-child-process>\n"
    return "".join((headline, stdout_section, stderr_section))
2023

2124

22-
def validate_abs_path(abs_path):
23-
assert abs_path, f"empty path: {abs_path=!r}"
24-
assert os.path.isabs(abs_path), f"not absolute: {abs_path=!r}"
25-
assert os.path.isfile(abs_path), f"not a file: {abs_path=!r}"
25+
def child_process_reported_dynamic_lib_not_found(result: subprocess.CompletedProcess[str]) -> bool:
    """Tell whether the child's stdout begins with the not-found marker.

    Args:
        result: Completed child process with its stdout captured as text.

    Returns:
        ``True`` if ``result.stdout`` starts with
        ``DYNAMIC_LIB_NOT_FOUND_MARKER``, otherwise ``False``.
    """
    child_stdout = result.stdout
    return child_stdout.startswith(DYNAMIC_LIB_NOT_FOUND_MARKER)
2627

2728

28-
def child_process_func(libname):
29-
from cuda.pathfinder import DynamicLibNotFoundError, load_nvidia_dynamic_lib
30-
from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _load_lib_no_cache
31-
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
32-
SUPPORTED_LINUX_SONAMES,
33-
SUPPORTED_WINDOWS_DLLS,
34-
)
35-
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
36-
29+
def run_load_nvidia_dynamic_lib_in_subprocess(
30+
libname: str,
31+
*,
32+
timeout: float,
33+
) -> subprocess.CompletedProcess[str]:
34+
command = [sys.executable, "-m", LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_MODULE, libname]
3735
try:
38-
loaded_dl_fresh = load_nvidia_dynamic_lib(libname)
39-
except DynamicLibNotFoundError:
40-
print("CHILD_LOAD_NVIDIA_DYNAMIC_LIB_HELPER_DYNAMIC_LIB_NOT_FOUND_ERROR:")
41-
traceback.print_exc(file=sys.stdout)
42-
return
43-
if loaded_dl_fresh.was_already_loaded_from_elsewhere:
44-
raise RuntimeError("loaded_dl_fresh.was_already_loaded_from_elsewhere")
45-
validate_abs_path(loaded_dl_fresh.abs_path)
46-
assert loaded_dl_fresh.found_via is not None
47-
48-
loaded_dl_from_cache = load_nvidia_dynamic_lib(libname)
49-
if loaded_dl_from_cache is not loaded_dl_fresh:
50-
raise RuntimeError("loaded_dl_from_cache is not loaded_dl_fresh")
51-
52-
loaded_dl_no_cache = _load_lib_no_cache(libname)
53-
# check_if_already_loaded_from_elsewhere relies on these:
54-
supported_libs = SUPPORTED_WINDOWS_DLLS if IS_WINDOWS else SUPPORTED_LINUX_SONAMES
55-
if not loaded_dl_no_cache.was_already_loaded_from_elsewhere and libname in supported_libs:
56-
raise RuntimeError("not loaded_dl_no_cache.was_already_loaded_from_elsewhere")
57-
if not os.path.samefile(loaded_dl_no_cache.abs_path, loaded_dl_fresh.abs_path):
58-
raise RuntimeError(f"not os.path.samefile({loaded_dl_no_cache.abs_path=!r}, {loaded_dl_fresh.abs_path=!r})")
59-
validate_abs_path(loaded_dl_no_cache.abs_path)
60-
61-
print(json.dumps(loaded_dl_fresh.abs_path))
36+
return subprocess.run( # noqa: S603 - trusted argv: current interpreter + internal test helper module
37+
command,
38+
capture_output=True,
39+
text=True,
40+
timeout=timeout,
41+
check=False,
42+
cwd=LOAD_NVIDIA_DYNAMIC_LIB_SUBPROCESS_CWD,
43+
)
44+
except subprocess.TimeoutExpired:
45+
return subprocess.CompletedProcess(
46+
args=command,
47+
returncode=PROCESS_TIMED_OUT,
48+
stdout="",
49+
stderr=f"Process timed out after {timeout} seconds and was terminated.",
50+
)

cuda_pathfinder/tests/test_driver_lib_loading.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
import os
1313

1414
import pytest
15-
from child_load_nvidia_dynamic_lib_helper import build_child_process_failed_for_libname_message, child_process_func
15+
from child_load_nvidia_dynamic_lib_helper import (
16+
build_child_process_failed_for_libname_message,
17+
child_process_reported_dynamic_lib_not_found,
18+
run_load_nvidia_dynamic_lib_in_subprocess,
19+
)
1620

1721
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
1822
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL
@@ -22,7 +26,6 @@
2226
_load_lib_no_cache,
2327
)
2428
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell
25-
from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
2629

2730
STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works")
2831
assert STRICTNESS in ("see_what_works", "all_must_work")
@@ -119,27 +122,27 @@ def test_load_lib_no_cache_does_not_dispatch_ctk_lib_to_driver_path(mocker):
119122

120123

121124
# ---------------------------------------------------------------------------
122-
# Real loading tests (spawned child process for isolation)
125+
# Real loading tests (dedicated subprocess for isolation)
123126
# ---------------------------------------------------------------------------
124127

125128

126129
@pytest.mark.parametrize("libname", sorted(_DRIVER_ONLY_LIBNAMES))
127130
def test_real_load_driver_lib(info_summary_append, libname):
128-
"""Load a real driver library in a child process.
131+
"""Load a real driver library in a dedicated subprocess.
129132
130133
This complements the mock tests above: it exercises the actual OS
131134
loader path and logs results via INFO for CI/QA inspection.
132135
"""
133136
timeout = 120 if IS_WINDOWS else 30
134-
result = run_in_spawned_child_process(child_process_func, args=(libname,), timeout=timeout)
137+
result = run_load_nvidia_dynamic_lib_in_subprocess(libname, timeout=timeout)
135138

136139
def raise_child_process_failed():
137140
raise RuntimeError(build_child_process_failed_for_libname_message(libname, result))
138141

139142
if result.returncode != 0:
140143
raise_child_process_failed()
141144
assert not result.stderr
142-
if result.stdout.startswith("CHILD_LOAD_NVIDIA_DYNAMIC_LIB_HELPER_DYNAMIC_LIB_NOT_FOUND_ERROR:"):
145+
if child_process_reported_dynamic_lib_not_found(result):
143146
if STRICTNESS == "all_must_work":
144147
raise_child_process_failed()
145148
info_summary_append(f"Not found: {libname=!r}")

0 commit comments

Comments
 (0)