Skip to content

WIP+WIP: PR #604 + use path_finder in bindings (nvJitLink, nvrtc, nvvm) #610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8c9a2de
WIP (search priority updated in README.md but not in code)
rwgk May 4, 2025
8479511
Merge branch 'path_finder_dev' into path_finder_search_priority_v2
rwgk May 4, 2025
2cf3fa2
Completely replace cuda_paths.py to achieve the desired Search Priori…
rwgk May 4, 2025
2b74022
Define `IS_WINDOWS = sys.platform == "win32"` in supported_libs.py
rwgk May 5, 2025
27db0a7
Use os.path.samefile() to resolve issues with doubled backslashes.
rwgk May 5, 2025
7d8ab70
Change nvJitLink, nvrtc, nvvm bindings to use path_finder
rwgk May 5, 2025
e0a0143
Merge branch 'path_finder_dev' into path_finder_search_priority_v2
rwgk May 5, 2025
1f728c0
`load_in_subprocess()`: Pass current environment
rwgk May 5, 2025
0d23bb6
Add run_python_code_safely.py as generated by perplexity, plus ruff f…
rwgk May 5, 2025
b1a5e9d
Replace subprocess.run with run_python_code_safely
rwgk May 5, 2025
8e9c7b1
Factor out `class Worker` to fix pickle issue.
rwgk May 5, 2025
5977b9d
ChatGPT revisions based on Deep research:
rwgk May 5, 2025
44eb27e
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 5, 2025
9b474bc
Fix race condition in result queue handling by using timeout-based get()
rwgk May 5, 2025
34c2874
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 5, 2025
ab00a87
Resolve SIM108
rwgk May 5, 2025
2a039d2
Change to "nppc" as ANCHOR_LIBNAME
rwgk May 5, 2025
f978e67
Implement CUDA_PYTHON_CUDA_HOME_PRIORITY first, last, with default first
rwgk May 6, 2025
782fcf6
Remove retry_with_anchor_abs_path() and make retry_with_cuda_home_pri…
rwgk May 6, 2025
4379bb5
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 6, 2025
ad9e994
Restore nvvm-related LD_LIBRARY_PATH, PATH manipulations from main br…
rwgk May 6, 2025
a1b553b
Update README.md to reflect new search priority
rwgk May 6, 2025
676ecb2
Update README.md to reflect new search priority
rwgk May 6, 2025
a309dd4
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 6, 2025
73498c0
SUPPORTED_LINUX_SONAMES does not need updates for CTK 12.9.0
rwgk May 6, 2025
7661c13
The only addition to SUPPORTED_WINDOWS_DLLS for CTK 12.9.0 is nvvm70.dll
rwgk May 6, 2025
ddea021
Make OSError in load_dl_windows.py abs_path_for_dynamic_library() mor…
rwgk May 6, 2025
edc5b33
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 6, 2025
55583d9
run_cuda_bindings_path_finder.py: optionally use args as libnames (to…
rwgk May 6, 2025
a576327
Bug fix in load_dl_windows.py: ctypes.windll.kernel32.LoadLibraryW() …
rwgk May 6, 2025
f4225ed
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 6, 2025
5fb2d1f
Remove _find_nvidia_dynamic_library.retry_with_anchor_abs_path() meth…
rwgk May 6, 2025
c620e30
Merge branch 'path_finder_search_priority_v2' into path_finder_search…
rwgk May 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 9 additions & 55 deletions cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
# This code was automatically generated with version 12.9.0. Do not modify it directly.
{{if 'Windows' == platform.system()}}
import os
import site
import struct
import win32api
from pywintypes import error
{{else}}
cimport cuda.bindings._lib.dlfcn as dlfcn
from libc.stdint cimport uintptr_t
{{endif}}
from cuda.bindings import path_finder

from libc.stdint cimport intptr_t

Expand Down Expand Up @@ -48,65 +47,18 @@ cdef bint __cuPythonInit = False
{{if 'nvrtcSetFlowCallback' in found_functions}}cdef void *__nvrtcSetFlowCallback = NULL{{endif}}

cdef int cuPythonInit() except -1 nogil:
{{if 'Windows' != platform.system()}}
cdef void* handle = NULL
{{endif}}

global __cuPythonInit
if __cuPythonInit:
return 0
__cuPythonInit = True

# Load library
{{if 'Windows' == platform.system()}}
with gil:
# First check if the DLL has been loaded by 3rd parties
try:
handle = win32api.GetModuleHandle("nvrtc64_120_0.dll")
except:
handle = None

# Check if DLLs can be found within pip installations
if not handle:
LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
site_packages = [site.getusersitepackages()] + site.getsitepackages()
for sp in site_packages:
mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin")
if os.path.isdir(mod_path):
os.add_dll_directory(mod_path)
try:
handle = win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, "nvrtc64_120_0.dll"),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)

# Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is
# located in the same mod_path.
# Update PATH environ so that the two dlls can find each other
os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
except:
pass
else:
break
else:
# Else try default search
# Only reached if DLL wasn't found in any site-package path
LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
try:
handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
pass

if not handle:
raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll')
{{else}}
handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)
if handle == NULL:
with gil:
raise RuntimeError('Failed to dlopen libnvrtc.so.12')
{{endif}}


# Load function
{{if 'Windows' == platform.system()}}
with gil:
handle = path_finder._load_nvidia_dynamic_library("nvrtc").handle
{{if 'nvrtcGetErrorString' in found_functions}}
try:
global __nvrtcGetErrorString
Expand Down Expand Up @@ -291,6 +243,8 @@ cdef int cuPythonInit() except -1 nogil:
{{endif}}

{{else}}
with gil:
handle = <void*><uintptr_t>path_finder._load_nvidia_dynamic_library("nvrtc").handle
{{if 'nvrtcGetErrorString' in found_functions}}
global __nvrtcGetErrorString
__nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString')
Expand Down
20 changes: 6 additions & 14 deletions cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
#
# This code was automatically generated across versions from 12.0.1 to 12.9.0. Do not modify it directly.

from libc.stdint cimport intptr_t

from .utils cimport get_nvjitlink_dso_version_suffix
from libc.stdint cimport intptr_t, uintptr_t

from .utils import FunctionNotFoundError, NotSupportedError

from cuda.bindings import path_finder

###############################################################################
# Extern
###############################################################################
Expand Down Expand Up @@ -52,17 +52,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL
cdef void* __nvJitLinkVersion = NULL


cdef void* load_library(const int driver_ver) except* with gil:
cdef void* handle
for suffix in get_nvjitlink_dso_version_suffix(driver_ver):
so_name = "libnvJitLink.so" + (f".{suffix}" if suffix else suffix)
handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
if handle != NULL:
break
else:
err_msg = dlerror()
raise RuntimeError(f'Failed to dlopen libnvJitLink ({err_msg.decode()})')
return handle
cdef void* load_library(int driver_ver) except* with gil:
cdef uintptr_t handle = path_finder._load_nvidia_dynamic_library("nvJitLink").handle
return <void*>handle


cdef int _check_or_init_nvjitlink() except -1 nogil:
Expand Down
53 changes: 8 additions & 45 deletions cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@

from libc.stdint cimport intptr_t

from .utils cimport get_nvjitlink_dso_version_suffix

from .utils import FunctionNotFoundError, NotSupportedError

import os
import site
from cuda.bindings import path_finder

import win32api

Expand Down Expand Up @@ -42,44 +39,9 @@ cdef void* __nvJitLinkGetInfoLog = NULL
cdef void* __nvJitLinkVersion = NULL


cdef inline list get_site_packages():
return [site.getusersitepackages()] + site.getsitepackages()


cdef load_library(const int driver_ver):
handle = 0

for suffix in get_nvjitlink_dso_version_suffix(driver_ver):
if len(suffix) == 0:
continue
dll_name = f"nvJitLink_{suffix}0_0.dll"

# First check if the DLL has been loaded by 3rd parties
try:
return win32api.GetModuleHandle(dll_name)
except:
pass

# Next, check if DLLs are installed via pip
for sp in get_site_packages():
mod_path = os.path.join(sp, "nvidia", "nvJitLink", "bin")
if os.path.isdir(mod_path):
os.add_dll_directory(mod_path)
try:
return win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, dll_name),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
except:
pass
# Finally, try default search
# Only reached if DLL wasn't found in any site-package path
try:
return win32api.LoadLibrary(dll_name)
except:
pass

raise RuntimeError('Failed to load nvJitLink')
cdef void* load_library(int driver_ver) except* with gil:
cdef intptr_t handle = path_finder._load_nvidia_dynamic_library("nvJitLink").handle
return <void*>handle


cdef int _check_or_init_nvjitlink() except -1 nogil:
Expand All @@ -88,23 +50,24 @@ cdef int _check_or_init_nvjitlink() except -1 nogil:
return 0

cdef int err, driver_ver
cdef intptr_t handle
with gil:
# Load driver to check version
try:
handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
except Exception as e:
raise NotSupportedError(f'CUDA driver is not found ({e})')
global __cuDriverGetVersion
if __cuDriverGetVersion == NULL:
__cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
__cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion')
if __cuDriverGetVersion == NULL:
raise RuntimeError('something went wrong')
err = (<int (*)(int*) noexcept nogil>__cuDriverGetVersion)(&driver_ver)
if err != 0:
raise RuntimeError('something went wrong')

# Load library
handle = load_library(driver_ver)
handle = <intptr_t>load_library(driver_ver)

# Load function
global __nvJitLinkCreate
Expand Down
18 changes: 5 additions & 13 deletions cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
#
# This code was automatically generated across versions from 11.0.3 to 12.9.0. Do not modify it directly.

from libc.stdint cimport intptr_t

from .utils cimport get_nvvm_dso_version_suffix
from libc.stdint cimport intptr_t, uintptr_t

from .utils import FunctionNotFoundError, NotSupportedError

from cuda.bindings import path_finder

###############################################################################
# Extern
###############################################################################
Expand Down Expand Up @@ -51,16 +51,8 @@ cdef void* __nvvmGetProgramLog = NULL


cdef void* load_library(const int driver_ver) except* with gil:
cdef void* handle
for suffix in get_nvvm_dso_version_suffix(driver_ver):
so_name = "libnvvm.so" + (f".{suffix}" if suffix else suffix)
handle = dlopen(so_name.encode(), RTLD_NOW | RTLD_GLOBAL)
if handle != NULL:
break
else:
err_msg = dlerror()
raise RuntimeError(f'Failed to dlopen libnvvm ({err_msg.decode()})')
return handle
cdef uintptr_t handle = path_finder._load_nvidia_dynamic_library("nvvm").handle
return <void*>handle


cdef int _check_or_init_nvvm() except -1 nogil:
Expand Down
61 changes: 8 additions & 53 deletions cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@

from libc.stdint cimport intptr_t

from .utils cimport get_nvvm_dso_version_suffix

from .utils import FunctionNotFoundError, NotSupportedError

import os
import site
from cuda.bindings import path_finder

import win32api

Expand Down Expand Up @@ -40,52 +37,9 @@ cdef void* __nvvmGetProgramLogSize = NULL
cdef void* __nvvmGetProgramLog = NULL


cdef inline list get_site_packages():
return [site.getusersitepackages()] + site.getsitepackages() + ["conda"]


cdef load_library(const int driver_ver):
handle = 0

for suffix in get_nvvm_dso_version_suffix(driver_ver):
if len(suffix) == 0:
continue
dll_name = "nvvm64_40_0.dll"

# First check if the DLL has been loaded by 3rd parties
try:
return win32api.GetModuleHandle(dll_name)
except:
pass

# Next, check if DLLs are installed via pip or conda
for sp in get_site_packages():
if sp == "conda":
# nvvm is not under $CONDA_PREFIX/lib, so it's not in the default search path
conda_prefix = os.environ.get("CONDA_PREFIX")
if conda_prefix is None:
continue
mod_path = os.path.join(conda_prefix, "Library", "nvvm", "bin")
else:
mod_path = os.path.join(sp, "nvidia", "cuda_nvcc", "nvvm", "bin")
if os.path.isdir(mod_path):
os.add_dll_directory(mod_path)
try:
return win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, dll_name),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)
except:
pass

# Finally, try default search
# Only reached if DLL wasn't found in any site-package path
try:
return win32api.LoadLibrary(dll_name)
except:
pass

raise RuntimeError('Failed to load nvvm')
cdef void* load_library(int driver_ver) except* with gil:
cdef intptr_t handle = path_finder._load_nvidia_dynamic_library("nvvm").handle
return <void*>handle


cdef int _check_or_init_nvvm() except -1 nogil:
Expand All @@ -94,23 +48,24 @@ cdef int _check_or_init_nvvm() except -1 nogil:
return 0

cdef int err, driver_ver
cdef intptr_t handle
with gil:
# Load driver to check version
try:
handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
nvcuda_handle = win32api.LoadLibraryEx("nvcuda.dll", 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
except Exception as e:
raise NotSupportedError(f'CUDA driver is not found ({e})')
global __cuDriverGetVersion
if __cuDriverGetVersion == NULL:
__cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
__cuDriverGetVersion = <void*><intptr_t>win32api.GetProcAddress(nvcuda_handle, 'cuDriverGetVersion')
if __cuDriverGetVersion == NULL:
raise RuntimeError('something went wrong')
err = (<int (*)(int*) noexcept nogil>__cuDriverGetVersion)(&driver_ver)
if err != 0:
raise RuntimeError('something went wrong')

# Load library
handle = load_library(driver_ver)
handle = <intptr_t>load_library(driver_ver)

# Load function
global __nvvmVersion
Expand Down
3 changes: 0 additions & 3 deletions cuda_bindings/cuda/bindings/_internal/utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj,

cdef bint is_nested_sequence(data)
cdef void* get_buffer_pointer(buf, Py_ssize_t size, readonly=*) except*

cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver)
cdef tuple get_nvvm_dso_version_suffix(int driver_ver)
14 changes: 0 additions & 14 deletions cuda_bindings/cuda/bindings/_internal/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -127,17 +127,3 @@ cdef int get_nested_resource_ptr(nested_resource[ResT] &in_out_ptr, object obj,
class FunctionNotFoundError(RuntimeError): pass

class NotSupportedError(RuntimeError): pass


cdef tuple get_nvjitlink_dso_version_suffix(int driver_ver):
if 12000 <= driver_ver < 13000:
return ('12', '')
raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported')


cdef tuple get_nvvm_dso_version_suffix(int driver_ver):
if 11000 <= driver_ver < 11020:
return ('3', '')
if 11020 <= driver_ver < 13000:
return ('4', '')
raise NotSupportedError(f'CUDA driver version {driver_ver} is not supported')
Loading