Skip to content

Commit

Permalink
Merge pull request NVIDIA#363 from vzhurba01/wheels-lib-loading
Browse files Browse the repository at this point in the history
Enable discovery and loading at run time of NVRTC and nvJitLink libraries in a wheels ecosystem
  • Loading branch information
leofang authored Jan 9, 2025
2 parents e774b32 + c136130 commit 61ef224
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 21 deletions.
46 changes: 40 additions & 6 deletions cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
{{if 'Windows' == platform.system()}}
import win32api
import os
import site
import struct
import win32api
from pywintypes import error
{{else}}
cimport cuda.bindings._lib.dlfcn as dlfcn
Expand Down Expand Up @@ -44,16 +46,48 @@ cdef int cuPythonInit() except -1 nogil:

# Load library
{{if 'Windows' == platform.system()}}
LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
with gil:
# First check if the DLL has been loaded by 3rd parties
try:
handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
handle = win32api.GetModuleHandle("nvrtc64_120_0.dll")
except:
handle = None

# Else try default search
if not handle:
LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
try:
handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
except:
pass

# Final check if DLLs can be found within pip installations
if not handle:
site_packages = [site.getusersitepackages()] + site.getsitepackages()
for sp in site_packages:
mod_path = os.path.join(sp, "nvidia", "cuda_nvrtc", "bin")
if not os.path.isdir(mod_path):
continue
os.add_dll_directory(mod_path)
LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000
LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
try:
handle = win32api.LoadLibraryEx(
# Note: LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR needs an abs path...
os.path.join(mod_path, "nvrtc64_120_0.dll"),
0, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR)

# Note: nvrtc64_120_0.dll calls into nvrtc-builtins64_*.dll which is
# located in the same mod_path.
# Update PATH environ so that the two dlls can find each other
os.environ["PATH"] = os.pathsep.join((os.environ.get("PATH", ""), mod_path))
except:
pass

if not handle:
raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll')
{{else}}
handle = NULL
if handle == NULL:
handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)
handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)
if handle == NULL:
with gil:
raise RuntimeError('Failed to dlopen libnvrtc.so.12')
Expand Down
20 changes: 19 additions & 1 deletion cuda_bindings/docs/source/release/12.x.y-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,22 @@
Released on MM DD, 20YY.

## Highlights
- Added bindings for nvJitLink. It requires nvJitLink from CUDA 12.3 or above.
- Add bindings for nvJitLink. It requires nvJitLink from CUDA 12.3 or above.
- Add optional dependencies to wheels for NVRTC and nvJitLink
- Enable discovery and loading of shared library dependencies from wheels

## Wheels support for optional dependencies

Optional dependencies are added for the following packages:

- nvidia-nvjitlink-cuXX
- nvidia-cuda-nvrtc-cuXX

Installing these dependencies with cuda-python can be done using:
```{code-block} shell
pip install cuda-python[all]
```

## Discovery and loading of shared library dependencies from wheels

Shared library search paths for wheel builds are now extended to check site-packages. This allows users to seamlessly use their wheel installation of the CUDA Toolkit with cuda-python.
6 changes: 6 additions & 0 deletions cuda_bindings/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ dependencies = [
"pywin32; sys_platform == 'win32'",
]

[project.optional-dependencies]
all = [
"nvidia-cuda-nvrtc-cu12",
"nvidia-nvjitlink-cu12>=12.3"
]

[project.urls]
Repository = "https://github.com/NVIDIA/cuda-python"
Documentation = "https://nvidia.github.io/cuda-python/"
Expand Down
54 changes: 40 additions & 14 deletions cuda_bindings/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,14 @@
from Cython.Build import cythonize
from pyclibrary import CParser
from setuptools import find_packages, setup
from setuptools.command.bdist_wheel import bdist_wheel
from setuptools.command.build_ext import build_ext
from setuptools.extension import Extension

# ----------------------------------------------------------------------
# Fetch configuration options

# Locate the CUDA Toolkit installation. CUDA_HOME takes precedence and
# CUDA_PATH is the fallback. Using `or` (instead of passing the fallback as the
# default of ``dict.get``) also covers the case where CUDA_HOME is defined but
# empty: an empty value is treated as "not set" and CUDA_PATH is consulted.
CUDA_HOME = os.environ.get("CUDA_HOME") or os.environ.get("CUDA_PATH")
if not CUDA_HOME:
    raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set")

Expand Down Expand Up @@ -283,24 +282,51 @@ def do_cythonize(extensions):
extensions += prep_extensions(sources)

# ---------------------------------------------------------------------
# Custom build_ext command
# Files are built in two steps:
# 1) Cythonized (in the do_cythonize() command)
# 2) Compiled to .o files as part of build_ext
# This class is solely for passing the value of nthreads to build_ext
# Custom cmdclass extensions

# Module-level flag: stays False unless the bdist_wheel command below runs.
# ParallelBuildExtensions.build_extension reads it to decide on link flags.
building_wheel = False


class WheelsBuildExtensions(bdist_wheel):
    """``bdist_wheel`` command that records that a wheel is being built."""

    def run(self):
        """Set the ``building_wheel`` flag, then run the stock command."""
        # The flag must be set before delegating so that any step triggered by
        # the parent command already observes it.
        global building_wheel
        building_wheel = True
        super().run()


class ParallelBuildExtensions(build_ext):
    """``build_ext`` command with parallel compilation and wheel link flags.

    Two customizations are applied on top of the stock command:

    * ``initialize_options`` forwards the module-level ``nthreads`` value to
      the command's ``parallel`` option when it is positive.
    * ``build_extension`` appends extra linker flags when a wheel is being
      built on Linux (``building_wheel`` is true and ``sys.platform`` is
      ``"linux"``).
    """

    # Extension name -> run-time library search path, expressed relative to
    # the extension's own location ($ORIGIN), pointing at the ``lib`` directory
    # of the matching ``nvidia`` wheel package.
    _WHEEL_RPATHS = {
        "cuda.bindings._bindings.cynvrtc": "$ORIGIN/../../../nvidia/cuda_nvrtc/lib",
        "cuda.bindings._internal.nvjitlink": "$ORIGIN/../../../nvidia/nvjitlink/lib",
    }

    def initialize_options(self):
        """Initialize defaults, then request ``nthreads`` parallel jobs if set."""
        super().initialize_options()
        if nthreads > 0:
            self.parallel = nthreads

    def build_extension(self, ext):
        """Build ``ext``, adding wheel-specific linker flags on Linux.

        ``ext.extra_link_args`` is extended in place before delegating to the
        parent implementation. Outside of a Linux wheel build nothing is added.
        """
        extra_linker_flags = []
        if building_wheel and sys.platform == "linux":
            # Strip binaries to remove debug symbols
            extra_linker_flags.append("-Wl,--strip-all")

            # Allow extensions to discover libraries at runtime
            # relative to their wheels installation.
            # NOTE(review): --disable-new-dtags is expected to make GNU ld emit
            # DT_RPATH rather than DT_RUNPATH - confirm against the linker docs.
            rpath = self._WHEEL_RPATHS.get(ext.name)
            if rpath is not None:
                extra_linker_flags.append(f"-Wl,--disable-new-dtags,-rpath,{rpath}")

        ext.extra_link_args += extra_linker_flags
        super().build_extension(ext)


# Map setuptools command names to the customized command classes defined in
# this file: "bdist_wheel" uses WheelsBuildExtensions and "build_ext" uses
# ParallelBuildExtensions.
cmdclass = {
    "bdist_wheel": WheelsBuildExtensions,
    "build_ext": ParallelBuildExtensions,
}

# ----------------------------------------------------------------------
# Setup
Expand Down

0 comments on commit 61ef224

Please sign in to comment.