Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add feature to require kernel module #764

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions internal/config/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ type features struct {
// DisableImexChannelCreation ensures that the implicit creation of
// requested IMEX channels is skipped when invoking the nvidia-container-cli.
DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
// RequireNvidiaKernelModules indicates that the NVIDIA kernel module must be
// loaded for the NVIDIA Container Runtime to perform any OCI spec modifications.
RequireNvidiaKernelModules *feature `toml:"require-nvidia-kernel-module,omitempty"`
}

//nolint:unused
Expand Down
19 changes: 19 additions & 0 deletions internal/runtime/runtime_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package runtime

import (
"fmt"
"os"

"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
Expand All @@ -41,6 +42,11 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
return lowLevelRuntime, nil
}

if cfg.Features.RequireNvidiaKernelModules.IsEnabled() && !isNvidiaModuleLoaded() {
logger.Tracef("NVIDIA driver modules are not yet loaded; skipping modifer")
return lowLevelRuntime, nil
}

ociSpec, err := oci.NewSpec(logger, argv)
if err != nil {
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
Expand All @@ -62,6 +68,19 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
return r, nil
}

// isNvidiaModuleLoaded reports whether the NVIDIA kernel module appears to be
// loaded. The check is whether /proc/driver/nvidia/version can be stat'ed; any
// error returned by os.Stat (not only "does not exist") is treated as the
// module not being loaded.
func isNvidiaModuleLoaded() bool {
	// TODO: The equivalent check was previously implemented in shell as:
	//   cat /proc/modules | grep -e "^nvidia " >/dev/null 2>&1
	//   if [ "${?}" != "0" ]; then
	//     echo "nvidia driver modules are not yet loaded, invoking runc directly"
	//     exec runc "$@"
	//   fi
	// NOTE(review): that variant inspected /proc/modules rather than the
	// driver's version file; confirm the two signals are interchangeable.
	const versionFile = "/proc/driver/nvidia/version"
	if _, statErr := os.Stat(versionFile); statErr != nil {
		return false
	}
	return true
}

// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
Expand Down
6 changes: 0 additions & 6 deletions tools/container/toolkit/executable.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ type executable struct {
source string
target executableTarget
env map[string]string
preLines []string
argLines []string
}

Expand Down Expand Up @@ -96,11 +95,6 @@ func (e executable) writeWrapperTo(wrapper io.Writer, destFolder string, dotfile
// Add the shebang
fmt.Fprintln(wrapper, "#! /bin/sh")

// Add the preceding lines if any
for _, line := range e.preLines {
fmt.Fprintf(wrapper, "%s\n", r.apply(line))
}

// Update the path to include the destination folder
var env map[string]string
if e.env == nil {
Expand Down
17 changes: 0 additions & 17 deletions tools/container/toolkit/executable_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,23 +59,6 @@ func TestWrapper(t *testing.T) {
"",
},
},
{
e: executable{
preLines: []string{
"preline1",
"preline2",
},
},
expectedLines: []string{
shebang,
"preline1",
"preline2",
"PATH=/dest/folder:$PATH \\",
"source.real \\",
"\t\"$@\"",
"",
},
},
{
e: executable{
argLines: []string{
Expand Down
17 changes: 3 additions & 14 deletions tools/container/toolkit/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,27 +57,16 @@ func newNvidiaContainerRuntimeInstaller(source string) *executable {
}

func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable {
preLines := []string{
"",
"cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1",
"if [ \"${?}\" != \"0\" ]; then",
" echo \"nvidia driver modules are not yet loaded, invoking runc directly\"",
" exec runc \"$@\"",
"fi",
"",
}

runtimeEnv := make(map[string]string)
runtimeEnv["XDG_CONFIG_HOME"] = filepath.Join(destDirPattern, ".config")
for k, v := range env {
runtimeEnv[k] = v
}

r := executable{
source: source,
target: target,
env: runtimeEnv,
preLines: preLines,
source: source,
target: target,
env: runtimeEnv,
}

return &r
Expand Down
7 changes: 0 additions & 7 deletions tools/container/toolkit/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,6 @@ func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) {

expectedLines := []string{
shebang,
"",
"cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1",
"if [ \"${?}\" != \"0\" ]; then",
" echo \"nvidia driver modules are not yet loaded, invoking runc directly\"",
" exec runc \"$@\"",
"fi",
"",
"PATH=/dest/folder:$PATH \\",
"XDG_CONFIG_HOME=/dest/folder/.config \\",
"source.real \\",
Expand Down
2 changes: 2 additions & 0 deletions tools/container/toolkit/toolkit.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,8 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
configValues["nvidia-container-runtime.runtimes"] = toolkitRuntimeList
}

// We require the NVIDIA kernel modules to be loaded.
configValues["features.require-nvidia-kernel-modules"] = true
for _, optInFeature := range opts.optInFeatures.Value() {
configValues["features."+optInFeature] = true
}
Expand Down