Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,624 changes: 1,875 additions & 1,749 deletions deps.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions go.work
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ use (
./examples/hello-world-service/src
./nvidia-gpu/nvidia-container-toolkit/nvidia-container-runtime-wrapper
./nvidia-gpu/nvidia-container-toolkit/nvidia-persistenced-wrapper
./nvidia-gpu/nvidia-fabricmanager/nvidia-fabricmanager-wrapper
)
45 changes: 19 additions & 26 deletions nvidia-gpu/nvidia-fabricmanager/lts/nvidia-fabricmanager.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# https://docs.nvidia.com/datacenter/tesla/pdf/fabric-manager-user-guide.pdf
name: nvidia-fabricmanager
container:
entrypoint: /usr/local/bin/nv-fabricmanager
args:
- --config
- /usr/local/share/nvidia/nvswitch/fabricmanager.cfg
entrypoint: /usr/bin/nvidia-fabricmanager-wrapper
mounts:
# device files
- source: /dev
Expand All @@ -28,44 +25,40 @@ container:
options:
- bind
- ro
# nvidia libraries
- source: /usr/local/lib
destination: /usr/local/lib
type: bind
options:
- bind
- ro
# service state file
# * nvlsm:
# - pid file that can't be disabled
# - unix socket /var/run/nvidia-fabricmanager/fm_sm_ipc.socket
# can't be changed, path is hardcoded into fabricmanager
# * fabricmanager
# - state file
# - database files
- source: /var/run/nvidia-fabricmanager
destination: /var/run/nvidia-fabricmanager
destination: /var/run
type: bind
options:
- rshared
- rbind
- rw
# log files
- source: /var/log
destination: /var/log
# service cache file
# * nvlsm: database files
- source: /var/cache/nvidia-fabricmanager
destination: /var/cache
type: bind
options:
- rshared
- rbind
- rw
# fabric topology files
- source: /usr/local/share/nvidia/nvswitch
destination: /usr/local/share/nvidia/nvswitch
# service log files
# * nvlsm:
# - mandatory dump files hardcoded to /var/log/<file>, so /var/log must be writable
- source: /var/log/nvidia-fabricmanager
destination: /var/log
type: bind
options:
- rshared
- rbind
- ro
# binaries
- source: /usr/local/bin
destination: /usr/local/bin
type: bind
options:
- bind
- ro
- rw
depends:
- service: cri
# we need to depend on udevd so that the nvidia device files are created
Expand Down
69 changes: 50 additions & 19 deletions nvidia-gpu/nvidia-fabricmanager/lts/pkg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,79 @@ name: nvidia-fabricmanager-lts
variant: scratch
shell: /bin/bash
dependencies:
- stage: base
- stage: base
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need this hack anymore, since this stage no longer seems to use anything from Wolfi?

Copy link
Author

@npdgm npdgm Aug 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Sorry I had it fixed in production but not lts. Both are the same now, except for vars.
So the base stage is back to /, and Wolfi is pulled to /wolfi-base. We still need Wolfi because of the libgcc_s.so.1 file we copy to the extension rootfs, for NVLSM. No package is installed so that file is locked down by WOLFI_BASE_REF. Anyway, GCC's runtime library is quite small and portable, we could pick one from any distro or version and that would work.

- stage: nvidia-fabricmanager-gcc-runtime
- stage: nvidia-fabricmanager-wrapper
steps:
- sources:
# {{ if eq .ARCH "aarch64" }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-sbsa/fabricmanager-linux-sbsa-{{ .NVIDIA_DRIVER_LTS_VERSION }}-archive.tar.xz
destination: fabricmanager.tar.xz
sha256: {{ .NVIDIA_FABRIC_MANAGER_LTS_ARM64_SHA256 }}
sha512: {{ .NVIDIA_FABRIC_MANAGER_LTS_ARM64_SHA512 }}
- url: https://developer.download.nvidia.com/compute/cuda/redist/nvlsm/linux-sbsa/nvlsm-linux-sbsa-{{ .NVIDIA_NVLSM_LTS_VERSION }}-archive.tar.xz
destination: nvlsm.tar.xz
sha256: {{ .NVIDIA_NVLSM_LTS_ARM64_SHA256 }}
sha512: {{ .NVIDIA_NVLSM_LTS_ARM64_SHA512 }}
# {{ else }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
- url: https://developer.download.nvidia.com/compute/nvidia-driver/redist/fabricmanager/linux-x86_64/fabricmanager-linux-x86_64-{{ .NVIDIA_DRIVER_LTS_VERSION }}-archive.tar.xz
destination: fabricmanager.tar.xz
sha256: {{ .NVIDIA_FABRIC_MANAGER_LTS_AMD64_SHA256 }}
sha512: {{ .NVIDIA_FABRIC_MANAGER_LTS_AMD64_SHA512 }}
- url: https://developer.download.nvidia.com/compute/cuda/redist/nvlsm/linux-x86_64/nvlsm-linux-x86_64-{{ .NVIDIA_NVLSM_LTS_VERSION }}-archive.tar.xz
destination: nvlsm.tar.xz
sha256: {{ .NVIDIA_NVLSM_LTS_AMD64_SHA256 }}
sha512: {{ .NVIDIA_NVLSM_LTS_AMD64_SHA512 }}
# {{ end }} This in fact is YAML comment, but Go templating instruction is evaluated by bldr
prepare:
- |
tar -xf fabricmanager.tar.xz --strip-components=1
mkdir fm sm
tar -xf fabricmanager.tar.xz --strip-components=1 -C fm
tar -xf nvlsm.tar.xz --strip-components=1 -C sm
install:
- |
mkdir -p /rootfs/usr/local/bin \
/rootfs/usr/local/lib \
/rootfs/usr/local/share/nvidia/nvswitch \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager \
/rootfs/usr/local/etc/containers
mkdir -p /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/bin \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/lib \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/opt/nvidia/nvlsm/sbin \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/opt/nvidia/nvlsm/lib \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvlsm
# nvlsm
- |
cp sm/sbin/nvlsm /rootfs/usr/local/lib/containers/nvidia-fabricmanager/opt/nvidia/nvlsm/sbin/

cp lib/libnvfm.so.1 /rootfs/usr/local/lib/libnvfm.so.1
ln -s libnvfm.so.1 /rootfs/usr/local/lib/libnvfm.so
cp sm/lib/libgrpc_mgr.so /rootfs/usr/local/lib/containers/nvidia-fabricmanager/opt/nvidia/nvlsm/lib/

cp bin/nv-fabricmanager /rootfs/usr/local/bin/
cp bin/nvswitch-audit /rootfs/usr/local/bin/
cp sm/share/nvidia/nvlsm/device_configuration.conf \
sm/share/nvidia/nvlsm/grpc_mgr.conf \
sm/share/nvidia/nvlsm/nvlsm.conf \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvlsm/
# fabricmanager
- |
cp fm/bin/nv-fabricmanager \
fm/bin/nvswitch-audit \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/bin/

cp share/nvidia/nvswitch/dgx2_hgx2_topology /rootfs/usr/local/share/nvidia/nvswitch/
cp share/nvidia/nvswitch/dgxa100_hgxa100_topology /rootfs/usr/local/share/nvidia/nvswitch/
cp fm/lib/libnvfm.so.1 /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/lib/
ln -s libnvfm.so.1 /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/lib/libnvfm.so

cp etc/fabricmanager.cfg /rootfs/usr/local/share/nvidia/nvswitch/
cp fm/share/nvidia/nvswitch/* \
fm/etc/fabricmanager.cfg \
/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/

cp /pkg/nvidia-fabricmanager.yaml /rootfs/usr/local/etc/containers/nvidia-fabricmanager.yaml
sed -i 's/DAEMONIZE=.*/DAEMONIZE=0/g' /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/fabricmanager.cfg
sed -i 's/LOG_FILE_NAME=.*/LOG_FILE_NAME=/g' /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/fabricmanager.cfg
sed -i 's#STATE_FILE_NAME=.*#STATE_FILE_NAME=/var/run/fabricmanager.state#g' /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/fabricmanager.cfg

sed -i 's/DAEMONIZE=.*/DAEMONIZE=0/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
sed -i 's/STATE_FILE_NAME=.*/STATE_FILE_NAME=\/var\/run\/nvidia-fabricmanager\/fabricmanager.state/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
sed -i 's/TOPOLOGY_FILE_PATH=.*/TOPOLOGY_FILE_PATH=\/usr\/local\/share\/nvidia\/nvswitch/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
sed -i 's/DATABASE_PATH=.*/DATABASE_PATH=\/usr\/local\/share\/nvidia\/nvswitch/g' /rootfs/usr/local/share/nvidia/nvswitch/fabricmanager.cfg
if grep -q '^DATABASE_PATH=' /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/fabricmanager.cfg
then
sed -i 's#DATABASE_PATH=.*#DATABASE_PATH=/var/run#g' /rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/fabricmanager.cfg
else
echo -e '\nDATABASE_PATH=/var/run\n' >>/rootfs/usr/local/lib/containers/nvidia-fabricmanager/usr/share/nvidia/nvswitch/fabricmanager.cfg
fi
- |
mkdir -p /rootfs/usr/local/etc/containers
cp /pkg/nvidia-fabricmanager.yaml /rootfs/usr/local/etc/containers/nvidia-fabricmanager.yaml
test:
- |
mkdir -p /extensions-validator-rootfs
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copies the GCC runtime library (libgcc_s.so.1) out of the Wolfi base image
# into the nvidia-fabricmanager container rootfs, where NVLSM needs it.
# No Wolfi package is installed; the file is pinned by WOLFI_BASE_REF.
name: nvidia-fabricmanager-gcc-runtime
variant: scratch
dependencies:
  - image: cgr.dev/chainguard/wolfi-base@{{ .WOLFI_BASE_REF }}
steps:
  - install:
      - |
        mkdir -p /rootfs/usr/local/lib/containers/nvidia-fabricmanager/opt/nvidia/nvlsm/lib
        cp /usr/lib/libgcc_s.so.1 /rootfs/usr/local/lib/containers/nvidia-fabricmanager/opt/nvidia/nvlsm/lib/
finalize:
  - from: /rootfs
    to: /rootfs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
INTERNAL_PACKAGE: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module nvidia-fabricmanager-wrapper

go 1.23.0

require github.com/goaux/decowriter v1.0.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/goaux/decowriter v1.0.0 h1:f1mfBWGFIo3Upev3gswfGLQzQvC4SBVYi2ZAkNZsIaU=
github.com/goaux/decowriter v1.0.0/go.mod h1:8GKUmiBlNCYxVHU2vlZoQHwLvYh7Iw1c7/tRekJbX7o=
156 changes: 156 additions & 0 deletions nvidia-gpu/nvidia-fabricmanager/nvidia-fabricmanager-wrapper/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package main

import (
"bufio"
"context"
"fmt"
"log"
"os"
"os/exec"
"os/signal"
"path/filepath"
"strings"
"sync"
"syscall"
"time"

"github.com/goaux/decowriter"
)

// Paths and timings for the two supervised daemons. All paths are inside the
// extension container rootfs (see nvidia-fabricmanager.yaml for the mounts
// that back them).
const (
	// FabricManager
	fmCmdFile     = "/usr/bin/nv-fabricmanager"
	fmConfigFile  = "/usr/share/nvidia/nvswitch/fabricmanager.cfg"
	fmStopTimeout = 5 * time.Second // grace period between SIGINT and forced kill

	// NVLSM
	smCmdFile    = "/opt/nvidia/nvlsm/sbin/nvlsm"
	smConfigFile = "/usr/share/nvidia/nvlsm/nvlsm.conf"
	smPidFile    = "/var/run/nvlsm.pid"
	// Unix socket FabricManager uses to reach NVLSM; per the container config
	// comments, this path is hardcoded inside fabricmanager.
	smSocket      = "/var/run/nvidia-fabricmanager/fm_sm_ipc.socket"
	smStopTimeout = 5 * time.Second  // grace period between SIGINT and forced kill
	smSocketWait  = 15 * time.Second // how long main waits for smSocket to appear
)

// runCommand starts path with the given arguments and waits for it in a
// background goroutine.
//
// Lifecycle:
//   - wg.Add(1) happens synchronously, so the caller may wg.Wait()
//     immediately after runCommand returns.
//   - When ctx is canceled the process is sent SIGINT (cmd.Cancel); if it has
//     not exited after waitDelay it is killed (cmd.WaitDelay).
//   - doneCb is invoked after the command exits, whatever the outcome; main
//     passes the shared cancel func so one daemon exiting shuts down the other.
//
// The child's stdout/stderr are forwarded to the wrapper's stdout/stderr with
// each line prefixed by the command's base name (via decowriter).
func runCommand(ctx context.Context, wg *sync.WaitGroup, doneCb func(), waitDelay time.Duration, path string, arg ...string) {
	wg.Add(1)

	cmd := exec.CommandContext(ctx, path, arg...)
	cmd.WaitDelay = waitDelay
	cmd.Cancel = func() error {
		// Ask for a graceful stop first; WaitDelay escalates to a kill.
		return cmd.Process.Signal(os.Interrupt)
	}

	// Write directly to os.Stdout/os.Stderr. The previous bufio.Writer
	// wrappers were never flushed, so sparse subprocess output could sit in
	// the 4 KiB buffer indefinitely and trailing output was lost on exit.
	name := filepath.Base(path)
	cmd.Stdout = decowriter.New(os.Stdout, []byte(name+": "), []byte{})
	cmd.Stderr = decowriter.New(os.Stderr, []byte(name+": "), []byte{})

	go func() {
		log.Printf("nvidia-fabricmanager-wrapper: running command: %s %s\n", path, strings.Join(arg, " "))

		err := cmd.Run()
		if err == nil {
			log.Printf("nvidia-fabricmanager-wrapper: command %s [%d] completed successfully\n", path, cmd.Process.Pid)
		} else if exitErr, ok := err.(*exec.ExitError); ok {
			if exitErr.Exited() {
				log.Printf("nvidia-fabricmanager-wrapper: command %s [%d] exited with code %d\n", path, exitErr.Pid(),
					exitErr.ExitCode())
			} else {
				log.Printf("nvidia-fabricmanager-wrapper: command %s [%d] was terminated\n", path, exitErr.Pid())
			}
		} else {
			log.Printf("nvidia-fabricmanager-wrapper: failed to run command %s: %v\n", path, err)
		}

		// Mark this command finished and trigger shutdown of the sibling.
		wg.Done()
		doneCb()
	}()
}

func waitForFile(ctx context.Context, filepath string, timeout time.Duration) error {
timer := time.NewTimer(timeout)
defer timer.Stop()

for {
select {
case <-ctx.Done():
return fmt.Errorf("parent context canceled: %w", ctx.Err())
case <-timer.C:
return fmt.Errorf("timeout waiting for file")
default:
if _, err := os.Stat(filepath); err == nil {
return nil
}
time.Sleep(100 * time.Millisecond)
}
}
}

// main supervises the NVIDIA Fabric Manager (nv-fabricmanager) and, when an
// InfiniBand NVSwitch management port is present, the NVLink Subnet Manager
// (nvlsm). NVLSM is started first, FM waits for NVLSM's IPC socket, and
// either process exiting (or SIGINT/SIGTERM) cancels the shared context so
// the other is stopped gracefully. main returns once every started command
// has exited.
func main() {
	// Tracks the goroutines started by runCommand; Wait() at the bottom keeps
	// the wrapper alive until all child processes have exited.
	var cmdWg sync.WaitGroup

	// Ignore SIGHUP (its default action would terminate the wrapper).
	signal.Ignore(syscall.SIGHUP)

	// Canceling runCtx asks every runCommand child to stop (SIGINT, then a
	// kill after that command's stop timeout).
	runCtx, gracefulShutdown := context.WithCancel(context.Background())

	signalsChan := make(chan os.Signal, 1)
	signal.Notify(signalsChan, os.Interrupt)
	signal.Notify(signalsChan, syscall.SIGTERM)

	// First SIGINT/SIGTERM triggers a graceful shutdown; signal.Stop then
	// restores default signal handling, so a second signal terminates the
	// wrapper immediately.
	go func() {
		received := <-signalsChan
		signal.Stop(signalsChan)
		log.Printf("nvidia-fabricmanager-wrapper: received signal '%s', initiating a graceful shutdown\n", received.String())
		gracefulShutdown()
	}()

	// findNvswitchMgmtPorts is defined elsewhere in this package; each
	// returned port carries an IBDevice name and a PortGUID.
	nvswitchPorts := findNvswitchMgmtPorts()
	for _, port := range nvswitchPorts {
		log.Printf("nvidia-fabricmanager-wrapper: found NVSwitch LPF: device=%s guid=0x%x\n", port.IBDevice, port.PortGUID)
	}

	// NVLSM needs a management port GUID; the first discovered port is used.
	// An empty GUID below means "no NVSwitch" and NVLSM is skipped entirely.
	fmSmMgmtPortGUID := ""
	if len(nvswitchPorts) > 0 {
		fmSmMgmtPortGUID = fmt.Sprintf("0x%x", nvswitchPorts[0].PortGUID)
		log.Printf("nvidia-fabricmanager-wrapper: using NVSwitch management port GUID: %s\n", fmSmMgmtPortGUID)
	} else {
		log.Println("nvidia-fabricmanager-wrapper: No InfiniBand NVSwitch detected. On Blackwell HGX baseboards and newer",
			"with NVLink 5.0+, please load kernel module 'ib_umad' for NVLSM to run along FabricManager. Otherwise it will",
			"fail to start with error NV_WARN_NOTHING_TO_DO, and GPU workloads will report CUDA_ERROR_SYSTEM_NOT_READY.")
	}

	if fmSmMgmtPortGUID != "" {
		// Create the directory holding the FM<->NVLSM socket. A pre-existing
		// directory surfaces here as a logged (non-fatal) error.
		if err := os.Mkdir(filepath.Dir(smSocket), 0755); err != nil {
			log.Printf("nvidia-fabricmanager-wrapper: error creating socket directory: %v\n", err)
		}

		runCommand(runCtx, &cmdWg, gracefulShutdown, smStopTimeout, smCmdFile, "--config", smConfigFile,
			"--guid", fmSmMgmtPortGUID, "--pid_file", smPidFile, "--log_file", "stdout")

		// vendor startup script waits for 5 seconds for NVLSM socket to be available before starting FM
		// let's wait for the actual GRPC socket to be created by the plugin
		log.Println("nvidia-fabricmanager-wrapper: waiting for socket creation at", smSocket)
		err := waitForFile(runCtx, smSocket, smSocketWait)
		if err != nil {
			// Non-fatal: FM is started below regardless.
			log.Printf("nvidia-fabricmanager-wrapper: error waiting for socket: %v\n", err)
		} else {
			log.Println("nvidia-fabricmanager-wrapper: socket found at", smSocket)
		}
		// for safety
		time.Sleep(time.Second)
	}

	fmCmdArgs := []string{"--config", fmConfigFile}
	if fmSmMgmtPortGUID != "" {
		// Tell FM which management port GUID NVLSM is bound to.
		fmCmdArgs = append(fmCmdArgs, "--fm-sm-mgmt-port-guid", fmSmMgmtPortGUID)
	}
	runCommand(runCtx, &cmdWg, gracefulShutdown, fmStopTimeout, fmCmdFile, fmCmdArgs...)

	log.Println("nvidia-fabricmanager-wrapper: initialization completed")
	cmdWg.Wait()
}
Loading