Skip to content

Commit

Permalink
chart for nvidia-driver-runtime
Browse files Browse the repository at this point in the history
hostPID set for daemonset to ensure vgpu mgr works correctly

bump nvidia-driver-runtime to use official image
  • Loading branch information
ibrokethecloud authored and WebberHuang1118 committed Jan 23, 2024
1 parent 54d9b76 commit 9b6d99b
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 0 deletions.
23 changes: 23 additions & 0 deletions charts/nvidia-driver-runtime/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# Files matching any pattern below are left out of the packaged chart.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
27 changes: 27 additions & 0 deletions charts/nvidia-driver-runtime/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
apiVersion: v2
name: nvidia-driver-runtime
description: DaemonSet that downloads and installs the NVIDIA vGPU KVM driver on selected nodes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
# The templates use this value for the app.kubernetes.io/version label and as the image tag
# fallback when .Values.image.tag is empty.
# NOTE(review): "1.16.0" looks like a scaffold default rather than the driver toolkit
# version - confirm and align it with the image that is actually deployed.
appVersion: "1.16.0"

maintainers:
  - name: harvester
63 changes: 63 additions & 0 deletions charts/nvidia-driver-runtime/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{{/*
Short chart name: .Values.nameOverride when it is set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "nvidia-driver-runtime.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name, cut to 63 characters (some Kubernetes name fields are
limited to this by the DNS naming spec) with a trailing "-" removed.
Resolution order:
  1. .Values.fullnameOverride, when set.
  2. .Release.Name on its own, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.
*/}}
{{- define "nvidia-driver-runtime.fullname" -}}
{{- with .Values.fullnameOverride }}
{{- . | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $chartName := .Values.nameOverride | default .Chart.Name }}
{{- $candidate := printf "%s-%s" .Release.Name $chartName }}
{{- if contains $chartName .Release.Name }}
{{- $candidate = .Release.Name }}
{{- end }}
{{- $candidate | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}

{{/*
"<chart name>-<chart version>" string used as the value of the helm.sh/chart label.
Any "+" is replaced with "_", then the result is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "nvidia-driver-runtime.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels: the chart label, every selector label, the app version
(only when .Chart.AppVersion is set) and the managing service.
*/}}
{{- define "nvidia-driver-runtime.labels" -}}
helm.sh/chart: {{ include "nvidia-driver-runtime.chart" . }}
{{ include "nvidia-driver-runtime.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels: used for the DaemonSet selector and pod template labels, and
included in the common labels.
The templated values are quoted so that a release name or nameOverride that
looks like a number or boolean (for example "123" or "true") is still rendered
as a YAML string; label values must be strings.
*/}}
{{- define "nvidia-driver-runtime.selectorLabels" -}}
app.kubernetes.io/name: {{ include "nvidia-driver-runtime.name" . | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app: nvidia-driver-daemonset
{{- end }}

{{/*
Name of the service account the pods run as.
.Values.serviceAccount.name wins when it is set. Otherwise the fallback is the
chart fullname when the chart creates the service account, or "default" when it
does not.
*/}}
{{- define "nvidia-driver-runtime.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "nvidia-driver-runtime.fullname" . }}
{{- end }}
{{- .Values.serviceAccount.name | default $fallback }}
{{- end }}
68 changes: 68 additions & 0 deletions charts/nvidia-driver-runtime/templates/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# DaemonSet running the NVIDIA driver container. Pods are restricted to the
# nodes matched by .Values.nodeSelector; when that value is empty no node
# selector is rendered.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ include "nvidia-driver-runtime.fullname" . }}
  labels:
    {{- include "nvidia-driver-runtime.labels" . | nindent 4 }}
spec:
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      {{- include "nvidia-driver-runtime.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "nvidia-driver-runtime.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "nvidia-driver-runtime.serviceAccountName" . }}
      # hostPID is set so that the vGPU manager works correctly.
      hostPID: true
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: nvidia-driver-ctr
          env:
            # Location of the driver installer, taken from .Values.driverLocation.
            # Rendered with the quote function so that quotes or backslashes in
            # the value are escaped instead of producing invalid YAML.
            - name: "DRIVER_LOCATION"
              value: {{ .Values.driverLocation | quote }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          volumeMounts:
            - name: var-log
              mountPath: /var/log
            - name: dev-log
              mountPath: /dev/log
            # The host's /etc/os-release is exposed read-only under /host-etc.
            - name: host-os-release
              mountPath: "/host-etc/os-release"
              readOnly: true
            - name: host-sys
              mountPath: /sys
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      volumes:
        - name: var-log
          hostPath:
            path: /var/log
        - name: dev-log
          hostPath:
            path: /dev/log
        - name: host-os-release
          hostPath:
            path: "/etc/os-release"
        - name: host-sys
          hostPath:
            path: /sys
            type: Directory
12 changes: 12 additions & 0 deletions charts/nvidia-driver-runtime/templates/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{{/*
ServiceAccount referenced by the DaemonSet pods. Rendered only when
.Values.serviceAccount.create is true; annotations are emitted only when
.Values.serviceAccount.annotations is non-empty.
*/}}
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "nvidia-driver-runtime.serviceAccountName" . }}
  labels:
    {{- include "nvidia-driver-runtime.labels" . | nindent 4 }}
  {{- $annotations := .Values.serviceAccount.annotations }}
  {{- if $annotations }}
  annotations:
    {{- toYaml $annotations | nindent 4 }}
  {{- end }}
{{- end }}
56 changes: 56 additions & 0 deletions charts/nvidia-driver-runtime/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Default values for nvidia-driver-runtime.
# This file is YAML; each key is read by the chart templates as .Values.<key>.

# Container image run by the DaemonSet.
image:
  repository: rancher/harvester-nvidia-driver-toolkit
  pullPolicy: Always
  # Image tag. When left empty the templates fall back to the chart appVersion.
  tag: sle-micro-head

imagePullSecrets: []
nameOverride: ""
fullnameOverride: "nvidia-driver-daemonset"

serviceAccount:
  # Whether the chart creates a service account.
  create: true
  # Annotations added to the created service account.
  annotations: {}
  # Name of the service account to use.
  # When empty and create is true, the name is generated from the fullname template.
  name: ""

podAnnotations: {}

# Pod-level security context, rendered as-is into the pod spec.
podSecurityContext: {}
  # fsGroup: 2000

# Container-level security context for the driver container.
securityContext:
  privileged: true
  # capabilities:
  #   drop:
  #     - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# Resource requests/limits for the driver container.
# No defaults are set so that this stays a conscious choice for the user and the
# chart can run on environments with little resources, such as Minikube.
# To set resources, uncomment the lines below, adjust them as necessary, and
# remove the curly braces after 'resources:'.
resources: {}
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# Node labels a node must carry for the DaemonSet pods to be scheduled on it.
nodeSelector:
  sriovgpu.harvesterhci.io/driver-needed: "true"

# HTTP endpoint where the NVIDIA KVM driver is hosted.
# The DaemonSet pulls and installs it at runtime; the value is passed to the
# container as the DRIVER_LOCATION environment variable.
driverLocation: "HTTPENDPOINT/NVIDIA-Linux-x86_64-525.60.12-vgpu-kvm.run"

0 comments on commit 9b6d99b

Please sign in to comment.