Skip to content

Commit

Permalink
config custom metrics
Browse files Browse the repository at this point in the history
Signed-off-by: chipzoller <[email protected]>

finalize

Signed-off-by: Chip Zoller <[email protected]>

fix nil pointer

Signed-off-by: Chip Zoller <[email protected]>

newline

Signed-off-by: chipzoller <[email protected]>

empty

Signed-off-by: chipzoller <[email protected]>

change if

Signed-off-by: chipzoller <[email protected]>

fix

Signed-off-by: chipzoller <[email protected]>
  • Loading branch information
chipzoller committed Aug 26, 2024
1 parent f759c13 commit 5d73d7f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
5 changes: 3 additions & 2 deletions deployments/gpu-operator/templates/clusterpolicy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,9 @@ spec:
{{- if .Values.dcgmExporter.args }}
args: {{ toYaml .Values.dcgmExporter.args | nindent 6 }}
{{- end }}
{{- if .Values.dcgmExporter.config }}
config: {{ toYaml .Values.dcgmExporter.config | nindent 6 }}
{{- if and (.Values.dcgmExporter.config) (.Values.dcgmExporter.config.name) }}
config:
name: {{ .Values.dcgmExporter.config.name }}
{{- end }}
{{- if .Values.dcgmExporter.serviceMonitor }}
serviceMonitor: {{ toYaml .Values.dcgmExporter.serviceMonitor | nindent 6 }}
Expand Down
14 changes: 14 additions & 0 deletions deployments/gpu-operator/templates/dcgm_exporter_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{- /*
Renders a ConfigMap carrying a custom DCGM Exporter metrics list.

The ConfigMap is emitted only when all of the following hold:
  - .Values.dcgmExporter.config is set
  - .Values.dcgmExporter.config.create is truthy
  - .Values.dcgmExporter.config.data is non-empty

The metrics list (CSV text) is stored under the `dcgm-metrics.csv` key.
When creation is requested, .Values.dcgmExporter.config.name is mandatory;
rendering fails with an explicit message instead of producing a ConfigMap
with an empty metadata.name.
*/ -}}
{{- if .Values.dcgmExporter.config }}
{{- if and (.Values.dcgmExporter.config.create) (not (empty .Values.dcgmExporter.config.data)) }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ required "dcgmExporter.config.name is required when dcgmExporter.config.create is true" .Values.dcgmExporter.config.name | quote }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "gpu-operator.labels" . | nindent 4 }}
data:
  {{- /* The leading trim marker plus nindent places every CSV line inside the block scalar. */}}
  dcgm-metrics.csv: |
    {{- .Values.dcgmExporter.config.data | nindent 4 }}
{{- end }}
{{- end }}
19 changes: 19 additions & 0 deletions deployments/gpu-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,26 @@ dcgmExporter:
# target_label: instance
# replacement: $1
# action: replace
# DCGM Exporter configuration
# This block is used to configure DCGM Exporter to emit a customized list of metrics.
# Use "name" to either point to an existing ConfigMap or to create a new one with a
# list of configurations (i.e., with create=true).
# When pointing to an existing ConfigMap, the ConfigMap must exist in the same namespace as the release.
# The metrics are expected to be listed under a key called `dcgm-metrics.csv`.
# Use "data" to build an integrated ConfigMap from a set of custom metrics as
# part of the chart. An example of some custom metrics is shown below. Note that
# the contents of "data" must be in CSV format and be valid DCGM Exporter metric configurations.
# config:
# name: custom-dcgm-exporter-metrics
# create: true
# data: |-
# Format
# If line starts with a '#' it is considered a comment
# DCGM FIELD, Prometheus metric type, help message

# Clocks
# DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz).
# DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
gfd:
enabled: true
repository: nvcr.io/nvidia
Expand Down

0 comments on commit 5d73d7f

Please sign in to comment.