Skip to content

Commit dc1c48c

Browse files
refactor configuration to support interLink (#68)
* Configure node selector and tolerations independently
* Update JSON schema
* Update Helm docs
* Update JSON schema

---------

Co-authored-by: GitHub Actions <[email protected]>
1 parent 6aa2ad1 commit dc1c48c

File tree

7 files changed

+83
-37
lines changed

7 files changed

+83
-37
lines changed

docs/.values-table.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
| triton.command | list | `["/bin/sh","-c"]` | Command and arguments to run in Triton container |
1111
| triton.args[0] | string | `"/opt/tritonserver/bin/tritonserver \\\n--model-repository=/tmp/ \\\n--log-verbose=0 \\\n--exit-timeout-secs=60\n"` | |
1212
| triton.resources | object | `{"limits":{"cpu":1,"memory":"2G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource limits and requests for each Triton instance. You can add necessary GPU request here. |
13+
| triton.annotations | object | `{}` | Annotations for Triton pods |
14+
| triton.nodeSelector | object | `{}` | Node selector for Triton pods |
15+
| triton.tolerations | list | `[]` | Tolerations for Triton pods |
1316
| triton.affinity | object | `{}` | Affinity rules for Triton pods - another way to request GPUs |
1417
| triton.modelRepository | object | `{"enabled":false,"mountPath":""}` | Model repository configuration |
1518
| triton.modelRepository.mountPath | string | `""` | Model repository mount path |
@@ -25,6 +28,9 @@
2528
| envoy.image | string | `"envoyproxy/envoy:v1.30.9"` | Envoy Proxy Docker image |
2629
| envoy.args | list | `["--config-path","/etc/envoy/envoy.yaml","--log-level","info","--log-path","/dev/stdout"]` | Arguments for Envoy |
2730
| envoy.resources | object | `{"limits":{"cpu":8,"memory":"4G"},"requests":{"cpu":1,"memory":"2G"}}` | Resource requests and limits for Envoy Proxy. Note: an Envoy Proxy with too many connections might run out of CPU |
31+
| envoy.annotations | object | `{}` | Annotations for Envoy pods |
32+
| envoy.nodeSelector | object | `{}` | Node selector for Envoy pods |
33+
| envoy.tolerations | list | `[]` | Tolerations for Envoy pods |
2834
| envoy.service.type | string | `"ClusterIP"` | This is the client-facing endpoint. In order to be able to connect to it, either enable ingress, or use type: LoadBalancer. |
2935
| envoy.service.ports | list | `[{"name":"grpc","port":8001,"targetPort":8001},{"name":"admin","port":9901,"targetPort":9901}]` | Envoy Service ports |
3036
| envoy.ingress | object | `{"annotations":{},"enabled":false,"hostName":"","ingressClassName":""}` | Ingress configuration for Envoy |
@@ -54,8 +60,6 @@
5460
| autoscaler.scaleDown.stabilizationWindowSeconds | int | `600` | |
5561
| autoscaler.scaleDown.periodSeconds | int | `120` | |
5662
| autoscaler.scaleDown.stepsize | int | `1` | |
57-
| nodeSelector | object | `{}` | Node selector for all pods (Triton and Envoy) |
58-
| tolerations | list | `[]` | Tolerations for all pods (Triton and Envoy) |
5963
| prometheus.external.enabled | bool | `false` | Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored. |
6064
| prometheus.external.url | string | `""` | External Prometheus server url |
6165
| prometheus.external.port | int | `443` | External Prometheus server port number |

helm/supersonic/templates/envoy/deployment.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ spec:
2121
app.kubernetes.io/name: {{ .Chart.Name }}
2222
app.kubernetes.io/instance: {{ include "supersonic.name" . }}
2323
app.kubernetes.io/component: envoy
24+
{{- with .Values.envoy.annotations }}
25+
annotations:
26+
{{- toYaml . | nindent 8 }}
27+
{{- end }}
2428
spec:
2529
containers:
2630
- name: envoy
@@ -68,13 +72,13 @@ spec:
6872
{{- end }}
6973

7074

71-
{{- if .Values.nodeSelector }}
75+
{{- if .Values.envoy.nodeSelector }}
7276
nodeSelector:
73-
{{ toYaml .Values.nodeSelector | nindent 8 }}
77+
{{ toYaml .Values.envoy.nodeSelector | nindent 8 }}
7478
{{- end }}
75-
{{- if .Values.tolerations }}
79+
{{- if .Values.envoy.tolerations }}
7680
tolerations:
77-
{{ toYaml .Values.tolerations | nindent 8 }}
81+
{{ toYaml .Values.envoy.tolerations | nindent 8 }}
7882
{{- end }}
7983
restartPolicy: Always
8084

helm/supersonic/templates/triton/deployment.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ spec:
1919
app.kubernetes.io/name: {{ .Chart.Name }}
2020
app.kubernetes.io/instance: {{ include "supersonic.name" . }}
2121
app.kubernetes.io/component: triton
22+
{{- with .Values.triton.annotations }}
23+
annotations:
24+
{{- toYaml . | nindent 8 }}
25+
{{- end }}
2226
spec:
2327
terminationGracePeriodSeconds: 60
2428
containers:
@@ -88,12 +92,12 @@ spec:
8892
affinity: {{ toYaml .Values.triton.affinity | nindent 8}}
8993
{{- end }}
9094

91-
{{- if .Values.nodeSelector }}
95+
{{- if .Values.triton.nodeSelector }}
9296
nodeSelector:
93-
{{ toYaml .Values.nodeSelector | nindent 8 }}
97+
{{ toYaml .Values.triton.nodeSelector | nindent 8 }}
9498
{{- end }}
95-
{{- if .Values.tolerations }}
99+
{{- if .Values.triton.tolerations }}
96100
tolerations:
97-
{{ toYaml .Values.tolerations | nindent 8 }}
101+
{{ toYaml .Values.triton.tolerations | nindent 8 }}
98102
{{- end }}
99103
restartPolicy: Always

helm/supersonic/values.schema.json

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,15 @@
7171
"requests"
7272
]
7373
},
74+
"annotations": {
75+
"type": "object"
76+
},
77+
"nodeSelector": {
78+
"type": "object"
79+
},
80+
"tolerations": {
81+
"type": "array"
82+
},
7483
"affinity": {
7584
"type": "object"
7685
},
@@ -211,15 +220,18 @@
211220
},
212221
"required": [
213222
"affinity",
223+
"annotations",
214224
"args",
215225
"command",
216226
"image",
217227
"modelRepository",
228+
"nodeSelector",
218229
"readinessProbe",
219230
"replicas",
220231
"resources",
221232
"service",
222-
"startupProbe"
233+
"startupProbe",
234+
"tolerations"
223235
]
224236
},
225237
"envoy": {
@@ -279,6 +291,15 @@
279291
"requests"
280292
]
281293
},
294+
"annotations": {
295+
"type": "object"
296+
},
297+
"nodeSelector": {
298+
"type": "object"
299+
},
300+
"tolerations": {
301+
"type": "array"
302+
},
282303
"service": {
283304
"type": "object",
284305
"properties": {
@@ -425,17 +446,20 @@
425446
}
426447
},
427448
"required": [
449+
"annotations",
428450
"args",
429451
"auth",
430452
"enabled",
431453
"grpc_route_timeout",
432454
"image",
433455
"ingress",
434456
"loadBalancerPolicy",
457+
"nodeSelector",
435458
"rate_limiter",
436459
"replicas",
437460
"resources",
438461
"service",
462+
"tolerations",
439463
"tracing_sampling_rate"
440464
]
441465
},
@@ -502,12 +526,6 @@
502526
"zeroIdleReplicas"
503527
]
504528
},
505-
"nodeSelector": {
506-
"type": "object"
507-
},
508-
"tolerations": {
509-
"type": "array"
510-
},
511529
"prometheus": {
512530
"type": "object",
513531
"properties": {
@@ -1942,13 +1960,11 @@
19421960
"grafana",
19431961
"metricsCollector",
19441962
"nameOverride",
1945-
"nodeSelector",
19461963
"opentelemetry-collector",
19471964
"prometheus",
19481965
"serverLoadMetric",
19491966
"serverLoadThreshold",
19501967
"tempo",
1951-
"tolerations",
19521968
"triton"
19531969
]
19541970
}

helm/supersonic/values.yaml

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,16 @@ triton:
4242
requests:
4343
cpu: 1
4444
memory: "2G"
45-
45+
46+
# -- Annotations for Triton pods
47+
annotations: {}
48+
49+
# -- Node selector for Triton pods
50+
nodeSelector: {}
51+
52+
# -- Tolerations for Triton pods
53+
tolerations: []
54+
4655
# -- Affinity rules for Triton pods - another way to request GPUs
4756
affinity: {}
4857

@@ -125,6 +134,16 @@ envoy:
125134
limits:
126135
cpu: 8.0
127136
memory: "4G"
137+
138+
# -- Annotations for Envoy pods
139+
annotations: {}
140+
141+
# -- Node selector for Envoy pods
142+
nodeSelector: {}
143+
144+
# -- Tolerations for Envoy pods
145+
tolerations: []
146+
128147
service:
129148
# -- This is the client-facing endpoint. In order to be able to connect to it,
130149
# either enable ingress, or use type: LoadBalancer.
@@ -206,12 +225,6 @@ autoscaler:
206225
periodSeconds: 120
207226
stepsize: 1
208227

209-
# -- Node selector for all pods (Triton and Envoy)
210-
nodeSelector: {}
211-
212-
# -- Tolerations for all pods (Triton and Envoy)
213-
tolerations: []
214-
215228
prometheus:
216229
external:
217230
# -- Enable external Prometheus instance. If true, Prometheus parameters outside of prometheus.external will be ignored.

values/values-geddes-cms.yaml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ triton:
2525
resources:
2626
limits: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
2727
requests: { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
28+
nodeSelector: {'cms-af-prod': 'true'}
29+
tolerations:
30+
- key: hub.jupyter.org/dedicated
31+
operator: Equal
32+
value: cms-af
33+
effect: NoSchedule
2834
service:
2935
labels:
3036
scrape_metrics: "true"
@@ -37,6 +43,12 @@ triton:
3743

3844
envoy:
3945
enabled: true
46+
nodeSelector: {'cms-af-prod': 'true'}
47+
tolerations:
48+
- key: hub.jupyter.org/dedicated
49+
operator: Equal
50+
value: cms-af
51+
effect: NoSchedule
4052
loadBalancerPolicy: "ROUND_ROBIN"
4153
service:
4254
type: LoadBalancer
@@ -53,14 +65,6 @@ autoscaler:
5365
ingress:
5466
enabled: false
5567

56-
nodeSelector: {'cms-af-prod': 'true'}
57-
58-
tolerations:
59-
- key: hub.jupyter.org/dedicated
60-
operator: Equal
61-
value: cms-af
62-
effect: NoSchedule
63-
6468
prometheus:
6569
enabled: true
6670
server:

values/values-nautilus-cms.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ triton:
2323
resources:
2424
limits: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}
2525
requests: { cpu: 1, memory: 3G, nvidia.com/gpu: 1}
26+
nodeSelector:
27+
topology.kubernetes.io/zone: ucsd
2628
# affinity:
2729
# nodeAffinity:
2830
# requiredDuringSchedulingIgnoredDuringExecution:
@@ -44,6 +46,8 @@ envoy:
4446
replicas: 1
4547
grpc_route_timeout: 5s
4648
loadBalancerPolicy: "LEAST_REQUEST"
49+
nodeSelector:
50+
topology.kubernetes.io/zone: ucsd
4751
ingress:
4852
enabled: true
4953
hosts:
@@ -78,9 +82,6 @@ autoscaler:
7882
periodSeconds: 15
7983
stepsize: 1
8084

81-
nodeSelector:
82-
topology.kubernetes.io/zone: ucsd
83-
8485
prometheus:
8586
enabled: true
8687
server:

0 commit comments

Comments
 (0)