11serverLoadThreshold : 20
22serverLoadMetric : ' sum by (release) (rate(nv_inference_queue_duration_us{release=~"sonic-server"}[30s]) / (rate(nv_inference_exec_count{release=~"sonic-server"}[30s]) * 1000 + 0.001))'
33
4- triton :
5- # image: fastml/triton-torchgeo:21.02-py3-geometric # run2
6- image : fastml/triton-torchgeo:22.07-py3-geometric # run3
4+ triton :
5+ image : nvcr.io/nvidia/tritonserver:24.11-py3
76 command : ["/bin/sh", "-c"]
8- args :
7+ args :
98 - |
109 /opt/tritonserver/bin/tritonserver \
1110 --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoBTag/Combined/data/models/ \
12- --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoEgamma/EgammaPhotonProducers/data/models/ \
1311 --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoTauTag/TrainingFiles/data/DeepTauIdSONIC/ \
1412 --model-repository=/cvmfs/cms.cern.ch/el9_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre7/external/el9_amd64_gcc12/data/RecoMET/METPUSubtraction/data/models/ \
13+ --trace-config mode=opentelemetry \
14+ --trace-config=opentelemetry,resource=pod_name=$(hostname) \
15+ --trace-config opentelemetry,url=sonic-server-opentelemetry-collector:4318/v1/traces \
16+ --trace-config rate=100 \
17+ --trace-config level=TIMESTAMPS \
18+ --trace-config count=-1 \
1519 --allow-gpu-metrics=true \
1620 --log-verbose=0 \
1721 --strict-model-config=false \
1822 --exit-timeout-secs=60
19- # --trace-config mode=opentelemetry
20- # --trace-config=opentelemetry,resource=pod_name=$(hostname)
21- # --trace-config opentelemetry,url=sonic-server-opentelemetry-collector:4318/v1/traces
22- # --trace-config rate=100 # 1 in 100 requests
23- # --trace-config level=TIMESTAMPS
24- # --trace-config count=-1
23+
2524 resources :
26- limits : { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
27- requests : { nvidia.com/gpu: 1, cpu: 2, memory: 4G}
28- nodeSelector : {' cms-af-prod': ' true' }
25+ limits : { nvidia.com/gpu: 1, cpu: 2, memory: 4G }
26+ requests : { nvidia.com/gpu: 1, cpu: 2, memory: 4G }
27+ nodeSelector : { " cms-af-prod": " true" }
2928 tolerations :
3029 - key : hub.jupyter.org/dedicated
3130 operator : Equal
@@ -43,7 +42,7 @@ triton:
4342
4443envoy :
4544 enabled : true
46- nodeSelector : {' cms-af-prod': ' true' }
45+ nodeSelector : { " cms-af-prod": " true" }
4746 tolerations :
4847 - key : hub.jupyter.org/dedicated
4948 operator : Equal
@@ -56,30 +55,33 @@ envoy:
5655 enabled : true
5756 hostName : sonic-cms.geddes.rcac.purdue.edu
5857 ingressClassName : public
58+ rate_limiter :
59+ prometheus_based :
60+ enabled : true
61+ tracing_sampling_rate : 0.01
5962
6063keda :
6164 enabled : true
6265 minReplicaCount : 1
63- maxReplicaCount : 7
66+ maxReplicaCount : 11
67+ scaleUp :
68+ stabilizationWindowSeconds : 30
69+ periodSeconds : 15
70+ stepsize : 1
71+ scaleDown :
72+ stabilizationWindowSeconds : 45
73+ periodSeconds : 45
74+ stepsize : 1
6475
6576ingress :
6677 enabled : false
6778
6879prometheus :
69- enabled : true
70- server :
71- useExistingClusterRoleName : sonic-server-prometheus-role
72- ingress :
73- enabled : true
74- hosts :
75- - prometheus-cms.geddes.rcac.purdue.edu
76- tls :
77- - hosts :
78- - prometheus-cms.geddes.rcac.purdue.edu
79- ingressClassName : public
80- serviceAccounts :
81- server :
82- name : sonic-server-prometheus-sa
80+ external :
81+ enabled : true
82+ url : prometheus-af.geddes.rcac.purdue.edu
83+ port : 443
84+ scheme : https
8385
8486grafana :
8587 enabled : true
@@ -92,7 +94,7 @@ grafana:
9294 type : prometheus
9395 access : proxy
9496 isDefault : true
95- url : http ://sonic-server- prometheus-server:9090
97+ url : https ://prometheus-af.geddes.rcac.purdue.edu
9698 jsonData :
9799 timeInterval : " 5s"
98100 tlsSkipVerify : true
@@ -106,7 +108,7 @@ grafana:
106108 timeInterval : " 5s"
107109 tlsSkipVerify : true
108110 serviceMap :
109- datasourceUid : ' prometheus'
111+ datasourceUid : " prometheus"
110112 nodeGraph :
111113 enabled : true
112114 ingress :
@@ -127,13 +129,14 @@ opentelemetry-collector:
127129 exporters :
128130 otlp :
129131 endpoint : http://sonic-server-tempo:4317
130- otlphttp :
132+ otlphttp :
131133 endpoint : http://sonic-server-tempo:4318
132134 prometheusremotewrite :
133- endpoint : http://sonic-server-prometheus-server:9090/api/v1/write
135+ endpoint : http://prometheus-server:9090/api/v1/write
136+
134137tempo :
135138 enabled : true
136139 tempo :
137140 metricsGenerator :
138141 enabled : true
139- remoteWriteUrl : http://sonic-server- prometheus-server:9090/api/v1/write
142+ remoteWriteUrl : http://prometheus-server:9090/api/v1/write
0 commit comments