Skip to content

Commit b096ddf

Browse files
committed
feat: add serverless config for keda.
Signed-off-by: X1aoZEOuO <[email protected]>
1 parent c0cd133 commit b096ddf

File tree

3 files changed

+115
-0
lines changed

3 files changed

+115
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,76 @@
# OpenModel: declares the model `qwen2-0--5b` (family `qwen2`), sourced from a
# model hub by repository ID and a single GGUF file name.
apiVersion: llmaz.io/v1alpha1
kind: OpenModel
metadata:
  name: qwen2-0--5b
spec:
  familyName: qwen2
  source:
    modelHub:
      modelID: Qwen/Qwen2-0.5B-Instruct-GGUF
      filename: qwen2-0_5b-instruct-q5_k_m.gguf
---
# Playground: serves the `qwen2-0--5b` model claim on the `llamacpp` backend
# using its `default` config. It is declared with zero replicas.
apiVersion: inference.llmaz.io/v1alpha1
kind: Playground
metadata:
  name: qwen2-0--5b
spec:
  replicas: 0
  modelClaim:
    modelName: qwen2-0--5b
  backendRuntimeConfig:
    backendName: llamacpp
    configName: default
    args:
      - -fa # use flash attention
---
# GatewayClass: binds the class name `default-envoy-ai-gateway` to the
# controller named in `controllerName`.
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: default-envoy-ai-gateway
spec:
  controllerName: gateway.envoyproxy.io/gatewayclass-controller
---
# Gateway: an instance of the `default-envoy-ai-gateway` class with a single
# plain-HTTP listener on port 80.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: default-envoy-ai-gateway
spec:
  gatewayClassName: default-envoy-ai-gateway
  listeners:
    - name: http
      protocol: HTTP
      port: 80
---
# AIGatewayRoute: attaches to the `default-envoy-ai-gateway` Gateway, declares
# the OpenAI schema, and sends requests whose `x-ai-eg-model` header is exactly
# `qwen2-0--5b` to the backend reference of the same name.
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
  name: default-envoy-ai-gateway
spec:
  schema:
    name: OpenAI
  targetRefs:
    - name: default-envoy-ai-gateway
      kind: Gateway
      group: gateway.networking.k8s.io
  rules:
    - matches:
        - headers:
            - type: Exact
              name: x-ai-eg-model
              value: qwen2-0--5b
      backendRefs:
        - name: qwen2-0--5b
---
# AIServiceBackend: the backend `qwen2-0--5b`, declared with the OpenAI schema
# and a 3-minute request timeout. It points at the Service `qwen2-0--5b-lb`
# on port 8080.
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
  name: qwen2-0--5b
spec:
  timeouts:
    request: 3m
  schema:
    name: OpenAI
  backendRef:
    name: qwen2-0--5b-lb
    kind: Service
    port: 8080
Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
# ScaledObject: lets KEDA scale the `qwen2-0--5b` Playground between 0 and 3
# replicas, driven by a Prometheus query over `llamacpp:requests_processing`.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: qwen2-0--5b-scaler
  namespace: default
spec:
  scaleTargetRef:
    apiVersion: inference.llmaz.io/v1alpha1
    kind: Playground
    name: qwen2-0--5b
  pollingInterval: 30
  cooldownPeriod: 50
  minReplicaCount: 0
  maxReplicaCount: 3
  triggers:
    - type: prometheus
      metadata:
        serverAddress: http://prometheus-operated.llmaz-system.svc.cluster.local:9090
        # NOTE(review): `metricName` appears to be deprecated for this trigger
        # in recent KEDA releases - confirm against the installed KEDA version
        # before dropping it.
        metricName: llamacpp:requests_processing
        # NOTE(review): the query has no label matcher, so it presumably sums
        # over every series of this metric that Prometheus holds - verify that
        # this is intended if more than one model is served.
        query: sum(llamacpp:requests_processing)
        threshold: "0.2"
Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
# ServiceMonitor: selects Services in any namespace that carry the label
# `llmaz.io/model-name: qwen2-0--5b` and scrapes `/metrics` over plain HTTP
# from their port named `http`.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: qwen2-0--5b-lb-monitor
  namespace: llmaz-system
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: servicemonitor
spec:
  namespaceSelector:
    any: true
  selector:
    matchLabels:
      llmaz.io/model-name: qwen2-0--5b
  endpoints:
    - port: http
      path: /metrics
      scheme: http

0 commit comments

Comments
 (0)