File tree Expand file tree Collapse file tree 3 files changed +115
-0
lines changed Expand file tree Collapse file tree 3 files changed +115
-0
---
# OpenModel "qwen2-0--5b": declares a qwen2-family model whose weights are
# sourced from a model hub, identified by a model ID plus one specific GGUF
# file within that model repository.
apiVersion: llmaz.io/v1alpha1
kind: OpenModel
metadata:
  name: qwen2-0--5b
spec:
  familyName: qwen2
  source:
    modelHub:
      modelID: Qwen/Qwen2-0.5B-Instruct-GGUF
      filename: qwen2-0_5b-instruct-q5_k_m.gguf
---
# Playground "qwen2-0--5b": serves the model claimed by name below using the
# llamacpp backend runtime with its "default" config.
apiVersion: inference.llmaz.io/v1alpha1
kind: Playground
metadata:
  name: qwen2-0--5b
spec:
  # Starts with no replicas; a KEDA ScaledObject in this change set targets
  # this Playground with minReplicaCount 0 / maxReplicaCount 3.
  replicas: 0
  modelClaim:
    modelName: qwen2-0--5b
  backendRuntimeConfig:
    backendName: llamacpp
    configName: default
    args:
      - "-fa"  # use flash attention
---
# GatewayClass handled by the Envoy Gateway controller named in
# spec.controllerName; the Gateway in this change set references it by name.
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
metadata:
  name: default-envoy-ai-gateway
spec:
  controllerName: gateway.envoyproxy.io/gatewayclass-controller
---
# Gateway of class "default-envoy-ai-gateway" exposing a single plain-HTTP
# listener on port 80.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: default-envoy-ai-gateway
spec:
  gatewayClassName: default-envoy-ai-gateway
  listeners:
    - name: http
      protocol: HTTP
      port: 80
---
# AIGatewayRoute attached to the "default-envoy-ai-gateway" Gateway.
# Requests are treated as OpenAI-schema traffic and routed by model header.
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIGatewayRoute
metadata:
  name: default-envoy-ai-gateway
spec:
  schema:
    name: OpenAI
  targetRefs:
    - name: default-envoy-ai-gateway
      kind: Gateway
      group: gateway.networking.k8s.io
  rules:
    # Requests whose x-ai-eg-model header is exactly "qwen2-0--5b" are sent
    # to the backend ref of the same name.
    - matches:
        - headers:
            - type: Exact
              name: x-ai-eg-model
              value: qwen2-0--5b
      backendRefs:
        - name: qwen2-0--5b
---
# AIServiceBackend "qwen2-0--5b": the backend named by the AIGatewayRoute
# rule in this change set. It speaks the OpenAI schema and forwards to the
# Service "qwen2-0--5b-lb" on port 8080.
apiVersion: aigateway.envoyproxy.io/v1alpha1
kind: AIServiceBackend
metadata:
  name: qwen2-0--5b
spec:
  timeouts:
    request: 3m
  schema:
    name: OpenAI
  backendRef:
    name: qwen2-0--5b-lb
    kind: Service
    port: 8080
---
# Second changed file in this diff (its line numbering restarts at 1).
# KEDA ScaledObject that scales the Playground "qwen2-0--5b" between 0 and 3
# replicas based on a Prometheus query over llama.cpp's in-flight requests.
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: qwen2-0--5b-scaler
  namespace: default
spec:
  scaleTargetRef:
    apiVersion: inference.llmaz.io/v1alpha1
    kind: Playground
    name: qwen2-0--5b
  # Both intervals are in seconds per the KEDA ScaledObject spec.
  pollingInterval: 30
  cooldownPeriod: 50
  minReplicaCount: 0
  maxReplicaCount: 3
  triggers:
    - type: prometheus
      metadata:
        serverAddress: http://prometheus-operated.llmaz-system.svc.cluster.local:9090
        metricName: llamacpp:requests_processing
        query: sum(llamacpp:requests_processing)
        # Quoted so it stays a string, with no padding so it parses as a
        # number. NOTE(review): the scraped text read " 0.2" with a leading
        # space inside the quotes; removed on the assumption that it is an
        # extraction artifact - confirm against the committed file.
        threshold: "0.2"
---
# Third changed file in this diff (its line numbering restarts at 1).
# ServiceMonitor that scrapes /metrics over HTTP from the port named "http"
# on Services labelled llmaz.io/model-name=qwen2-0--5b, in any namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: qwen2-0--5b-lb-monitor
  namespace: llmaz-system
  labels:
    control-plane: controller-manager
    app.kubernetes.io/name: servicemonitor
spec:
  namespaceSelector:
    any: true
  selector:
    matchLabels:
      llmaz.io/model-name: qwen2-0--5b
  endpoints:
    - port: http
      path: /metrics
      scheme: http
You can’t perform that action at this time.
0 commit comments