Skip to content

Commit 883c40a

Browse files
authored
Add prefill heavy e2e benchmarking test to github actions. (#1894)
1 parent 5503db5 commit 883c40a

File tree

5 files changed

+408
-16
lines changed

5 files changed

+408
-16
lines changed
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
rlakhtakia
2+
liu-cong
3+
kfswain
Lines changed: 322 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,322 @@
1+
# Workflow that runs the prefill-heavy end-to-end benchmark against GKE.
name: GKE Prefill Heavy Test

on:
  # Comment trigger: a PR comment containing /run-gke-prefill-heavy
  issue_comment:
    types:
      - created
  # Manual trigger: the caller names the PR number or branch to test
  workflow_dispatch:
    inputs:
      pr_or_branch:
        type: string
        required: true
        default: "main"
        description: "Pull-request number or branch name to test"

# Default token scope for every job in this workflow.
permissions:
  contents: read
17+
18+
jobs:
19+
# Authorization job: ensures only authorized users can execute the workflow.
# Note: even if a user edits the allow-list on their own branch, they still
# need the correct repository secrets to deploy to GCP.
check_access:
  runs-on: ubuntu-latest

  if: |
    (github.event_name == 'issue_comment' &&
    github.event.issue.pull_request &&
    contains(github.event.comment.body, '/run-gke-prefill-heavy')) || github.event_name == 'workflow_dispatch'

  # The PR base branch is read through the API, which needs pull-requests: read.
  permissions:
    contents: read
    pull-requests: read

  outputs:
    authorized: ${{ steps.auth_logic.outputs.authorized }}

  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4

    - name: Authorization Logic
      id: auth_logic
      shell: bash
      # Event data is handed to the script through the environment instead of
      # being interpolated into the script text, so it is never parsed as shell.
      env:
        EVENT_NAME: ${{ github.event_name }}
        COMMENT_USER: ${{ github.event.comment.user.login }}
        COMMENT_ROLE: ${{ github.event.comment.author_association }}
        PR_NUMBER: ${{ github.event.issue.number }}
        ACTOR: ${{ github.actor }}
        GH_TOKEN: ${{ github.token }}
      run: |
        authorized='false'
        auth_file=".github/authorized_workflow_users.txt"
        user=""
        role=""

        if [[ "$EVENT_NAME" == "issue_comment" ]]; then
          user="$COMMENT_USER"
          role="$COMMENT_ROLE"

          # The issue_comment payload only carries URLs under issue.pull_request
          # (no base.ref), so the base branch has to be looked up via the API.
          if ! base_ref="$(gh api "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}" --jq '.base.ref')"; then
            echo "Could not determine the base branch of PR #${PR_NUMBER}." >&2
            echo "authorized=false" >> "$GITHUB_OUTPUT"
            exit 1
          fi

          if [[ "$base_ref" != "main" ]]; then
            echo "PR base is not 'main'."
            echo "authorized=false" >> "$GITHUB_OUTPUT"
            exit 1
          fi

        elif [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
          user="$ACTOR"
        fi

        # NOTE(review): GitHub's documented author_association values do not
        # include MAINTAINER (they are OWNER, MEMBER, COLLABORATOR, ...), so that
        # comparison presumably never matches. Confirm the intended roles.
        if [[ "$role" == "OWNER" || "$role" == "MAINTAINER" ]]; then
          echo "User authorized by role: $role"
          authorized='true'

        # Require a non-empty user: an empty pattern would match a blank line
        # in the allow-list and authorize by accident.
        elif [[ -n "$user" ]] && grep -Fxq -- "$user" "$auth_file"; then
          echo "User authorized by file lookup: $auth_file"
          authorized='true'
        fi

        echo "authorized=$authorized" >> "$GITHUB_OUTPUT"
69+
70+
# Deployment job: runs only after check_access reports the caller as authorized.
deploy_and_validate:
  name: Test on ${{ matrix.accelerator.name }}
  runs-on: ubuntu-latest
  needs:
    - check_access
  if: |
    (github.event_name == 'workflow_dispatch' || github.event_name == 'issue_comment') &&
    needs.check_access.outputs.authorized == 'true'

  # Single-entry matrix; entries run one at a time and do not cancel each other.
  strategy:
    fail-fast: false
    max-parallel: 1
    matrix:
      accelerator:
        - name: GPU

  env:
    # Target cluster
    GCP_PROJECT_ID: llm-d-scale
    GKE_CLUSTER_NAME: llm-d-e2e-us-east5
    GKE_CLUSTER_ZONE: us-east5
    NAMESPACE: igw-prefill-heavy
    # Gateway flavour
    GATEWAY: gke-l7-regional-external-managed
    GATEWAY_TYPE: gke
    # Ref under test: dispatch input, then PR/issue number, then 'actions'
    PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }}
    # Model under test and the token used to pull it
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
    MODEL: meta-llama/Llama-3.1-8B-Instruct
    # Identity and bucket used for benchmark results
    GSA_EMAIL: ${{ secrets.GCS_WORKLOAD_SA }}
    GCS_BUCKET: igw-e2e-benchmark-results
    KSA_NAME: igw-e2e-benchmark-sa
100+
steps:
101+
# Check out the repository without leaving the token in the local git config.
- name: Checkout
  uses: actions/checkout@v4
  with:
    persist-credentials: false

# Decide whether the ref under test is a pull-request number or a branch name,
# and publish the resolved value as PR_OR_BRANCH for the following steps.
- name: Determine if pr_or_branch is a PR number
  id: check_pr
  env:
    # Manual runs provide the input; comment-triggered runs have no inputs, so
    # fall back to the number of the PR the comment was posted on.
    PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number }}
    EVENT_NAME: ${{ github.event_name }}
    EVENT_PR_NUMBER: ${{ github.event.pull_request.number }}
  shell: bash
  run: |
    if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
      is_pr=true
    elif [[ "$EVENT_NAME" == "pull_request" ]]; then
      PR_OR_BRANCH="$EVENT_PR_NUMBER"
      is_pr=true
    else
      is_pr=false
    fi

    # 'actions' remains the last-resort ref when nothing else was supplied.
    echo "PR_OR_BRANCH=${PR_OR_BRANCH:-actions}" >> "$GITHUB_ENV"
    echo "is_pr=${is_pr}" >> "$GITHUB_OUTPUT"

# PR heads are fetched from refs/pull/<n>/head into a local pr-<n> branch.
- name: Fetch and checkout PR
  if: steps.check_pr.outputs.is_pr == 'true'
  run: |
    git fetch origin pull/"$PR_OR_BRANCH"/head:pr-"$PR_OR_BRANCH"
    git checkout pr-"$PR_OR_BRANCH"

- name: Checkout branch
  if: steps.check_pr.outputs.is_pr == 'false'
  run: git checkout "$PR_OR_BRANCH"
131+
132+
# Authenticate with the service-account key stored in the repository secrets.
- id: auth
  name: Authenticate to Google Cloud
  uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5
  with:
    credentials_json: ${{ secrets.GCP_SA_KEY }}

# Install gcloud plus the kubectl and GKE auth plugin components.
- name: Set up gcloud CLI and kubectl
  uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397
  with:
    install_components: "kubectl,gke-gcloud-auth-plugin"
    project_id: ${{ env.GCP_PROJECT_ID }}

# Write a kubeconfig entry for the target cluster (names come from the job env).
- name: Get GKE credentials
  run: |
    gcloud container clusters get-credentials "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}"
147+
148+
# Create the namespace idempotently. Render-and-apply succeeds when the
# namespace already exists but still fails on real errors (auth, connectivity),
# which a blanket "|| echo" would have hidden.
- name: Create namespace
  shell: bash
  run: |
    kubectl create namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -

# Create or update the Hugging Face token secret. The token is read from the
# job-level HF_TOKEN environment variable rather than being templated into the
# script, so special characters in the token cannot break or alter the command.
- name: Create hf-token secret
  shell: bash
  run: |
    kubectl create secret generic hf-token \
      --from-literal="token=${HF_TOKEN}" \
      --namespace "${NAMESPACE}" \
      --dry-run=client -o yaml | kubectl apply -f -

# Create the Kubernetes service account and bind it to the Google service
# account through the Workload Identity annotation.
- name: Create and Annotate KSA for Workload Identity
  shell: bash
  run: |
    kubectl create serviceaccount "${KSA_NAME}" --namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -
    kubectl annotate serviceaccount "${KSA_NAME}" \
      "iam.gke.io/gcp-service-account=${GSA_EMAIL}" \
      --overwrite \
      --namespace "${NAMESPACE}"
166+
167+
# Deploy the vLLM model server and install the inference-extension CRDs.
# "shell: bash" makes the runner use "bash -eo pipefail", so a failing kubectl
# is no longer masked by the exit status of tee.
- name: Deploy Model Server and CRDs
  shell: bash
  run: |
    cd config/manifests/vllm
    echo "Deploying Model Server..."
    kubectl apply -f gpu-deployment.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
    echo "Installing CRDs"
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml
    echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log

# Install the InferencePool chart (which brings the Endpoint Picker extension).
# tee -a appends to the shared deployment log instead of truncating what the
# previous step wrote.
- name: Deploy InferencePool and Endpoint Picker Extension
  shell: bash
  run: |
    export IGW_CHART_VERSION=v1.1.0
    helm install vllm-llama3-8b-instruct \
      --namespace "${NAMESPACE}" \
      --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
      --set provider.name="${GATEWAY_TYPE}" \
      --version "${IGW_CHART_VERSION}" \
      oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee -a ~/igw-prefill-heavy-deployment.log
    echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log
186+
187+
# Block until every pod in the namespace reports Ready (up to 25 minutes),
# then list the pods for the job log.
- name: Wait for all pods to be ready
  run: |
    kubectl wait pod --all -n "${NAMESPACE}" --for=condition=Ready --timeout=25m
    echo "✅ All pods are ready."
    kubectl get pods -n "${NAMESPACE}"
196+
197+
# Recreate the Gateway and HTTPRoute from the v1.1.0 release manifests.
# "shell: bash" enables pipefail so kubectl failures are not hidden by tee, and
# tee -a appends to the deployment log rather than overwriting earlier output.
- name: Deploy Gateway
  shell: bash
  run: |
    GATEWAY_NAME=inference-gateway
    MANIFEST_BASE=https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke
    # Remove leftovers from a previous run so the apply starts from a clean state.
    kubectl delete httproute llm-route -n "${NAMESPACE}" --ignore-not-found
    kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" --ignore-not-found
    echo "Deploying Gateway..."
    kubectl apply -f "${MANIFEST_BASE}/gateway.yaml" -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
    echo "Deploying HTTPRoute..."
    kubectl apply -f "${MANIFEST_BASE}/httproute.yaml" -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
    echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log
207+
208+
# Block until the Gateway reports Programmed=True (up to 500 seconds),
# then list the gateways in the namespace.
- name: Wait for gateway to be ready
  run: |
    gateway=inference-gateway
    kubectl wait "gateway/${gateway}" -n "${NAMESPACE}" --for=condition=Programmed=True --timeout=500s
    echo "✅ Gateway is ready."
    kubectl get gateway -n "${NAMESPACE}"
217+
218+
# Dump the state of everything deployed into the namespace for the job log.
# The first three listings must succeed; the remaining ones are best effort.
- name: Show deployment status
  run: |
    heading() { echo "=== $1 ==="; }

    heading "Deployments"
    kubectl get deployments -n "${NAMESPACE}"
    echo ""

    heading "Pods"
    kubectl get pods -n "${NAMESPACE}"
    echo ""

    heading "Services"
    kubectl get svc -n "${NAMESPACE}"
    echo ""

    heading "Helm releases"
    helm list -n "${NAMESPACE}" || true
    echo ""

    heading "Inference Pools"
    kubectl get inferencepools -n "${NAMESPACE}" || true
    echo ""

    heading "HTTPRoutes"
    kubectl get httproutes -n "${NAMESPACE}" -o yaml || true
    echo ""

    heading "Gateway"
    kubectl get Gateway -n "${NAMESPACE}" || true
    echo ""
241+
242+
# Run the repository's e2e validation script against the deployed namespace.
# MODEL is quoted so the value is always passed as a single argument, free of
# word splitting and glob expansion.
- name: Verify installation and run validation test
  working-directory: .github/scripts/e2e
  run: |
    ./e2e-validate.sh -n "${NAMESPACE}" -v -m "${MODEL}"
246+
247+
# Install the inference-perf chart with the prefill-heavy values, pointed at
# the Gateway address; results are written under <namespace>/<timestamp> in GCS.
- name: Run benchmarking test
  run: |
    run_stamp="$(date +"%Y-%m-%d-%H-%M-%S")"
    cd benchmarking/single-workload

    # Prefer an explicit GATEWAY_HOST; otherwise read the first address of the
    # first Gateway in the namespace.
    gateway_addr="${GATEWAY_HOST:-}"
    if [[ -z "$gateway_addr" ]]; then
      gateway_addr="$(kubectl get gateway -n "$NAMESPACE" \
        -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)"
    fi
    if [[ -z "$gateway_addr" ]]; then
      echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." >&2
      exit 1
    fi
    gateway_port=80
    base_url="http://${gateway_addr}:${gateway_port}"

    chart_args=(
      -f prefill-heavy-values.yaml
      --namespace "${NAMESPACE}"
      --create-namespace
      --set token.hfToken="${HF_TOKEN}"
      --set "config.server.base_url=${base_url}"
      --set "job.serviceAccountName=$KSA_NAME"
      --set "job.image.tag=latest"
      --set "config.storage.google_cloud_storage.bucket_name=${GCS_BUCKET}"
      --set "config.storage.google_cloud_storage.path=${NAMESPACE}/${run_stamp}"
      --set "gcsPath=gs://${GCS_BUCKET}/datasets/billsum_conversations.json"
      --set "config.data.path=/gcsDataset/gcs-dataset.json"
      --set-string 'job.resources.limits.nvidia\.com/gpu=1'
    )
    helm install prefill-heavy-benchmark ../inference-perf/ "${chart_args[@]}"
271+
272+
# Wait up to two hours for the benchmark Job to reach the Complete condition;
# on timeout, print the Job description and recent pod logs, then fail.
# NOTE(review): only condition=complete is awaited, so a Job that fails early
# presumably still waits out the full timeout — confirm whether that is intended.
- name: Wait for benchmarking job to finish
  run: |
    benchmark_job=prefill-heavy-benchmark-inference-perf-job
    wait_limit="7200s"

    if kubectl wait --for=condition=complete job/"$benchmark_job" -n "$NAMESPACE" --timeout="$wait_limit"; then
      echo "✅ Benchmarking Job Completed."
      exit 0
    fi

    echo "Error: Benchmark job $benchmark_job did not complete successfully within $wait_limit." >&2
    echo "--- Job Description ---" >&2
    kubectl describe job "$benchmark_job" -n "$NAMESPACE" >&2
    echo "--- Pod Logs (Last 50 lines) ---" >&2
    kubectl logs -l job-name="$benchmark_job" -n "$NAMESPACE" --all-containers=true --tail 50 >&2
    exit 1
285+
286+
# Always gather per-pod logs and descriptions, plus any deployment logs left in
# the home directory, into one folder for the artifact upload.
- name: Collect and upload Kubernetes pod logs
  if: always()
  run: |
    out_dir=pod-logs-inference-prefill-heavy
    mkdir -p "$out_dir"
    cd "$out_dir"

    # Prints one pod name per line.
    pod_names() {
      kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name"
    }

    echo "Fetching ${NAMESPACE} pods log..."
    pod_names | xargs -I{} sh -c 'kubectl logs --all-containers=true -n "${NAMESPACE}" {} > "{}.log" 2>&1'

    echo "Fetching ${NAMESPACE} pods descriptions..."
    pod_names | xargs -I{} sh -c 'kubectl describe pod -n "${NAMESPACE}" {} > "{}-describe.log" 2>&1'

    # These files may not exist; their absence is not an error.
    mv ~/igw-prefill-heavy-deployment.log . || true
    mv ~/install-deps.log . || true
299+
300+
# Publish the collected log folder as a build artifact, whatever the outcome.
- name: Upload pod logs as artifact
  if: always()
  uses: actions/upload-artifact@v4
  with:
    path: pod-logs-inference-prefill-heavy
    name: igw-pod-logs-inference-prefill-heavy-${{ matrix.accelerator.name }}

# Post to Google Chat only when an earlier step has failed.
- name: Send Google Chat notification on failure
  if: failure()
  uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb
  with:
    title: "${{ github.workflow }} - ${{ matrix.accelerator.name }}"
    jobStatus: ${{ job.status }}
    webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }}
314+
315+
# Always tear down the Helm releases, HTTPRoute and Gateway created by this job.
# Every command is attempted even if an earlier one fails (the default
# "bash -e" would stop at the first error and leave resources behind); the
# step still reports failure at the end if any command failed.
- name: Cleanup deployment
  if: always()
  run: |
    GATEWAY_NAME=inference-gateway
    status=0
    helm uninstall vllm-llama3-8b-instruct -n "${NAMESPACE}" --ignore-not-found || status=1
    helm uninstall prefill-heavy-benchmark -n "${NAMESPACE}" --ignore-not-found || status=1
    kubectl delete httproute llm-route -n "${NAMESPACE}" --ignore-not-found || status=1
    kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" --ignore-not-found || status=1
    exit "${status}"

0 commit comments

Comments
 (0)