# End-to-end "prefill heavy" benchmark for the inference gateway on GKE.
# The workflow can be started in two ways:
#   * by commenting `/run-gke-prefill-heavy` on a pull request, or
#   * manually via workflow_dispatch, passing a PR number or branch name.
name: GKE Prefill Heavy Test

on:
  # Runs with a PR comment /run-gke-prefill-heavy
  issue_comment:
    types: [created]
  workflow_dispatch:
    inputs:
      pr_or_branch:
        description: 'Pull-request number or branch name to test'
        required: true
        # Must be an exact ref name: later steps pass this value to git, so
        # stray whitespace would make the checkout fail.
        default: 'main'
        type: string

# Default token scope for every job; jobs may declare their own permissions.
permissions:
  contents: read

jobs:
  # Authorization job: ensures only authorized users can execute the workflow.
  # Note: even if a user checks out a branch that modifies the access list,
  # they still need the correct secret keys to deploy to GCP.
  check_access:
    runs-on: ubuntu-latest

    # Run only for the trigger comment on a pull request, or a manual dispatch.
    if: |
      (github.event_name == 'issue_comment' &&
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '/run-gke-prefill-heavy')) || github.event_name == 'workflow_dispatch'

    # pull-requests: read lets the job look up the PR base branch via the API.
    permissions:
      contents: read
      pull-requests: read

    outputs:
      authorized: ${{ steps.auth_logic.outputs.authorized }}

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          persist-credentials: false

      - name: Authorization Logic
        id: auth_logic
        shell: bash
        # Event data is handed to the script through environment variables
        # instead of being expanded into the script text, so payload values
        # are never parsed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          COMMENT_USER: ${{ github.event.comment.user.login }}
          COMMENT_ROLE: ${{ github.event.comment.author_association }}
          ACTOR: ${{ github.actor }}
          PR_NUMBER: ${{ github.event.issue.number }}
          REPO: ${{ github.repository }}
          GH_TOKEN: ${{ github.token }}
        run: |
          authorized='false'
          auth_file=".github/authorized_workflow_users.txt"
          user=""
          role=""

          if [[ "$EVENT_NAME" == "issue_comment" ]]; then
            user="$COMMENT_USER"
            role="$COMMENT_ROLE"

            # The issue_comment payload only carries URLs under
            # issue.pull_request, so the base branch is read from the API.
            base_ref="$(gh api "repos/${REPO}/pulls/${PR_NUMBER}" --jq '.base.ref' || true)"
            if [[ "$base_ref" != "main" ]]; then
              echo "PR base is not 'main'."
              echo "authorized=false" >> "$GITHUB_OUTPUT"
              exit 1
            fi

          elif [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            user="$ACTOR"
          fi

          # NOTE(review): GitHub's author_association values are OWNER, MEMBER,
          # COLLABORATOR, CONTRIBUTOR, etc.; MAINTAINER is not one of them, so
          # only OWNER can match here. Confirm which roles were intended.
          if [[ "$role" == "OWNER" || "$role" == "MAINTAINER" ]]; then
            echo "User authorized by role: $role"
            authorized='true'

          # Exact, whole-line match against the allow list. An empty user or a
          # missing file never authorizes.
          elif [[ -n "$user" && -f "$auth_file" ]] && grep -Fxq -- "$user" "$auth_file"; then
            echo "User authorized by file lookup: $auth_file"
            authorized='true'
          fi

          echo "authorized=$authorized" >> "$GITHUB_OUTPUT"

  # Deploys the model server, InferencePool and Gateway to the GKE cluster,
  # validates the installation, runs the benchmark job, collects logs and
  # finally removes the Helm releases and gateway resources.
  deploy_and_validate:
    needs: [check_access]
    if: |
      (github.event_name == 'workflow_dispatch' || github.event_name == 'issue_comment') &&
      needs.check_access.outputs.authorized == 'true'

    name: Test on ${{ matrix.accelerator.name }}
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
        accelerator:
          - name: GPU

    env:
      GCP_PROJECT_ID: llm-d-scale
      GKE_CLUSTER_NAME: llm-d-e2e-us-east5
      GKE_CLUSTER_ZONE: us-east5
      NAMESPACE: igw-prefill-heavy
      GATEWAY: gke-l7-regional-external-managed
      GATEWAY_TYPE: gke
      PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number || github.event.number || 'actions' }}
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      MODEL: meta-llama/Llama-3.1-8B-Instruct
      GSA_EMAIL: ${{ secrets.GCS_WORKLOAD_SA }}
      GCS_BUCKET: igw-e2e-benchmark-results
      KSA_NAME: igw-e2e-benchmark-sa

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          persist-credentials: false

      - name: Determine if pr_or_branch is a PR number
        id: check_pr
        env:
          # Manual runs supply the input; comment-triggered runs fall back to
          # the number of the PR that was commented on.
          PR_OR_BRANCH: ${{ github.event.inputs.pr_or_branch || github.event.issue.number }}
        shell: bash
        run: |
          echo "PR_OR_BRANCH=${PR_OR_BRANCH:-actions}" >> "$GITHUB_ENV"
          # An all-digit value is treated as a PR number, anything else as a
          # branch name.
          if [[ "$PR_OR_BRANCH" =~ ^[0-9]+$ ]]; then
            echo "is_pr=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_pr=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Fetch and checkout PR
        if: steps.check_pr.outputs.is_pr == 'true'
        run: |
          git fetch origin pull/"$PR_OR_BRANCH"/head:pr-"$PR_OR_BRANCH"
          git checkout pr-"$PR_OR_BRANCH"

      - name: Checkout branch
        if: steps.check_pr.outputs.is_pr == 'false'
        run: |
          # The initial checkout only contains the triggering ref, so fetch the
          # requested branch before switching to it.
          git fetch --depth=1 origin "$PR_OR_BRANCH"
          git checkout -B "$PR_OR_BRANCH" FETCH_HEAD

      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@b7593ed2efd1c1617e1b0254da33b86225adb2a5
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}

      - name: Set up gcloud CLI and kubectl
        uses: google-github-actions/setup-gcloud@cb1e50a9932213ecece00a606661ae9ca44f3397
        with:
          project_id: ${{ env.GCP_PROJECT_ID }}
          install_components: 'kubectl,gke-gcloud-auth-plugin'

      - name: Get GKE credentials
        run: |
          gcloud container clusters get-credentials "${GKE_CLUSTER_NAME}" --zone "${GKE_CLUSTER_ZONE}"

      - name: Create namespace
        run: |
          # Idempotent create: only an already-existing namespace is tolerated,
          # any other error still fails the step.
          kubectl create namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -

      - name: Create hf-token secret
        run: |
          # HF_TOKEN comes from the job-level env, so the secret value is not
          # pasted into the script text.
          kubectl create secret generic hf-token \
            --from-literal="token=${HF_TOKEN}" \
            --namespace "${NAMESPACE}" \
            --dry-run=client -o yaml | kubectl apply -f -

      - name: Create and Annotate KSA for Workload Identity
        run: |
          kubectl create serviceaccount "${KSA_NAME}" --namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -
          kubectl annotate serviceaccount "${KSA_NAME}" \
            "iam.gke.io/gcp-service-account=${GSA_EMAIL}" \
            --overwrite \
            --namespace "${NAMESPACE}"

      - name: Deploy Model Server and CRDs
        run: |
          # Without pipefail a failing kubectl would be hidden by tee.
          set -o pipefail
          cd config/manifests/vllm
          echo "Deploying Model Server..."
          kubectl apply -f gpu-deployment.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
          echo "Installing CRDs"
          kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.1.0/manifests.yaml
          echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log

      - name: Deploy InferencePool and Endpoint Picker Extension
        run: |
          set -o pipefail
          export IGW_CHART_VERSION=v1.1.0
          # tee -a appends, so output logged by earlier steps is preserved.
          helm install vllm-llama3-8b-instruct \
            --namespace "${NAMESPACE}" \
            --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
            --set "provider.name=${GATEWAY_TYPE}" \
            --version "${IGW_CHART_VERSION}" \
            oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool | tee -a ~/igw-prefill-heavy-deployment.log
          echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log

      - name: Wait for all pods to be ready
        run: |
          kubectl wait pod \
            --for=condition=Ready \
            --all \
            -n "${NAMESPACE}" \
            --timeout=25m
          echo "✅ All pods are ready."
          kubectl get pods -n "${NAMESPACE}"

      - name: Deploy Gateway
        run: |
          set -o pipefail
          GATEWAY_NAME=inference-gateway
          # Remove any route/gateway left over from an earlier run first.
          kubectl delete httproute llm-route -n "${NAMESPACE}" --ignore-not-found
          kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" --ignore-not-found
          echo "Deploying Gateway..."
          kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/gateway.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
          echo "Deploying HTTPRoute..."
          kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.1.0/config/manifests/gateway/gke/httproute.yaml -n "${NAMESPACE}" | tee -a ~/igw-prefill-heavy-deployment.log
          echo "---------------------------------------" >> ~/igw-prefill-heavy-deployment.log

      - name: Wait for gateway to be ready
        run: |
          GATEWAY_NAME=inference-gateway
          kubectl wait "gateway/${GATEWAY_NAME}" \
            --for=condition=Programmed=True \
            -n "${NAMESPACE}" \
            --timeout=500s
          echo "✅ Gateway is ready."
          kubectl get gateway -n "${NAMESPACE}"

      - name: Show deployment status
        run: |
          echo "=== Deployments ==="
          kubectl get deployments -n "${NAMESPACE}"
          echo ""
          echo "=== Pods ==="
          kubectl get pods -n "${NAMESPACE}"
          echo ""
          echo "=== Services ==="
          kubectl get svc -n "${NAMESPACE}"
          echo ""
          echo "=== Helm releases ==="
          helm list -n "${NAMESPACE}" || true
          echo ""
          echo "=== Inference Pools ==="
          kubectl get inferencepools -n "${NAMESPACE}" || true
          echo ""
          echo "=== HTTPRoutes ==="
          kubectl get httproutes -n "${NAMESPACE}" -o yaml || true
          echo ""
          echo "=== Gateway ==="
          kubectl get Gateway -n "${NAMESPACE}" || true
          echo ""

      - name: Verify installation and run validation test
        run: |
          cd .github/scripts/e2e
          ./e2e-validate.sh -n "${NAMESPACE}" -v -m "${MODEL}"

      - name: Run benchmarking test
        run: |
          TIMESTAMP=$(date +"%Y-%m-%d-%H-%M-%S")
          cd benchmarking/single-workload
          # Use GATEWAY_HOST when provided, otherwise take the first address of
          # the first Gateway found in the namespace.
          host="${GATEWAY_HOST:-$(kubectl get gateway -n "$NAMESPACE" \
            -o jsonpath='{.items[0].status.addresses[0].value}' 2>/dev/null || true)}"
          if [[ -z "$host" ]]; then
            echo "Error: could not discover a Gateway address in namespace '$NAMESPACE'." >&2
            exit 1
          fi
          port=80
          svc_host="${host}:${port}"
          helm install prefill-heavy-benchmark ../inference-perf/ -f prefill-heavy-values.yaml \
            --namespace "${NAMESPACE}" \
            --create-namespace \
            --set token.hfToken="${HF_TOKEN}" \
            --set "config.server.base_url=http://${svc_host}" \
            --set "job.serviceAccountName=$KSA_NAME" \
            --set "job.image.tag=latest" \
            --set "config.storage.google_cloud_storage.bucket_name=${GCS_BUCKET}" \
            --set "config.storage.google_cloud_storage.path=${NAMESPACE}/${TIMESTAMP}" \
            --set "gcsPath=gs://${GCS_BUCKET}/datasets/billsum_conversations.json" \
            --set "config.data.path=/gcsDataset/gcs-dataset.json" \
            --set-string 'job.resources.limits.nvidia\.com/gpu=1'

      - name: Wait for benchmarking job to finish
        run: |
          job_name=prefill-heavy-benchmark-inference-perf-job
          TIMEOUT_DURATION="7200s"
          if ! kubectl wait --for=condition=complete job/"$job_name" -n "$NAMESPACE" --timeout="$TIMEOUT_DURATION"; then
            echo "Error: Benchmark job $job_name did not complete successfully within $TIMEOUT_DURATION." >&2
            # Diagnostics are best-effort so one failing command does not hide
            # the remaining output.
            echo "--- Job Description ---" >&2
            kubectl describe job "$job_name" -n "$NAMESPACE" >&2 || true
            echo "--- Pod Logs (Last 50 lines) ---" >&2
            kubectl logs -l job-name="$job_name" -n "$NAMESPACE" --all-containers=true --tail 50 >&2 || true
            exit 1
          fi
          echo "✅ Benchmarking Job Completed."

      - name: Collect and upload Kubernetes pod logs
        if: always()
        run: |
          mkdir -p pod-logs-inference-prefill-heavy
          cd pod-logs-inference-prefill-heavy
          # Collection is best-effort: a pod whose logs cannot be read must not
          # stop the remaining pods or the deployment log from being gathered.
          pods="$(kubectl get pods -n "${NAMESPACE}" --no-headers -o custom-columns=":metadata.name" || true)"
          echo "Fetching ${NAMESPACE} pods log..."
          for pod in ${pods}; do
            kubectl logs --all-containers=true -n "${NAMESPACE}" "${pod}" > "${pod}.log" 2>&1 || true
          done
          echo "Fetching ${NAMESPACE} pods descriptions..."
          for pod in ${pods}; do
            kubectl describe pod -n "${NAMESPACE}" "${pod}" > "${pod}-describe.log" 2>&1 || true
          done
          mv ~/igw-prefill-heavy-deployment.log . || true
          mv ~/install-deps.log . || true

      - name: Upload pod logs as artifact
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: igw-pod-logs-inference-prefill-heavy-${{ matrix.accelerator.name }}
          path: pod-logs-inference-prefill-heavy

      - name: Send Google Chat notification on failure
        if: failure()
        uses: SimonScholz/google-chat-action@3b3519e5102dba8aa5046fd711c4b553586409bb
        with:
          webhookUrl: ${{ secrets.GOOGLE_CHAT_WEBHOOK }}
          jobStatus: ${{ job.status }}
          title: '${{ github.workflow }} - ${{ matrix.accelerator.name }}'

      - name: Cleanup deployment
        if: always()
        run: |
          GATEWAY_NAME=inference-gateway
          # Each removal is independent; a failure of one must not prevent the
          # others from being attempted.
          # NOTE(review): resources applied from gpu-deployment.yaml are not
          # removed here - confirm that leaving them running is intentional.
          helm uninstall vllm-llama3-8b-instruct -n "${NAMESPACE}" --ignore-not-found || true
          helm uninstall prefill-heavy-benchmark -n "${NAMESPACE}" --ignore-not-found || true
          kubectl delete httproute llm-route -n "${NAMESPACE}" --ignore-not-found || true
          kubectl delete gateway "${GATEWAY_NAME}" -n "${NAMESPACE}" --ignore-not-found || true