Skip to content

Commit c1ae849

Browse files
Add comprehensive Kubeflow integration test GitHub Actions workflow (#3070)
* Add comprehensive Kubeflow integration test GitHub Actions workflow

  Signed-off-by: kunal-511 <[email protected]>

* updated by using existing tests

  Signed-off-by: kunal-511 <[email protected]>

* Updated the workflow to maximize the use of tests folder

  Signed-off-by: kunal-511 <[email protected]>

* Update full_kubeflow_integration_test.yaml

  Signed-off-by: Julius von Kohout <[email protected]>
  Signed-off-by: kunal-511 <[email protected]>

* Update full_kubeflow_integration_test.yaml

  Signed-off-by: Julius von Kohout <[email protected]>
  Signed-off-by: kunal-511 <[email protected]>

* fix again the illegal commits

  Signed-off-by: juliusvonkohout <[email protected]>

* Update full_kubeflow_integration_test.yaml

  Signed-off-by: Julius von Kohout <[email protected]>

---------

Signed-off-by: kunal-511 <[email protected]>
Signed-off-by: Julius von Kohout <[email protected]>
Signed-off-by: juliusvonkohout <[email protected]>
Co-authored-by: Julius von Kohout <[email protected]>
1 parent af624af commit c1ae849

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
# Full end-to-end integration test for Kubeflow.
#
# Stands up a KinD cluster, installs the core platform (cert-manager, Istio,
# Dex, oauth2-proxy, multi-tenancy, Pipelines, central dashboard), creates a
# user profile, then exercises each component in turn: Pipelines (authorized
# and unauthorized access), component UIs, Notebooks, Katib, Training
# Operator, KNative/KServe, Spark, and finally Pod Security Standards.
# Diagnostic logs are collected and uploaded as an artifact on failure.
#
# NOTE(review): indentation below was reconstructed from a whitespace-mangled
# copy of this workflow; all step names, commands, and comments are preserved
# verbatim.
name: Full Kubeflow End-to-End Integration Test
on:
  workflow_dispatch:
  push:
    branches:
      - master
  # pull_request:
  #   branches:
  #     - master
jobs:
  build:
    name: End-to-End Integration Test
    runs-on:
      labels: ubuntu-latest-16-cores
    timeout-minutes: 60
    env:
      KIND_CLUSTER_NAME: kubeflow
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # Infrastructure Setup
      - name: Install KinD and Create Kubernetes Cluster
        run: ./tests/gh-actions/install_KinD_create_KinD_cluster_install_kustomize.sh

      - name: Install Kubectl Command Line Tool
        run: ./tests/gh-actions/install_kubectl.sh

      - name: Create Kubeflow Namespace
        run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -

      - name: Install Certificate Manager
        run: ./tests/gh-actions/install_cert_manager.sh

      - name: Install Istio Service Mesh
        run: ./tests/gh-actions/install_istio-cni.sh

      - name: Install OAuth2 Proxy for Authentication
        run: ./tests/gh-actions/install_oauth2-proxy.sh

      - name: Install Kubeflow Istio Resources
        run: kustomize build common/istio-cni-1-24/kubeflow-istio-resources/base | kubectl apply -f -

      - name: Install KF Multi Tenancy
        run: ./tests/gh-actions/install_multi_tenancy.sh

      # Right now KFP also modifies user namespaces
      - name: Deploy Kubeflow Pipeline Components
        run: ./tests/gh-actions/install_pipelines.sh

      - name: Install dex
        run: |
          echo "Installing Dex..."
          kustomize build ./common/dex/overlays/oauth2-proxy | kubectl apply -f -

          echo "Waiting for pods in auth namespace to become ready..."
          kubectl wait --for=condition=Ready pods --all --timeout=180s -n auth

      - name: Install central-dashboard
        run: |
          kustomize build apps/centraldashboard/upstream/overlays/kserve | kubectl apply -f -
          kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 180s

      - name: Create Kubeflow User Profile
        run: kustomize build common/user-namespace/base | kubectl apply -f -

      - name: Verify User Profile Setup
        run: |
          # Wait for profile controller to process the request
          sleep 60

          # Verify profile resources are properly created
          KF_PROFILE=kubeflow-user-example-com
          kubectl -n $KF_PROFILE get pods,configmaps,secrets

          # Verify minio secret exists (critical for ML pipelines)
          if ! kubectl get secret mlpipeline-minio-artifact -n $KF_PROFILE > /dev/null 2>&1; then
            echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE"
            exit 1
          fi

      - name: Set up Python Environment
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Python Dependencies
        run: |
          pip install pytest kubernetes kfp==2.11.0 kserve pytest-timeout pyyaml requests

      - name: port forward
        run: |
          ingress_gateway_service=$(kubectl get svc --namespace istio-system --selector="app=istio-ingressgateway" --output jsonpath='{.items[0].metadata.name}')
          nohup kubectl port-forward --namespace istio-system svc/${ingress_gateway_service} 8080:80 &
          while ! curl localhost:8080; do echo waiting for port-forwarding; sleep 1; done; echo port-forwarding ready

      - name: test dex login
        run: |
          pip3 install requests
          ./tests/gh-actions/test_dex_login.py

      - name: Run ML Pipeline Integration Tests
        run: |
          KF_PROFILE=kubeflow-user-example-com

          # Test with authorized token (authorized user flow)
          TOKEN="$(kubectl -n $KF_PROFILE create token default-editor)"
          echo "Running pipeline with authorized token (authorized user)"
          python3 tests/gh-actions/pipeline_test.py run_pipeline "${TOKEN}" "${KF_PROFILE}"

          # Test with unauthorized token (unauthorized user flow)
          echo "Testing unauthorized access prevention (security check)"
          TOKEN="$(kubectl -n default create token default)"
          python3 tests/gh-actions/pipeline_test.py test_unauthorized_access "${TOKEN}" "${KF_PROFILE}"

      # Web UI Component Tests - Basic Connectivity Checks
      - name: Verify Central Dashboard and Component UIs
        run: |
          # Test central dashboard is accessible
          echo "Verifying Central Dashboard accessibility"
          curl -I http://localhost:8080/

          # Check individual component UIs
          echo "Verifying Notebooks UI accessibility"
          curl -I http://localhost:8080/jupyter/

          echo "Verifying Pipelines UI accessibility"
          curl -I http://localhost:8080/pipeline/

          echo "Verifying KServe Models UI accessibility"
          curl -I http://localhost:8080/models/

          echo "Verifying Katib Experiments UI accessibility"
          curl -I http://localhost:8080/katib/

      - name: Install NWorkspace / Notebook components (jupyter-web-application, notebook-controller, poddefaults)
        run: |
          kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio/ | kubectl apply -f -
          kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f -
          kustomize build apps/admission-webhook/upstream/overlays/cert-manager | kubectl apply -f -
          kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s \
            --field-selector=status.phase!=Succeeded

      - name: Apply PodDefaults for Notebook Integration
        run: kubectl apply -f tests/gh-actions/kf-objects/poddefaults.access-ml-pipeline.kubeflow-user-example-com.yaml

      - name: Create and Verify Notebook Server
        run: |
          # Apply the notebook definition directly from tests
          kubectl apply -f tests/gh-actions/kf-objects/notebook.test.kubeflow-user-example.com.yaml

          # Wait for notebook server to be ready - using exact syntax from pipeline tests
          kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 \
            -f tests/gh-actions/kf-objects/notebook.test.kubeflow-user-example.com.yaml \
            --timeout 600s

      - name: Run Pipeline from Notebook
        run: |
          # Execute pipeline from notebook using the existing test script
          KF_PROFILE=kubeflow-user-example-com
          if [ -f "tests/gh-actions/run_and_wait_kubeflow_pipeline.py" ]; then
            kubectl -n $KF_PROFILE cp \
              ./tests/gh-actions/run_and_wait_kubeflow_pipeline.py \
              test-0:/home/jovyan/run_and_wait_kubeflow_pipeline.py

            kubectl -n $KF_PROFILE exec -ti \
              test-0 -- python /home/jovyan/run_and_wait_kubeflow_pipeline.py
          else
            echo "Skipping pipeline run from notebook test - script not found"
            exit 1
          fi

      - name: Test Katib Hyperparameter Tuning
        run: |
          # Apply Katib experiment test directly from tests directory
          if kubectl get crd experiments.kubeflow.org > /dev/null 2>&1; then
            KF_PROFILE=kubeflow-user-example-com
            sed "s/kubeflow-user/$KF_PROFILE/g" tests/gh-actions/kf-objects/katib_test.yaml | kubectl apply -f -
            kubectl get experiments -n ${KF_PROFILE}
          else
            echo "Katib CRD not found, skipping Katib hyperparameter tuning tests"
            exit 1
          fi

      - name: Test Distributed Training with Training Operator
        run: |
          # Install Training Operator if needed using script from tests directory
          if ! kubectl get crd tfjobs.kubeflow.org > /dev/null 2>&1; then
            ./tests/gh-actions/install_training_operator.sh
          fi

          # Apply the PyTorch job YAML directly from tests directory
          if kubectl get crd pytorchjobs.kubeflow.org > /dev/null 2>&1; then
            KF_PROFILE=kubeflow-user-example-com
            sed "s/namespace: .*/namespace: $KF_PROFILE/g" tests/gh-actions/kf-objects/training_operator_job.yaml | kubectl apply -f -
            kubectl get pytorchjobs -n ${KF_PROFILE}
          else
            echo "Training Operator CRDs not found, skipping distributed training tests"
            exit 1
          fi

      - name: Install KNative Serving Platform
        run: ./tests/gh-actions/install_knative.sh

      # TODO install Kserve
      - name: Setup Port Forwarding for Istio Gateway
        run: |
          ingress_gateway_service=$(kubectl get svc --namespace istio-system --selector="app=istio-ingressgateway" --output jsonpath='{.items[0].metadata.name}')
          nohup kubectl port-forward --namespace istio-system svc/${ingress_gateway_service} 8080:80 &
          while ! curl localhost:8080; do echo waiting for port-forwarding; sleep 1; done; echo port-forwarding ready

      - name: Test KServe Model Deployment and Serving
        run: |
          # Install KServe if needed using the script from tests directory
          if ! kubectl get crd inferenceservices.serving.kserve.io > /dev/null 2>&1; then
            ./tests/gh-actions/install_kserve.sh
          fi

          # Apply the KServe inference service test directly from tests directory
          if kubectl get crd inferenceservices.serving.kserve.io > /dev/null 2>&1; then
            KF_PROFILE=kubeflow-user-example-com
            sed -e "/metadata:/a\\ namespace: $KF_PROFILE" tests/gh-actions/kf-objects/kserve_test.yaml | kubectl apply -f -
            kubectl wait --for=condition=ready --timeout=120s -n ${KF_PROFILE} isvc/sklearn-iris
          else
            echo "KServe CRD not found, skipping model serving tests"
            exit 1
          fi

      - name: Test Apache Spark Integration
        run: |
          if [ -f "tests/gh-actions/spark_install.sh" ] && [ -f "tests/gh-actions/spark_test.sh" ]; then
            KF_PROFILE=kubeflow-user-example-com
            chmod u+x tests/gh-actions/spark_*.sh
            ./tests/gh-actions/spark_install.sh
            ./tests/gh-actions/spark_test.sh "${KF_PROFILE}"
          else
            echo "Skipping Spark integration tests - scripts not found"
            exit 1
          fi

      - name: Test Pod Security Standards
        run: |
          # Apply baseline Pod Security Standards using script from tests
          ./tests/gh-actions/enable_baseline_PSS.sh

          # Verify pods are running with baseline security standards
          kubectl get pods --all-namespaces

          # Unapply baseline labels - following exact pattern from other workflows
          NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving")
          for NAMESPACE in "${NAMESPACES[@]}"; do
            if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then
              kubectl label namespace $NAMESPACE pod-security.kubernetes.io/enforce-
            fi
          done

          # Apply restricted Pod Security Standards using script from tests
          ./tests/gh-actions/enable_restricted_PSS.sh

          # Verify pods still work with restricted security standards
          kubectl get pods --all-namespaces

      - name: Verify All Components Running Successfully
        run: |
          # Run non-root security tests if available
          if [ -f "tests/gh-actions/runasnonroot.sh" ]; then
            echo "Running non-root user security tests..."
            chmod +x tests/gh-actions/runasnonroot.sh
            ./tests/gh-actions/runasnonroot.sh
          fi

          # Verify all components are running
          echo "Checking status of critical components..."
          kubectl get deployment -n kubeflow
          kubectl get deployment -n cert-manager
          kubectl get deployment -n istio-system
          kubectl get deployment -n auth

          # Check for failed pods
          if kubectl get pods --all-namespaces | grep -E '(Error|CrashLoopBackOff)'; then
            echo "Found pods in failed state"
            exit 1
          fi

          echo "All Kubeflow components are running successfully"

      - name: Collect Diagnostic Logs on Failure
        if: failure()
        run: |
          mkdir -p logs

          # Collect resource status
          kubectl get all --all-namespaces > logs/all-resources.txt
          kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp > logs/all-events.txt

          # Collect CRD status
          kubectl get crds | grep -E 'kubeflow|istio|knative|cert-manager|kserve' > logs/crds.txt || true

          # Collect pod descriptions
          namespaces=("kubeflow" "istio-system" "cert-manager" "auth")
          for ns in "${namespaces[@]}"; do
            kubectl describe pods -n $ns > logs/$ns-pod-descriptions.txt

            # Collect logs for each pod in namespace
            for pod in $(kubectl get pods -n $ns -o jsonpath='{.items[*].metadata.name}'); do
              kubectl logs -n $ns $pod --tail=100 > logs/$ns-$pod.txt 2>&1 || true
            done
          done

          echo "Collected logs to logs/ directory"

      - name: Upload Diagnostic Logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: kubeflow-test-logs
          path: logs/

0 commit comments

Comments
 (0)