# Full Kubeflow end-to-end integration test.
#
# Triggered manually (workflow_dispatch) and on pushes to master. The job
# installs the platform through the helper scripts in tests/gh-actions/ and the
# kustomize overlays in this repository, then exercises pipelines, notebooks,
# Katib, the Training Operator, KServe, Spark and Pod Security Standards
# against the kubeflow-user-example-com profile. Diagnostics are collected
# into logs/ when any step fails and uploaded as an artifact.
name: Full Kubeflow End-to-End Integration Test
on:
  workflow_dispatch:
  push:
    branches:
      - master
#  pull_request:
#    branches:
#      - master
jobs:
  build:
    name: End-to-End Integration Test
    runs-on:
      labels: ubuntu-latest-16-cores
    timeout-minutes: 60
    env:
      KIND_CLUSTER_NAME: kubeflow
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      # Infrastructure Setup
      - name: Install KinD and Create Kubernetes Cluster
        run: ./tests/gh-actions/install_KinD_create_KinD_cluster_install_kustomize.sh

      - name: Install Kubectl Command Line Tool
        run: ./tests/gh-actions/install_kubectl.sh

      - name: Create Kubeflow Namespace
        run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -

      - name: Install Certificate Manager
        run: ./tests/gh-actions/install_cert_manager.sh

      - name: Install Istio Service Mesh
        run: ./tests/gh-actions/install_istio-cni.sh

      - name: Install OAuth2 Proxy for Authentication
        run: ./tests/gh-actions/install_oauth2-proxy.sh

      - name: Install Kubeflow Istio Resources
        run: kustomize build common/istio-cni-1-24/kubeflow-istio-resources/base | kubectl apply -f -

      - name: Install KF Multi Tenancy
        run: ./tests/gh-actions/install_multi_tenancy.sh

      # Right now KFP also modifies user namespaces
      - name: Deploy Kubeflow Pipeline Components
        run: ./tests/gh-actions/install_pipelines.sh

      - name: Install dex
        run: |
          echo "Installing Dex..."
          kustomize build ./common/dex/overlays/oauth2-proxy | kubectl apply -f -

          echo "Waiting for pods in auth namespace to become ready..."
          kubectl wait --for=condition=Ready pods --all --timeout=180s -n auth

      - name: Install central-dashboard
        run: |
          kustomize build apps/centraldashboard/upstream/overlays/kserve | kubectl apply -f -
          # Completed pods never report Ready, so they are excluded here exactly
          # as in the notebook components step further down.
          kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 180s \
            --field-selector=status.phase!=Succeeded

      - name: Create Kubeflow User Profile
        run: kustomize build common/user-namespace/base | kubectl apply -f -

      - name: Verify User Profile Setup
        run: |
          # Wait for profile controller to process the request
          sleep 60

          # Verify profile resources are properly created
          KF_PROFILE=kubeflow-user-example-com
          kubectl -n "$KF_PROFILE" get pods,configmaps,secrets

          # Verify minio secret exists (critical for ML pipelines)
          if ! kubectl get secret mlpipeline-minio-artifact -n "$KF_PROFILE" > /dev/null 2>&1; then
            echo "Error: Secret mlpipeline-minio-artifact not found in namespace $KF_PROFILE"
            exit 1
          fi

      - name: Set up Python Environment
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install Python Dependencies
        run: |
          pip install pytest kubernetes kfp==2.11.0 kserve pytest-timeout pyyaml requests

      - name: port forward
        run: |
          ingress_gateway_service=$(kubectl get svc --namespace istio-system --selector="app=istio-ingressgateway" --output jsonpath='{.items[0].metadata.name}')
          nohup kubectl port-forward --namespace istio-system "svc/${ingress_gateway_service}" 8080:80 &

          # Bounded wait: a gateway that never comes up fails this step after
          # roughly two minutes instead of spinning until the job timeout.
          ready=false
          for ((attempt = 1; attempt <= 120; attempt++)); do
            if curl --silent --output /dev/null --max-time 5 localhost:8080; then
              ready=true
              break
            fi
            echo waiting for port-forwarding
            sleep 1
          done
          if [[ "${ready}" != true ]]; then
            echo "Error: port-forwarding on localhost:8080 did not become ready" >&2
            exit 1
          fi
          echo port-forwarding ready

      - name: test dex login
        run: |
          pip3 install requests
          ./tests/gh-actions/test_dex_login.py

      - name: Run ML Pipeline Integration Tests
        run: |
          KF_PROFILE=kubeflow-user-example-com

          # Test with authorized token (authorized user flow)
          TOKEN="$(kubectl -n "$KF_PROFILE" create token default-editor)"
          echo "Running pipeline with authorized token (authorized user)"
          python3 tests/gh-actions/pipeline_test.py run_pipeline "${TOKEN}" "${KF_PROFILE}"

          # Test with unauthorized token (unauthorized user flow)
          echo "Testing unauthorized access prevention (security check)"
          TOKEN="$(kubectl -n default create token default)"
          python3 tests/gh-actions/pipeline_test.py test_unauthorized_access "${TOKEN}" "${KF_PROFILE}"

      # Web UI Component Tests - Basic Connectivity Checks
      # NOTE: curl is invoked without --fail, so only transport-level errors
      # (e.g. connection refused) fail this step; HTTP error statuses do not.
      - name: Verify Central Dashboard and Component UIs
        run: |
          # Test central dashboard is accessible
          echo "Verifying Central Dashboard accessibility"
          curl -I http://localhost:8080/

          # Check individual component UIs
          echo "Verifying Notebooks UI accessibility"
          curl -I http://localhost:8080/jupyter/

          echo "Verifying Pipelines UI accessibility"
          curl -I http://localhost:8080/pipeline/

          echo "Verifying KServe Models UI accessibility"
          curl -I http://localhost:8080/models/

          echo "Verifying Katib Experiments UI accessibility"
          curl -I http://localhost:8080/katib/

      - name: Install Workspace / Notebook components (jupyter-web-application, notebook-controller, poddefaults)
        run: |
          kustomize build apps/jupyter/jupyter-web-app/upstream/overlays/istio/ | kubectl apply -f -
          kustomize build apps/jupyter/notebook-controller/upstream/overlays/kubeflow/ | kubectl apply -f -
          kustomize build apps/admission-webhook/upstream/overlays/cert-manager | kubectl apply -f -
          kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 300s \
            --field-selector=status.phase!=Succeeded

      - name: Apply PodDefaults for Notebook Integration
        run: kubectl apply -f tests/gh-actions/kf-objects/poddefaults.access-ml-pipeline.kubeflow-user-example-com.yaml

      - name: Create and Verify Notebook Server
        run: |
          # Apply the notebook definition directly from tests
          kubectl apply -f tests/gh-actions/kf-objects/notebook.test.kubeflow-user-example.com.yaml

          # Wait for notebook server to be ready - using exact syntax from pipeline tests
          kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 \
            -f tests/gh-actions/kf-objects/notebook.test.kubeflow-user-example.com.yaml \
            --timeout 600s

      - name: Run Pipeline from Notebook
        run: |
          # Execute pipeline from notebook using the existing test script
          KF_PROFILE=kubeflow-user-example-com
          if [ -f "tests/gh-actions/run_and_wait_kubeflow_pipeline.py" ]; then
            kubectl -n "$KF_PROFILE" cp \
              ./tests/gh-actions/run_and_wait_kubeflow_pipeline.py \
              test-0:/home/jovyan/run_and_wait_kubeflow_pipeline.py

            # No TTY or stdin is requested: the script runs non-interactively
            # and a CI step has no terminal to attach.
            kubectl -n "$KF_PROFILE" exec \
              test-0 -- python /home/jovyan/run_and_wait_kubeflow_pipeline.py
          else
            echo "Error: tests/gh-actions/run_and_wait_kubeflow_pipeline.py not found; cannot run pipeline from notebook" >&2
            exit 1
          fi

      - name: Test Katib Hyperparameter Tuning
        run: |
          # Apply Katib experiment test directly from tests directory
          if kubectl get crd experiments.kubeflow.org > /dev/null 2>&1; then
            KF_PROFILE=kubeflow-user-example-com
            sed "s/kubeflow-user/$KF_PROFILE/g" tests/gh-actions/kf-objects/katib_test.yaml | kubectl apply -f -
            kubectl get experiments -n "${KF_PROFILE}"
          else
            echo "Error: Katib CRD experiments.kubeflow.org not found; cannot run Katib hyperparameter tuning tests" >&2
            exit 1
          fi

      - name: Test Distributed Training with Training Operator
        run: |
          # Install Training Operator if needed using script from tests directory
          if ! kubectl get crd tfjobs.kubeflow.org > /dev/null 2>&1; then
            ./tests/gh-actions/install_training_operator.sh
          fi

          # Apply the PyTorch job YAML directly from tests directory
          if kubectl get crd pytorchjobs.kubeflow.org > /dev/null 2>&1; then
            KF_PROFILE=kubeflow-user-example-com
            sed "s/namespace: .*/namespace: $KF_PROFILE/g" tests/gh-actions/kf-objects/training_operator_job.yaml | kubectl apply -f -
            kubectl get pytorchjobs -n "${KF_PROFILE}"
          else
            echo "Error: Training Operator CRD pytorchjobs.kubeflow.org not found; cannot run distributed training tests" >&2
            exit 1
          fi

      - name: Install KNative Serving Platform
        run: ./tests/gh-actions/install_knative.sh

      # TODO install Kserve
      - name: Setup Port Forwarding for Istio Gateway
        run: |
          # The earlier "port forward" step may still hold local port 8080, and a
          # second forwarder cannot bind a port that is already in use, so a new
          # one is started only when nothing answers.
          if ! curl --silent --output /dev/null --max-time 5 localhost:8080; then
            ingress_gateway_service=$(kubectl get svc --namespace istio-system --selector="app=istio-ingressgateway" --output jsonpath='{.items[0].metadata.name}')
            nohup kubectl port-forward --namespace istio-system "svc/${ingress_gateway_service}" 8080:80 &
          fi

          # Bounded wait, same policy as the first port-forward step.
          ready=false
          for ((attempt = 1; attempt <= 120; attempt++)); do
            if curl --silent --output /dev/null --max-time 5 localhost:8080; then
              ready=true
              break
            fi
            echo waiting for port-forwarding
            sleep 1
          done
          if [[ "${ready}" != true ]]; then
            echo "Error: port-forwarding on localhost:8080 did not become ready" >&2
            exit 1
          fi
          echo port-forwarding ready

      - name: Test KServe Model Deployment and Serving
        run: |
          # Install KServe if needed using the script from tests directory
          if ! kubectl get crd inferenceservices.serving.kserve.io > /dev/null 2>&1; then
            ./tests/gh-actions/install_kserve.sh
          fi

          # Apply the KServe inference service test directly from tests directory
          if kubectl get crd inferenceservices.serving.kserve.io > /dev/null 2>&1; then
            KF_PROFILE=kubeflow-user-example-com
            # Appends a namespace line after every "metadata:" line.
            # NOTE(review): the two leading spaces assume the children of
            # "metadata:" in kserve_test.yaml are indented by two - confirm.
            sed -e "/metadata:/a\\  namespace: $KF_PROFILE" tests/gh-actions/kf-objects/kserve_test.yaml | kubectl apply -f -
            kubectl wait --for=condition=ready --timeout=120s -n "${KF_PROFILE}" isvc/sklearn-iris
          else
            echo "Error: KServe CRD inferenceservices.serving.kserve.io not found; cannot run model serving tests" >&2
            exit 1
          fi

      - name: Test Apache Spark Integration
        run: |
          if [ -f "tests/gh-actions/spark_install.sh" ] && [ -f "tests/gh-actions/spark_test.sh" ]; then
            KF_PROFILE=kubeflow-user-example-com
            chmod u+x tests/gh-actions/spark_*.sh
            ./tests/gh-actions/spark_install.sh
            ./tests/gh-actions/spark_test.sh "${KF_PROFILE}"
          else
            echo "Error: Spark install/test scripts not found under tests/gh-actions; cannot run Spark integration tests" >&2
            exit 1
          fi

      - name: Test Pod Security Standards
        run: |
          # Apply baseline Pod Security Standards using script from tests
          ./tests/gh-actions/enable_baseline_PSS.sh

          # Verify pods are running with baseline security standards
          kubectl get pods --all-namespaces

          # Unapply baseline labels - following exact pattern from other workflows
          NAMESPACES=("istio-system" "auth" "cert-manager" "oauth2-proxy" "kubeflow" "knative-serving")
          for NAMESPACE in "${NAMESPACES[@]}"; do
            if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then
              kubectl label namespace "$NAMESPACE" pod-security.kubernetes.io/enforce-
            fi
          done

          # Apply restricted Pod Security Standards using script from tests
          ./tests/gh-actions/enable_restricted_PSS.sh

          # Verify pods still work with restricted security standards
          kubectl get pods --all-namespaces

      - name: Verify All Components Running Successfully
        run: |
          # Run non-root security tests if available
          if [ -f "tests/gh-actions/runasnonroot.sh" ]; then
            echo "Running non-root user security tests..."
            chmod +x tests/gh-actions/runasnonroot.sh
            ./tests/gh-actions/runasnonroot.sh
          fi

          # Verify all components are running
          echo "Checking status of critical components..."
          kubectl get deployment -n kubeflow
          kubectl get deployment -n cert-manager
          kubectl get deployment -n istio-system
          kubectl get deployment -n auth

          # Check for failed pods
          if kubectl get pods --all-namespaces | grep -E '(Error|CrashLoopBackOff)'; then
            echo "Found pods in failed state"
            exit 1
          fi

          echo "All Kubeflow components are running successfully"

      - name: Collect Diagnostic Logs on Failure
        if: failure()
        run: |
          mkdir -p logs

          # Collection is best-effort: one failing kubectl call must not stop
          # the remaining diagnostics from being gathered.

          # Collect resource status
          kubectl get all --all-namespaces > logs/all-resources.txt || true
          kubectl get events --all-namespaces --sort-by=.metadata.creationTimestamp > logs/all-events.txt || true

          # Collect CRD status
          kubectl get crds | grep -E 'kubeflow|istio|knative|cert-manager|kserve' > logs/crds.txt || true

          # Collect pod descriptions
          namespaces=("kubeflow" "istio-system" "cert-manager" "auth")
          for ns in "${namespaces[@]}"; do
            kubectl describe pods -n "$ns" > "logs/${ns}-pod-descriptions.txt" || true

            # Collect logs for each pod in namespace
            for pod in $(kubectl get pods -n "$ns" -o jsonpath='{.items[*].metadata.name}'); do
              kubectl logs -n "$ns" "$pod" --tail=100 > "logs/${ns}-${pod}.txt" 2>&1 || true
            done
          done

          echo "Collected logs to logs/ directory"

      - name: Upload Diagnostic Logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: kubeflow-test-logs
          path: logs/
          # logs/ only exists when the collection step above ran, i.e. after a
          # failure; on a green run there is nothing to upload.
          if-no-files-found: ignore