Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vsphere e2e tests setup #1006

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
319 changes: 319 additions & 0 deletions .github/workflows/ci-vspehere.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: CI-vsphere

# NOTE(review): both triggers are currently restricted to the
# `vspeheree2etest` branch; the `main` and `release-*` filters are left
# commented out below. Confirm the intended branch list before merging.
on:
  pull_request:
    types: [opened, synchronize]
    branches:
      # - main
      # - release-*
      - vspeheree2etest
  push:
    branches:
      # - main
      # - release-*
      - vspeheree2etest

jobs:
### Configuration checks ###
# Lints the Helm chart located in deployments/gpu-operator/.
helm-lint:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    # NOTE(review): the action reference on this step was mangled by e-mail
    # obfuscation in the extracted page ("azure/[email protected]"), which is
    # not a valid `owner/repo@ref`. `azure/setup-helm@v4.2.0` is the presumed
    # original — confirm the tag against the commit before merging.
    - name: Install Helm
      id: install
      uses: azure/setup-helm@v4.2.0
    - run: helm lint deployments/gpu-operator/
# Runs `make validate-csv` after installing the Go version named in
# versions.mk.
validate-csv:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Get Golang version
      id: vars
      # Grab the "GOLANG_VERSION ?= <value>" line from versions.mk, strip the
      # assignment prefix, and export the remainder for the following steps.
      run: |
        version_line=$(grep "GOLANG_VERSION ?=" versions.mk)
        echo "GOLANG_VERSION=${version_line##GOLANG_VERSION ?= }" >> "${GITHUB_ENV}"
    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
    - run: make validate-csv
# Runs `make validate-helm-values` after installing the Go version named in
# versions.mk.
validate-helm-values:
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Get Golang version
      id: vars
      # The matching versions.mk line has the form "GOLANG_VERSION ?= <value>";
      # only <value> is written to GITHUB_ENV.
      run: |
        version_line=$(grep "GOLANG_VERSION ?=" versions.mk)
        echo "GOLANG_VERSION=${version_line##GOLANG_VERSION ?= }" >> "${GITHUB_ENV}"
    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
    - run: make validate-helm-values

### Golang checks and build ###
# Static checks: golangci-lint followed by `make check`. Starts only after
# the three configuration-check jobs have succeeded.
go-check:
  needs:
    - helm-lint
    - validate-csv
    - validate-helm-values
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Get Golang version
      id: vars
      # Export the value of the "GOLANG_VERSION ?= <value>" line in
      # versions.mk so that setup-go can consume it.
      run: |
        version_line=$(grep "GOLANG_VERSION ?=" versions.mk)
        echo "GOLANG_VERSION=${version_line##GOLANG_VERSION ?= }" >> "${GITHUB_ENV}"
    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
    - name: Lint
      uses: golangci/golangci-lint-action@v6
      with:
        version: v1.60.3
        args: -v --timeout 5m
        skip-cache: true
    - run: make check
# Unit tests, run through `make coverage`. Starts only after the three
# configuration-check jobs have succeeded.
go-test:
  name: unit tests
  needs:
    - helm-lint
    - validate-csv
    - validate-helm-values
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Get Golang version
      id: vars
      # Export the value of the "GOLANG_VERSION ?= <value>" line in
      # versions.mk so that setup-go can consume it.
      run: |
        version_line=$(grep "GOLANG_VERSION ?=" versions.mk)
        echo "GOLANG_VERSION=${version_line##GOLANG_VERSION ?= }" >> "${GITHUB_ENV}"
    - name: Install Go
      uses: actions/setup-go@v5
      with:
        go-version: ${{ env.GOLANG_VERSION }}
    - run: make coverage
# Build check: runs `make docker-build` on a fresh checkout. Starts only
# after the three configuration-check jobs have succeeded.
go-build:
  needs:
    - helm-lint
    - validate-csv
    - validate-helm-values
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - run: make docker-build

### Image builds ###
# Builds the gpu-operator image for every distribution in the matrix via
# `make build-<dist>`. PUSH_ON_BUILD and BUILD_MULTI_ARCH_IMAGES are exported
# as "true" only for push events and for pull requests whose head repository
# is this repository, and never when the actor is dependabot.
build-gpu-operator:
  needs: [go-check, go-test, go-build]
  runs-on: ubuntu-latest
  strategy:
    matrix:
      dist: [ubi9]
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Calculate build vars
      id: vars
      env:
        # Handed over through the environment instead of being interpolated
        # into the script text. Empty on push events, which carry no
        # pull_request payload.
        HEAD_REPO_FULL_NAME: ${{ github.event.pull_request.head.repo.full_name }}
      run: |
        echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> "${GITHUB_ENV}"
        echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> "${GITHUB_ENV}"

        # Fall back to the current repository on push events so the image
        # source label never degrades to a bare "https://github.com/".
        REPO_FULL_NAME="${HEAD_REPO_FULL_NAME:-${GITHUB_REPOSITORY}}"
        echo "${REPO_FULL_NAME}"
        echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> "${GITHUB_ENV}"

        GENERATE_ARTIFACTS="false"
        if [[ "${GITHUB_ACTOR}" == "dependabot[bot]" ]]; then
          GENERATE_ARTIFACTS="false"
        elif [[ "${GITHUB_EVENT_NAME}" == "pull_request" && "${HEAD_REPO_FULL_NAME}" == "${GITHUB_REPOSITORY}" ]]; then
          GENERATE_ARTIFACTS="true"
        elif [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then
          GENERATE_ARTIFACTS="true"
        fi
        echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> "${GITHUB_ENV}"
        echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> "${GITHUB_ENV}"
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Build image
      env:
        # Step-level `env:` values are not shell-expanded, so the values
        # exported by "Calculate build vars" are read through the `env`
        # context; the shell and make then both see the resolved strings.
        IMAGE_NAME: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator
        VERSION: ${{ env.COMMIT_SHORT_SHA }}
      run: |
        echo "${VERSION}"
        make build-${{ matrix.dist }}
# Builds the gpu-operator-validator image (SUBCOMPONENT=validator) for every
# distribution in the matrix via `make build-<dist>`. PUSH_ON_BUILD and
# BUILD_MULTI_ARCH_IMAGES are exported as "true" only for push events and for
# pull requests whose head repository is this repository, and never when the
# actor is dependabot.
build-gpu-operator-validator:
  needs: [go-check, go-test, go-build]
  runs-on: ubuntu-latest
  strategy:
    matrix:
      dist: [ubi9]
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    - name: Calculate build vars
      id: vars
      env:
        # Handed over through the environment instead of being interpolated
        # into the script text. Empty on push events, which carry no
        # pull_request payload.
        HEAD_REPO_FULL_NAME: ${{ github.event.pull_request.head.repo.full_name }}
      run: |
        echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> "${GITHUB_ENV}"
        echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> "${GITHUB_ENV}"

        # Fall back to the current repository on push events so the image
        # source label never degrades to a bare "https://github.com/".
        REPO_FULL_NAME="${HEAD_REPO_FULL_NAME:-${GITHUB_REPOSITORY}}"
        echo "${REPO_FULL_NAME}"
        echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> "${GITHUB_ENV}"

        GENERATE_ARTIFACTS="false"
        if [[ "${GITHUB_ACTOR}" == "dependabot[bot]" ]]; then
          GENERATE_ARTIFACTS="false"
        elif [[ "${GITHUB_EVENT_NAME}" == "pull_request" && "${HEAD_REPO_FULL_NAME}" == "${GITHUB_REPOSITORY}" ]]; then
          GENERATE_ARTIFACTS="true"
        elif [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then
          GENERATE_ARTIFACTS="true"
        fi
        echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> "${GITHUB_ENV}"
        echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> "${GITHUB_ENV}"
    - name: Set up QEMU
      uses: docker/setup-qemu-action@v3
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Build image
      env:
        # Step-level `env:` values are not shell-expanded, so the values
        # exported by "Calculate build vars" are read through the `env`
        # context; the shell and make then both see the resolved strings.
        IMAGE_NAME: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator
        VERSION: ${{ env.COMMIT_SHORT_SHA }}
        SUBCOMPONENT: validator
      run: |
        echo "${VERSION}"
        make build-${{ matrix.dist }}

### e2e tests ###
# End-to-end run of tests/cases/defaults.sh against an environment created by
# the Holodeck action from tests/holodeck_vsphere.yaml. Logs are pulled after
# the test script regardless of its result and uploaded only when the job
# fails.
e2e-tests-containerd:
  needs: [build-gpu-operator, build-gpu-operator-validator]
  runs-on: ubuntu-latest
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    # NOTE(review): the action reference on this step was mangled by e-mail
    # obfuscation in the extracted page and is not a valid `owner/repo@ref`.
    # Restore the original `NVIDIA/holodeck@<tag>` from the commit.
    - name: Set up Holodeck
      uses: NVIDIA/[email protected]
      with:
        vsphere_username: ${{ secrets.VSPHERE_USERNAME }}
        vsphere_password: ${{ secrets.VSPHERE_PASSWORD }}
        vsphere_ssh_key: ${{ secrets.VSPHERE_SSH_KEY }}
        holodeck_config: "tests/holodeck_vsphere.yaml"
    # NOTE(review): `@master` is a mutable ref; consider pinning to a release
    # tag or commit SHA.
    - name: Get public dns name
      id: get_public_dns_name
      uses: mikefarah/yq@master
      with:
        cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
    - name: Calculate test vars
      id: vars
      run: |
        COMMIT_SHORT_SHA="${GITHUB_SHA:0:8}"
        LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')

        # Everything printed inside the group is appended to GITHUB_ENV and
        # becomes visible to the following steps.
        {
          echo "COMMIT_SHORT_SHA=${COMMIT_SHORT_SHA}"
          echo "LOWERCASE_REPO_OWNER=${LOWERCASE_REPO_OWNER}"

          echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}"
          echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator"
          echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}"
          echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator"

          echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}"
          echo "private_key=${{ github.workspace }}/key.pem"
        } >> "${GITHUB_ENV}"
    - name: Run e2e tests
      env:
        GPU_PRODUCT_NAME: "Tesla-T4"
        SKIP_LAUNCH: "true"
        CONTAINER_RUNTIME: "containerd"
        TEST_CASE: "./tests/cases/defaults.sh"
        # Supplied through the environment so the key material is never
        # spliced into the script text.
        VSPHERE_SSH_KEY: ${{ secrets.VSPHERE_SSH_KEY }}
      run: |
        printf '%s\n' "${VSPHERE_SSH_KEY}" > "${private_key}" && chmod 400 "${private_key}"
        # Keep the raw key out of the environment of the test scripts.
        unset VSPHERE_SSH_KEY

        # Remember the test result so logs are still collected on failure.
        rc=0
        ./tests/ci-run-e2e.sh "${OPERATOR_IMAGE}" "${OPERATOR_VERSION}" "${VALIDATOR_IMAGE}" "${VALIDATOR_VERSION}" "${GPU_PRODUCT_NAME}" "${TEST_CASE}" || rc=$?
        ./tests/scripts/pull.sh /tmp/logs logs
        exit "${rc}"
    - name: Archive test logs
      if: ${{ failure() }}
      uses: actions/upload-artifact@v4
      with:
        name: containerd-e2e-test-logs
        path: ./logs/
        retention-days: 15

# End-to-end run of tests/cases/nvidia-driver.sh against an environment
# created by the Holodeck action from tests/holodeck_vsphere.yaml. Logs are
# pulled after the test script regardless of its result and uploaded only
# when the job fails.
e2e-tests-nvidiadriver:
  needs: [build-gpu-operator, build-gpu-operator-validator]
  runs-on: ubuntu-latest
  steps:
    - name: Check out code
      uses: actions/checkout@v4
    # NOTE(review): the action reference on this step was mangled by e-mail
    # obfuscation in the extracted page and is not a valid `owner/repo@ref`.
    # Restore the original `NVIDIA/holodeck@<tag>` from the commit.
    - name: Set up Holodeck
      uses: NVIDIA/[email protected]
      with:
        vsphere_username: ${{ secrets.VSPHERE_USERNAME }}
        vsphere_password: ${{ secrets.VSPHERE_PASSWORD }}
        vsphere_ssh_key: ${{ secrets.VSPHERE_SSH_KEY }}
        holodeck_config: "tests/holodeck_vsphere.yaml"
    # NOTE(review): `@master` is a mutable ref; consider pinning to a release
    # tag or commit SHA.
    - name: Get public dns name
      id: get_public_dns_name
      uses: mikefarah/yq@master
      with:
        cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
    - name: Calculate test vars
      id: vars
      run: |
        COMMIT_SHORT_SHA="${GITHUB_SHA:0:8}"
        LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')

        # Everything printed inside the group is appended to GITHUB_ENV and
        # becomes visible to the following steps.
        {
          echo "COMMIT_SHORT_SHA=${COMMIT_SHORT_SHA}"
          echo "LOWERCASE_REPO_OWNER=${LOWERCASE_REPO_OWNER}"

          echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}"
          echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator"
          echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}"
          echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator"

          echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}"
          echo "private_key=${{ github.workspace }}/key.pem"
        } >> "${GITHUB_ENV}"
    - name: Run e2e tests
      env:
        GPU_PRODUCT_NAME: "Tesla-T4"
        SKIP_LAUNCH: "true"
        CONTAINER_RUNTIME: "containerd"
        TEST_CASE: "./tests/cases/nvidia-driver.sh"
        # Supplied through the environment so the key material is never
        # spliced into the script text.
        VSPHERE_SSH_KEY: ${{ secrets.VSPHERE_SSH_KEY }}
      run: |
        printf '%s\n' "${VSPHERE_SSH_KEY}" > "${private_key}" && chmod 400 "${private_key}"
        # Keep the raw key out of the environment of the test scripts.
        unset VSPHERE_SSH_KEY

        # Remember the test result so logs are still collected on failure.
        rc=0
        ./tests/ci-run-e2e.sh "${OPERATOR_IMAGE}" "${OPERATOR_VERSION}" "${VALIDATOR_IMAGE}" "${VALIDATOR_VERSION}" "${GPU_PRODUCT_NAME}" "${TEST_CASE}" || rc=$?
        ./tests/scripts/pull.sh /tmp/logs logs
        exit "${rc}"
    - name: Archive test logs
      if: ${{ failure() }}
      uses: actions/upload-artifact@v4
      with:
        name: nvidiadriver-e2e-test-logs
        path: ./logs/
        retention-days: 15
10 changes: 6 additions & 4 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ on:
- opened
- synchronize
branches:
- main
- release-*
# - main
# - release-*
- no-test
push:
branches:
- main
- release-*
# - main
# - release-*
- no-test

jobs:
### Configuration checks ###
Expand Down
1 change: 1 addition & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ sign:ngc-gpu-operator-validator-ubi9:
before_script:
- apk add --no-cache openssh-client rsync bash sshpass curl
script:
- echo "SHIVA == "
- source cnt-ci/infrastructure-info
- chmod 400 ${VSPHERE_SSH_PRIVATE_KEY}
- export private_key="${VSPHERE_SSH_PRIVATE_KEY}"
Expand Down
31 changes: 31 additions & 0 deletions tests/holodeck_vsphere.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Holodeck environment definition referenced by the CI-vsphere workflow
# through its `holodeck_config` input.
apiVersion: holodeck.nvidia.com/v1alpha1
kind: Environment
metadata:
  name: holodeck-vsphere-e2e-test
  description: 'end-to-end test infrastructure'
spec:
  provider: vsphere
  auth:
    keyName: cnt-ci
    privateKey: /home/nvidia/.ssh/gpu_runner
  VsphereVirtualMachine:
    vCenterURL: 'pdx01egxvc01.nvidia.com'
    datacenter: 'PDX01'
    datastore: 'PDX01EGX_vSAN_001'
    cluster: 'PDX01EGX001'
    network: 'PDX01EGXDVS001_Internal_307_10.185.111.0_24'
    vmFolder: 'GitHub ARC'
    # NOTE(review): the key is spelled `resoursePool`; it is left untouched
    # because the schema that consumes this file is not visible here —
    # confirm the expected spelling.
    resoursePool: 'CNT-ARC'
    templateImage: 'GPU_UBUNTU_TEMPLATE'
  containerRuntime:
    install: true
    name: containerd
  # Components requested on top of the container runtime.
  nvidiaContainerToolkit:
    install: true
  nvidiaDriver:
    install: true
  kubernetes:
    install: true
    installer: kubeadm
    version: v1.28.5
Loading