Skip to content

Long-running test on Azure #411

Long-running test on Azure

Long-running test on Azure #411

# ------------------------------------------------------------
# Copyright 2023 The Radius Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------
# This workflow performs functional tests every an hour on a pre-provisioned
# AKS (Azure Kubernetes Service) cluster.
#
# There are two types of tests in Radius: functional-test and e2e-azure-test.
# 'functional-test' checks the functionality of our application using a local
# Kubernetes cluster (kind), while 'e2e-azure-test' is executed on an AKS cluster
# focusing on performance and reliability.
#
# The test AKS cluster is pre-provisioned with various monitoring tools using the
# Bicep template in /test/infra/azure. Additionally, this cluster has a
# monitoring and alerting system in place, configured to notify the team of any
# abnormalities during the test.
#
# It's also important to note that this workflow does not build the Radius in every
# run. Instead, Radius CLI and container images are built from the main branch every
# 12 hours. We leverage the GitHub Actions cache to store the 'rad cli' and test
# information. On workflow initiation, it checks the last build time to determine if
# the previous build is still valid. If valid, the workflow skips the build steps
# and uses the cached 'rad cli' for testing.
#
# Grafana dashboard URL: https://radiuse2e00-dashboard-audycmffgberbghy.wus3.grafana.azure.com/
name: Long-running test on Azure
on:
schedule:
# Run every 2 hours
- cron: "0 */2 * * *"
pull_request:
branches:
- main
paths:
- '.github/workflows/long-running-azure.yaml'
env:
# Go version
GOVER: '^1.21'
GOPROXY: https://proxy.golang.org
# gotestsum version - see: https://github.com/gotestyourself/gotestsum
GOTESTSUM_VER: 1.10.0
# Container registry for storing container images
CONTAINER_REGISTRY: ghcr.io/radius-project/dev
# Container registry for storing Bicep recipe artifacts
BICEP_RECIPE_REGISTRY: ghcr.io/radius-project/dev
# The radius functional test timeout
FUNCTIONALTEST_TIMEOUT: 60m
# The Azure Location to store test resources
AZURE_LOCATION: westus3
# The base directory for storing test logs
RADIUS_CONTAINER_LOG_BASE: dist/container_logs
# The Radius helm chart location.
RADIUS_CHART_LOCATION: deploy/Chart/
# The region for AWS resources
AWS_REGION: 'us-west-2'
# The AWS account ID
AWS_ACCOUNT_ID: '${{ secrets.FUNCTEST_AWS_ACCOUNT_ID }}'
# The valid radius build time window in seconds to rebuild radius. 24 hours = 24 * 60 * 60 = 86400
VALID_RADIUS_BUILD_WINDOW: 86400
# The AKS cluster name
AKS_CLUSTER_NAME: 'radiuse2e00-aks'
# The resource group for AKS_CLUSTER_NAME resource.
AKS_RESOURCE_GROUP: 'radiuse2e00'
# Server where terraform test modules are deployed
TF_RECIPE_MODULE_SERVER_URL: "http://tf-module-server.radius-test-tf-module-server.svc.cluster.local"
jobs:
build:
name: Build Radius for test
runs-on: ubuntu-latest
if: github.repository == 'radius-project/radius'
outputs:
SKIP_BUILD: ${{ steps.skip-build.outputs.SKIP_BUILD }}
REL_VERSION: ${{ steps.gen-id.outputs.REL_VERSION }}
UNIQUE_ID: ${{ steps.gen-id.outputs.UNIQUE_ID }}
PR_NUMBER: ${{ steps.gen-id.outputs.PR_NUMBER }}
CHECKOUT_REPO: ${{ steps.gen-id.outputs.CHECKOUT_REPO }}
CHECKOUT_REF: ${{ steps.gen-id.outputs.CHECKOUT_REF }}
RAD_CLI_ARTIFACT_NAME: ${{ steps.gen-id.outputs.RAD_CLI_ARTIFACT_NAME }}
steps:
- name: Restore the latest cached binaries
uses: actions/cache/restore@v3
with:
path: ./dist/cache
key: radius-test-latest-
- name: Skip build if build is still valid
if: github.event_name != 'pull_request'
id: skip-build
run: |
# check if the last build time to see if we need to build again
if [ -f ./dist/cache/.lastbuildtime ]; then
lastbuild=$(cat ./dist/cache/.lastbuildtime)
current_time=$(date +%s)
if [ $((current_time-lastbuild)) -lt ${{ env.VALID_RADIUS_BUILD_WINDOW }} ]; then
echo "Skipping build as the last build is still valid."
echo "SKIP_BUILD=true" >> $GITHUB_OUTPUT
fi
fi
- name: Set up checkout target (scheduled)
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && github.event_name == 'schedule'
run: |
echo "CHECKOUT_REPO=${{ github.repository }}" >> $GITHUB_ENV
echo "CHECKOUT_REF=refs/heads/main" >> $GITHUB_ENV
- name: Set up checkout target (pull_request)
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && github.event_name == 'pull_request'
run: |
echo "CHECKOUT_REPO=${{ github.event.pull_request.head.repo.full_name }}" >> $GITHUB_ENV
echo "CHECKOUT_REF=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
- name: Generate ID for release
id: gen-id
run: |
if [ -z "${{ steps.skip-build.outputs.SKIP_BUILD }}" ]; then
BASE_STR="RADIUS|${GITHUB_SHA}|${GITHUB_SERVER_URL}|${GITHUB_REPOSITORY}|${GITHUB_RUN_ID}|${GITHUB_RUN_ATTEMPT}"
UNIQUE_ID=$(echo $BASE_STR | sha1sum | head -c 10)
echo "REL_VERSION=pr-${UNIQUE_ID}" >> $GITHUB_ENV
# Set output variables to be used in the other jobs
echo "REL_VERSION=pr-${UNIQUE_ID}" >> $GITHUB_OUTPUT
echo "UNIQUE_ID=${UNIQUE_ID}" >> $GITHUB_OUTPUT
echo "CHECKOUT_REPO=${{ env.CHECKOUT_REPO }}" >> $GITHUB_OUTPUT
echo "CHECKOUT_REF=${{ env.CHECKOUT_REF }}" >> $GITHUB_OUTPUT
echo "AZURE_TEST_RESOURCE_GROUP=radtest-${UNIQUE_ID}" >> $GITHUB_OUTPUT
echo "RAD_CLI_ARTIFACT_NAME=rad_cli_linux_amd64" >> $GITHUB_OUTPUT
echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> $GITHUB_OUTPUT
else
cat ./dist/cache/.buildenv >> $GITHUB_OUTPUT
echo "## Radius functional test environment variables" >> $GITHUB_STEP_SUMMARY
echo "Use the previously built binaries for the current test run:" >> $GITHUB_STEP_SUMMARY
echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY
cat ./dist/cache/.buildenv >> $GITHUB_STEP_SUMMARY
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
fi
- name: Check out code
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
uses: actions/checkout@v3
with:
repository: ${{ env.CHECKOUT_REPO }}
ref: ${{ env.CHECKOUT_REF }}
- name: Set up Go ${{ env.GOVER }}
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
uses: actions/setup-go@v4
with:
go-version: ${{ env.GOVER }}
- name: Log the summary of build info for new version.
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
continue-on-error: true
run: |
cat <<EOF > summary.md
## Radius functional test overview
| Name | Value |
|------|-------|
|**Repository** | ${{ steps.gen-id.outputs.CHECKOUT_REPO }} |
|**Commit ref** | ${{ steps.gen-id.outputs.CHECKOUT_REF }} |
|**Unique ID** | ${{ steps.gen-id.outputs.UNIQUE_ID }} |
|**Image tag** | ${{ steps.gen-id.outputs.REL_VERSION }} |
<details>
<summary> Click here to see the list of tools in the current test run</summary>
* gotestsum ${{ env.GOTESTSUM_VER }}
* Bicep recipe location `${{ env.BICEP_RECIPE_REGISTRY }}/test/functional/shared/recipes/<name>:${{ steps.gen-id.outputs.REL_VERSION }}`
* Terraform recipe location `${{ env.TF_RECIPE_MODULE_SERVER_URL }}/<name>.zip` (in cluster)
* applications-rp test image location: `${{ env.CONTAINER_REGISTRY }}/applications-rp:${{ steps.gen-id.outputs.REL_VERSION }}`
* controller test image location: `${{ env.CONTAINER_REGISTRY }}/controller:${{ steps.gen-id.outputs.REL_VERSION }}`
* ucp test image location: `${{ env.CONTAINER_REGISTRY }}/ucpd:${{ steps.gen-id.outputs.REL_VERSION }}`
</details>
## Test Status
EOF
cat summary.md >> $GITHUB_STEP_SUMMARY
- name: Setup Azure CLI
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
run: curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
- name: Login to Azure
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
uses: azure/login@v1
with:
creds: '{"clientId":"${{ secrets.INTEGRATION_TEST_SP_APP_ID }}","clientSecret":"${{ secrets.INTEGRATION_TEST_SP_PASSWORD }}","subscriptionId":"${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.INTEGRATION_TEST_TENANT_ID }}"}'
- name: Login to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and Push container images
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
run: |
make build && make docker-build && make docker-push
env:
DOCKER_REGISTRY: ${{ env.CONTAINER_REGISTRY }}
DOCKER_TAG_VERSION: ${{ env.REL_VERSION }}
- name: Upload CLI binary
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
uses: actions/upload-artifact@v3
with:
name: ${{ steps.gen-id.outputs.RAD_CLI_ARTIFACT_NAME }}
path: |
./dist/linux_amd64/release/rad
- name: Log the build result (success)
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && success()
continue-on-error: true
run: |
echo ":white_check_mark: Container images build succeeded" >> $GITHUB_STEP_SUMMARY
- name: Log the build result (failure)
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && failure()
continue-on-error: true
run: |
echo ":x: Container images build failed" >> $GITHUB_STEP_SUMMARY
- name: Log test Bicep recipe publish status
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
continue-on-error: true
run: |
echo ":hourglass: Publishing Bicep Recipes for functional tests..." >> $GITHUB_STEP_SUMMARY
- name: Move the latest binaries to cache
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
run: |
mkdir -p ./dist/cache
cp ./dist/linux_amd64/release/rad ./dist/cache
echo $(date +%s) > ./dist/cache/.lastbuildtime
echo "UNIQUE_ID=${{ steps.gen-id.outputs.UNIQUE_ID }}" >> ./dist/cache/.buildenv
echo "REL_VERSION=${{ steps.gen-id.outputs.REL_VERSION }}" >> ./dist/cache/.buildenv
echo "CHECKOUT_REPO=${{ steps.gen-id.outputs.CHECKOUT_REPO }}" >> ./dist/cache/.buildenv
echo "CHECKOUT_REF=$(git rev-parse HEAD)" >> ./dist/cache/.buildenv
echo "PR_NUMBER=${{ steps.gen-id.outputs.PR_NUMBER }}" >> ./dist/cache/.buildenv
echo "BICEP_RECIPE_TAG_VERSION=${{ steps.gen-id.outputs.REL_VERSION }}" >> ./dist/cache/.buildenv
- name: Store the latest binaries into cache
uses: actions/cache/save@v3
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && success()
with:
path: ./dist/cache
key: radius-test-latest-${{ github.sha }}-${{ github.run_number }}
- name: Publish Bicep Test Recipes
if: steps.skip-build.outputs.SKIP_BUILD != 'true'
run: |
mkdir ./bin
cp ./dist/linux_amd64/release/rad ./bin/rad
chmod +x ./bin/rad
export PATH=$GITHUB_WORKSPACE/bin:$PATH
which rad || { echo "cannot find rad"; exit 1; }
rad bicep download
rad version
env:
BICEP_RECIPE_REGISTRY: ${{ env.BICEP_RECIPE_REGISTRY }}
BICEP_RECIPE_TAG_VERSION: ${{ env.REL_VERSION }}
- name: Log Bicep recipe publish status (success)
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && success()
run: |
echo ":white_check_mark: Recipe publishing succeeded" >> $GITHUB_STEP_SUMMARY
- name: Log recipe publish status (failure)
if: steps.skip-build.outputs.SKIP_BUILD != 'true' && failure()
run: |
echo ":x: Test recipe publishing failed" >> $GITHUB_STEP_SUMMARY
tests:
name: Run functional tests
needs: build
runs-on: ubuntu-latest
if: github.repository == 'radius-project/radius'
env:
SKIP_BUILD: ${{ needs.build.outputs.SKIP_BUILD }}
UNIQUE_ID: ${{ needs.build.outputs.UNIQUE_ID }}
REL_VERSION: ${{ needs.build.outputs.REL_VERSION }}
CHECKOUT_REPO: ${{ needs.build.outputs.CHECKOUT_REPO }}
CHECKOUT_REF: ${{ needs.build.outputs.CHECKOUT_REF }}
PR_NUMBER: ${{ needs.build.outputs.PR_NUMBER }}
AZURE_TEST_RESOURCE_GROUP: radtest-${{ needs.build.outputs.UNIQUE_ID }}-e2e-all
RAD_CLI_ARTIFACT_NAME: ${{ needs.build.outputs.RAD_CLI_ARTIFACT_NAME }}
BICEP_RECIPE_TAG_VERSION: ${{ needs.build.outputs.REL_VERSION }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
repository: ${{ env.CHECKOUT_REPO }}
ref: ${{ env.CHECKOUT_REF }}
- name: Set up Go ${{ env.GOVER }}
uses: actions/setup-go@v4
with:
go-version: ${{ env.GOVER }}
- name: Download rad CLI
if: env.SKIP_BUILD != 'true'
uses: actions/download-artifact@v3
with:
name: ${{ env.RAD_CLI_ARTIFACT_NAME }}
path: bin
- name: Restore the latest cached binaries
if: env.SKIP_BUILD == 'true'
uses: actions/cache/restore@v3
with:
path: ./dist/cache
key: radius-test-latest-
- name: Install rad CLI in bin
if: env.SKIP_BUILD == 'true'
run: |
mkdir -p ./bin
mv ./dist/cache/rad ./bin/
chmod +x ./bin/rad
- name: Login to Azure
uses: azure/login@v1
with:
creds: '{"clientId":"${{ secrets.INTEGRATION_TEST_SP_APP_ID }}","clientSecret":"${{ secrets.INTEGRATION_TEST_SP_PASSWORD }}","subscriptionId":"${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.INTEGRATION_TEST_TENANT_ID }}"}'
- name: Create azure resource group - ${{ env.AZURE_TEST_RESOURCE_GROUP }}
run: |
current_time=$(date +%s)
az group create \
--location ${{ env.AZURE_LOCATION }} \
--name $RESOURCE_GROUP \
--subscription ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--tags creationTime=$current_time
while [ $(az group exists --name $RESOURCE_GROUP) = false ]; do sleep 2; done
env:
RESOURCE_GROUP: ${{ env.AZURE_TEST_RESOURCE_GROUP }}
- name: Get kubeconf credential for AKS cluster
run: |
az aks get-credentials \
--subscription ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--resource-group ${{ env.AKS_RESOURCE_GROUP }} \
--name ${{ env.AKS_CLUSTER_NAME }} --admin
env:
RESOURCE_GROUP: ${{ env.AZURE_TEST_RESOURCE_GROUP }}
- name: Clean up cluster
run: ./.github/scripts/cleanup-cluster.sh
- name: Download Bicep
run: |
chmod +x ./bin/rad
export PATH=$GITHUB_WORKSPACE/bin:$PATH
which rad || { echo "cannot find rad"; exit 1; }
rad bicep download
rad version
- name: Install gotestsum (test reporting tool)
run: |
go install gotest.tools/gotestsum@v${{ env.GOTESTSUM_VER }}
- name: Install Radius
if: env.SKIP_BUILD != 'true'
run: |
set -x
export PATH=$GITHUB_WORKSPACE/bin:$PATH
which rad || { echo "cannot find rad"; exit 1; }
echo "*** Installing Radius to Kubernetes ***"
rad install kubernetes --reinstall \
--chart ${{ env.RADIUS_CHART_LOCATION }} \
--set rp.image=${{ env.CONTAINER_REGISTRY }}/applications-rp,rp.tag=${{ env.REL_VERSION }},controller.image=${{ env.CONTAINER_REGISTRY }}/controller,controller.tag=${{ env.REL_VERSION }},ucp.image=${{ env.CONTAINER_REGISTRY }}/ucpd,ucp.tag=${{ env.REL_VERSION }}
- name: Configure Radius test workspace
run: |
set -x
export PATH=$GITHUB_WORKSPACE/bin:$PATH
which rad || { echo "cannot find rad"; exit 1; }
echo "*** Create workspace, group and environment for test ***"
rad workspace create kubernetes
rad workspace list
rad group create radius-e2e
rad group switch radius-e2e
# The functional test is designed to use default namespace. So you must create the environment for default namespace.
rad env create radius-e2e --namespace default
rad env switch radius-e2e
rad workspace list
echo "*** Configuring Azure provider ***"
rad env update radius-e2e --azure-subscription-id ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--azure-resource-group ${{ env.AZURE_TEST_RESOURCE_GROUP }}
rad credential register azure --client-id ${{ secrets.INTEGRATION_TEST_SP_APP_ID }} \
--client-secret ${{ secrets.INTEGRATION_TEST_SP_PASSWORD }} \
--tenant-id ${{ secrets.INTEGRATION_TEST_TENANT_ID }}
echo "*** Configuring AWS provider ***"
rad env update radius-e2e --aws-region ${{ env.AWS_REGION }} --aws-account-id ${{ secrets.FUNCTEST_AWS_ACCOUNT_ID }}
rad credential register aws \
--access-key-id ${{ secrets.FUNCTEST_AWS_ACCESS_KEY_ID }} --secret-access-key ${{ secrets.FUNCTEST_AWS_SECRET_ACCESS_KEY }}
- name: Log radius installation status (failure)
if: failure()
run: |
echo ":x: Failed to install Radius for functional test." >> $GITHUB_STEP_SUMMARY
- name: Publish Terraform test recipes
run: |
make publish-test-terraform-recipes
- name: Get OIDC Issuer from AKS cluster
run: |
echo "FUNCTEST_OIDC_ISSUER=$(az aks show -n ${{ env.AKS_CLUSTER_NAME }} -g ${{ env.AKS_RESOURCE_GROUP }} --query "oidcIssuerProfile.issuerUrl" -otsv)" >> $GITHUB_ENV
- name: Run functional tests
run: |
# Ensure rad cli is in path before running tests.
export PATH=$GITHUB_WORKSPACE/bin:$PATH
cd $GITHUB_WORKSPACE
which rad || { echo "cannot find rad"; exit 1; }
# Populate the following test environment variables from JSON secret.
# AZURE_MONGODB_RESOURCE_ID
# AZURE_COSMOS_MONGODB_ACCOUNT_ID
# AZURE_TABLESTORAGE_RESOURCE_ID
# AZURE_SERVICEBUS_RESOURCE_ID
# AZURE_REDIS_RESOURCE_ID
# AZURE_MSSQL_RESOURCE_ID
# AZURE_MSSQL_USERNAME
# AZURE_MSSQL_PASSWORD
eval "export $(echo "${{ secrets.FUNCTEST_PREPROVISIONED_RESOURCE_JSON }}" | jq -r 'to_entries | map("\(.key)=\(.value)") | @sh')"
make test-functional-all
env:
TEST_TIMEOUT: ${{ env.FUNCTIONALTEST_TIMEOUT }}
RADIUS_CONTAINER_LOG_PATH: ${{ github.workspace }}/${{ env.RADIUS_CONTAINER_LOG_BASE }}
AWS_ACCESS_KEY_ID: ${{ secrets.FUNCTEST_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.FUNCTEST_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ env.AWS_REGION }}
RADIUS_SAMPLES_REPO_ROOT: ${{ github.workspace }}/samples
# Test_MongoDB_Recipe_Parameters is using the following environment variable.
INTEGRATION_TEST_RESOURCE_GROUP_NAME: ${{ env.AZURE_TEST_RESOURCE_GROUP }}
FUNC_TEST_OIDC_ISSUER: ${{ env.FUNCTEST_OIDC_ISSUER }}
- name: Log radius e2e test status (success)
if: success()
run: |
echo ":white_check_mark: functional tests succeeded" >> $GITHUB_STEP_SUMMARY
- name: Log radius e2e test status (failure)
if: failure()
run: |
echo ":x: functional test failed." >> $GITHUB_STEP_SUMMARY
- name: Delete azure resource group - ${{ env.AZURE_TEST_RESOURCE_GROUP }}
if: always()
run: |
# if deletion fails, purge workflow will purge the resource group and its resources later.
az group delete \
--subscription ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--name ${{ env.AZURE_TEST_RESOURCE_GROUP }} \
--yes --verbose
- name: Clean up cluster
if: always()
run: ./.github/scripts/cleanup-cluster.sh