diff --git a/.github/workflows/deploy-hubs.yaml b/.github/workflows/deploy-hubs.yaml index e9338b9368..c3af3387f9 100644 --- a/.github/workflows/deploy-hubs.yaml +++ b/.github/workflows/deploy-hubs.yaml @@ -45,45 +45,31 @@ on: - "!terraform/gcp/projects/cluster.tfvars.template" - "!eksctl/template.jsonnet" -# Queue triggered executions of this workflow stemming from pushes to avoid -# deployment conflicts. -# -# By using a different concurrency groups for pull requests and pushes, we -# reduce the risk of cancelling a queued but not started workflow as discussed -# in https://github.com/2i2c-org/infrastructure/issues/3214. -# -# github.head_ref is used to create PR unique concurrency groups, and for -# workflow executions not triggered by a PR we get a dedicated group. -# # ref: https://docs.github.com/en/actions/using-jobs/using-concurrency -# concurrency: group: ${{ github.workflow }}-${{ github.head_ref || 'not-a-pr' }} cancel-in-progress: false -# This environment variable triggers the deployer to colourise print statements in the -# GitHug Actions logs for easy reading env: TERM: xterm - # This may not be required any more, and it may depend on the kubectl version - # we use etc as well. For now, we have it added to avoid issues. USE_GKE_GCLOUD_AUTH_PLUGIN: "True" jobs: # This job runs in Pull Requests and on pushes to the default branch. It identifies - # which files have been added or modified by recent GitHub activity and parsed a list - # to the `deployer generate helm-upgrade-job`s command of the deployer. This command generates - # two lists of dictionaries, which can be read by GitHub Actions as matrix jobs. The - # first set of jobs describes which clusters need their support chart and/or staging - # hub upgraded; and the second set of jobs describe which production hubs require - # upgrading. These two lists are set as job outputs via GITHUB_OUTPUT to be consumed - # by the later jobs. 
They are also pretty-printed in a human-readable format to the - # logs, and converted into Markdown tables for posting into GitHub comments. + # which files have been added or modified by recent GitHub activity and passes a list + # to the `deployer generate helm-upgrade-job` command of the deployer. This command generates + # three lists of dictionaries, which can be read by GitHub Actions as matrix jobs. The + # first set of jobs describes which clusters need their support chart upgraded; the second set + # of jobs describes which staging hubs require upgrading; and the third set of jobs describes + # which production hubs require upgrading. These lists are set as job outputs via GITHUB_OUTPUT + # to be consumed by the later jobs. They are also pretty-printed in a human-readable format + # to the logs, and converted into Markdown tables for posting into GitHub comments. generate-jobs: runs-on: ubuntu-latest outputs: - support-and-staging-matrix-jobs: ${{ steps.generate-jobs.outputs.support-and-staging-matrix-jobs }} - prod-hub-matrix-jobs: ${{ steps.generate-jobs.outputs.prod-hub-matrix-jobs }} + support-jobs: ${{ steps.generate-jobs.outputs.support-jobs }} + staging-jobs: ${{ steps.generate-jobs.outputs.staging-jobs }} + prod-jobs: ${{ steps.generate-jobs.outputs.prod-jobs }} steps: - uses: actions/checkout@v4 @@ -100,9 +86,6 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pip - # key determines if we define or reuse an existing cache or not. Our - # key ensure we cache within a workflow run and its attempts, but not - # between workflow runs. 
key: "${{ github.run_id }}" - name: Install deployer script's Python dependencies @@ -163,8 +146,7 @@ jobs: - added|modified: config/clusters/** # This step will create a comment-body.txt file containing the jobs to be run in a - # Markdown table format to be posted on a Pull Request, if this job is triggered - # by one + # Markdown table format to be posted on a Pull Request - name: Generate matrix jobs id: generate-jobs run: | @@ -182,8 +164,9 @@ jobs: - name: Upload artifacts if: > github.event_name == 'pull_request' && - (steps.generate-jobs.outputs.support-and-staging-matrix-jobs != '[]' || - steps.generate-jobs.outputs.prod-hub-matrix-jobs != '[]') + (steps.generate-jobs.outputs.support-jobs != '[]' || + steps.generate-jobs.outputs.staging-jobs != '[]' || + steps.generate-jobs.outputs.prod-jobs != '[]') uses: actions/upload-artifact@v4 with: name: pr @@ -209,13 +192,10 @@ jobs: env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_GHA_FAILURES_WEBHOOK_URL }} - # This job upgrades the support chart, staging hub, and dask-staging hub (if present) - # for clusters in parallel, if those upgrades are required. This job needs the - # `generate-jobs` job to have completed and set an output to the - # `support-and-staging-matrix-jobs` variable name. It's inputs are a list of - # dictionaries with the keys cluster_name, provider, upgrade_support, and - # upgrade_staging for each cluster that requires it. - upgrade-support-and-staging: + # This job upgrades the support chart for clusters in parallel, if those upgrades + # are required. This job needs the `generate-jobs` job to have completed and set + # an output to the `support-jobs` variable name. 
+ upgrade-support: runs-on: ubuntu-latest needs: [generate-jobs] @@ -224,9 +204,6 @@ jobs: # see this post for feature requests for this to be improved: # https://github.community/t/bug-jobs-output-should-return-a-list-for-a-matrix-job/128626/32?u=consideratio # - # Warning: names can include alphanumerics, '-', and '_', but not '.', so - # we replace '.' for '-' in cluster names. - # # If you are adding a new cluster, please remember to list it here! outputs: failure_2i2c-aws-us: "${{ steps.declare-failure.outputs.failure_2i2c-aws-us }}" @@ -263,12 +240,11 @@ jobs: if: | (github.event_name == 'push' && contains(github.ref, 'main')) && needs.generate-jobs.result == 'success' && - needs.generate-jobs.outputs.support-and-staging-matrix-jobs != '[]' + needs.generate-jobs.outputs.support-jobs != '[]' strategy: - # Don't stop other deployments if one fails fail-fast: false matrix: - jobs: ${{ fromJson(needs.generate-jobs.outputs.support-and-staging-matrix-jobs) }} + jobs: ${{ fromJson(needs.generate-jobs.outputs.support-jobs) }} steps: - uses: actions/checkout@v4 @@ -280,39 +256,245 @@ jobs: GCP_KMS_DECRYPTOR_KEY: ${{ secrets.GCP_KMS_DECRYPTOR_KEY }} - name: Upgrade support chart on cluster ${{ matrix.jobs.cluster_name }} - if: matrix.jobs.upgrade_support run: | deployer deploy-support ${{ matrix.jobs.cluster_name }} - - name: Upgrade staging hub on cluster ${{ matrix.jobs.cluster_name }} - if: matrix.jobs.upgrade_staging + - name: Declare failure status + id: declare-failure + if: always() + shell: python run: | - deployer deploy ${{ matrix.jobs.cluster_name }} staging + import os - # Retry action: https://github.com/marketplace/actions/retry-step - - name: Run health check for staging hub on cluster ${{ matrix.jobs.cluster_name }} - if: matrix.jobs.upgrade_staging - uses: nick-fields/retry@v3 + name = "${{ matrix.jobs.cluster_name }}".replace(".", "-") + failure = "${{ job.status == 'failure' }}" + + output_file = os.getenv("GITHUB_OUTPUT") + with open(output_file, 
"a") as f: + f.write(f"failure_{name}={failure}") + + # https://github.com/ravsamhq/notify-slack-action + # Needs to be added per job + # FIXME: when https://github.com/integrations/slack/issues/1563 gets implemented, + # we can use that instead + - name: Report Status + if: always() + uses: ravsamhq/notify-slack-action@v2 + # Warning: there are multiple "Report Status" steps in this file (one per job). + # Make sure they are all updated with: - timeout_minutes: 10 - max_attempts: 2 - command: | - deployer run-hub-health-check ${{ matrix.jobs.cluster_name }} staging + notify_when: "failure" + status: ${{ job.status }} # required + # Message should look like: "Hey @author! Deploy and test hubs failed for "Add new hub - 9305e08". + message_format: '{emoji} Hey @${{ github.event.head_commit.author.name }}! *{workflow}* {status_message} for "${{ github.event.head_commit.message }} - <{commit_url}|{commit_sha}>". Checkout the run at {run_url}.' + footer: "<{run_url}|Failing Run>" + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_GHA_FAILURES_WEBHOOK_URL }} - - name: Upgrade dask-staging hub on cluster ${{ matrix.jobs.cluster_name }} if it exists - if: matrix.jobs.upgrade_staging && matrix.jobs.cluster_name == '2i2c' + # This job reduces the initially planned staging-jobs and prod-jobs deployments + # by filtering out any deployment to a cluster with a failed support job. 
+ filter-failed-support: + runs-on: ubuntu-latest + needs: [generate-jobs, upgrade-support] + if: | + !cancelled() && + (github.event_name == 'push' && contains(github.ref, 'main')) && + needs.generate-jobs.result == 'success' && + needs.generate-jobs.outputs.staging-jobs != '[]' && + needs.generate-jobs.outputs.prod-jobs != '[]' + + outputs: + staging-jobs: ${{ steps.filter-jobs.outputs.filtered-staging-jobs}} + prod-jobs: ${{ steps.filter-jobs.outputs.filtered-prod-jobs }} + + steps: + # This Python script filters out any staging and/or prod hub deployment job + # from running later based on if it's part of a cluster where support upgrade + # just failed. + - name: Filter staging and prod deploy jobs to run based on failures in support + id: filter-jobs + shell: python run: | - deployer deploy ${{ matrix.jobs.cluster_name }} dask-staging + import os + import json + + staging_jobs = json.loads(r"""${{ needs.generate-jobs.outputs.staging-jobs}}""") + prod_jobs = json.loads(r"""${{ needs.generate-jobs.outputs.prod-jobs }}""") + outputs = json.loads(r"""${{ toJson(needs.upgrade-support.outputs) }}""") + + try: + filtered_staging_jobs = [ + staging_job + for staging_job in staging_jobs + if outputs[f"failure_{staging_job['cluster_name'].replace('.', '-')}"] != "true" + ] + + filtered_prod_jobs = [ + prod_job + for prod_job in prod_jobs + if outputs[f"failure_{prod_job['cluster_name'].replace('.', '-')}"] != "true" + ] + except KeyError as e: + print(f"The {e} output wasn't found in the `upgrade-support.outputs` list. 
Please add it before continuing!") + + output_file = os.getenv("GITHUB_OUTPUT") + with open(output_file, "a") as f: + f.write(f"filtered-staging-jobs={json.dumps(filtered_staging_jobs)}\n") + f.write(f"filtered-prod-jobs={json.dumps(filtered_prod_jobs)}") + + # https://github.com/ravsamhq/notify-slack-action + # Needs to be added per job + # When https://github.com/integrations/slack/issues/1563 gets implemented, + # we can use that instead + - name: Report Status + if: always() + uses: ravsamhq/notify-slack-action@v2 + with: + # Warning: there are multiple "Report Status" steps in this file (one per job). + # Make sure they are all updated + notify_when: "failure" + status: ${{ job.status }} # required + # Message should look like: "Hey @author! Deploy and test hubs failed for "Add new hub - 9305e08". + message_format: '{emoji} Hey @${{ github.event.head_commit.author.name }}! *{workflow}* {status_message} for "${{ github.event.head_commit.message }} - <{commit_url}|{commit_sha}>". Checkout the run at {run_url}.' + footer: "<{run_url}|Failing Run>" + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_GHA_FAILURES_WEBHOOK_URL }} + + # We need to run this job because if there are no support jobs executed, then + # filter-failed-support won't produce an output. We cannot use logic in a + # matrix.jobs definition, + # e.g. matrix.jobs: ${{ needs.filter-failed-support.outputs.staging-jobs || needs.generate-jobs.outputs.staging-jobs }}; + # therefore, we need to do this logic in another job and pass it along. 
+ reset-jobs: + runs-on: ubuntu-latest + needs: [generate-jobs, filter-failed-support] + if: | + !cancelled() && + (github.event_name == 'push' && contains(github.ref, 'main')) && + needs.generate-jobs.result == 'success' && + needs.generate-jobs.outputs.staging-jobs != '[]' && + needs.generate-jobs.outputs.prod-jobs != '[]' + outputs: + staging-jobs: ${{ steps.reset-jobs.outputs.staging-jobs }} + prod-jobs: ${{ steps.reset-jobs.outputs.prod-jobs }} + steps: + - id: reset-jobs + shell: python + run: | + import os + import json + + try: + staging_jobs = json.loads(r"""${{ needs.filter-failed-support.outputs.staging-jobs }}""") + prod_jobs = json.loads(r"""${{ needs.filter-failed-support.outputs.prod-jobs }}""") + except json.JSONDecodeError: + staging_jobs = json.loads(r"""${{ needs.generate-jobs.outputs.staging-jobs }}""") + prod_jobs = json.loads(r"""${{ needs.generate-jobs.outputs.prod-jobs }}""") + + output_file = os.getenv("GITHUB_OUTPUT") + with open(output_file, "a") as f: + f.write(f"staging-jobs={json.dumps(staging_jobs)}\n") + f.write(f"prod-jobs={json.dumps(prod_jobs)}\n") + + # https://github.com/ravsamhq/notify-slack-action + # Needs to be added per job + # When https://github.com/integrations/slack/issues/1563 gets implemented, + # we can use that instead + - name: Report Status + if: always() + uses: ravsamhq/notify-slack-action@v2 + with: + # Warning: there are multiple "Report Status" steps in this file (one per job). + # Make sure they are all updated + notify_when: "failure" + status: ${{ job.status }} # required + # Message should look like: "Hey @author! Deploy and test hubs failed for "Add new hub - 9305e08". + message_format: '{emoji} Hey @${{ github.event.head_commit.author.name }}! *{workflow}* {status_message} for "${{ github.event.head_commit.message }} - <{commit_url}|{commit_sha}>". Checkout the run at {run_url}.' 
+ footer: "<{run_url}|Failing Run>" + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_GHA_FAILURES_WEBHOOK_URL }} + + # This job upgrades staging hubs on clusters in parallel, if required. This + # job needs the `reset-jobs` to have completed to provide its output `staging-jobs`. + # It is a list of dictionaries with the keys cluster_name, provider, and hub_name + # for each staging hub that requires an upgrade and didn't have a failed + # support-upgrade job. + upgrade-staging: + runs-on: ubuntu-latest + needs: [reset-jobs] + if: | + !cancelled() && + (github.event_name == 'push' && contains(github.ref, 'main')) && + needs.reset-jobs.result == 'success' && + needs.reset-jobs.outputs.staging-jobs != '[]' + strategy: + fail-fast: false + matrix: + jobs: ${{ fromJson(needs.reset-jobs.outputs.staging-jobs) }} + + # We declare outputs indicating the job failed status of a specific job + # variation. We are currently required to do this in a hardcoded fashion, + # see this post for feature requests for this to be improved: + # https://github.community/t/bug-jobs-output-should-return-a-list-for-a-matrix-job/128626/32?u=consideratio + # + # If you are adding a new cluster/staging hub, please remember to list it here! 
+ outputs: + failure_2i2c_staging: "${{ steps.declare-failure.outputs.failure_2i2c_staging }}" + failure_2i2c_dask-staging: "${{ steps.declare-failure.outputs.failure_2i2c_dask-staging }}" + failure_2i2c_ucmercedstaging: "${{ steps.declare-failure.outputs.failure_2i2c_ucmercedstaging }}" + failure_2i2c-aws-us_staging: "${{ steps.declare-failure.outputs.failure_2i2c-aws-us_staging }}" + failure_2i2c-aws-us_dask-staging: "${{ steps.declare-failure.outputs.failure_2i2c-aws-us_dask-staging }}" + failure_2i2c-uk_staging: "${{ steps.declare-failure.outputs.failure_2i2c-uk_staging }}" + failure_awi-ciroh_staging: "${{ steps.declare-failure.outputs.failure_awi-ciroh_staging }}" + failure_catalystproject-africa_staging: "${{ steps.declare-failure.outputs.failure_catalystproject-africa_staging }}" + failure_catalystproject-latam_staging: "${{ steps.declare-failure.outputs.failure_catalystproject-latam_staging }}" + failure_cloudbank_staging: "${{ steps.declare-failure.outputs.failure_cloudbank_staging }}" + failure_dubois_staging: "${{ steps.declare-failure.outputs.failure_dubois_staging }}" + failure_earthscope_staging: "${{ steps.declare-failure.outputs.failure_earthscope_staging }}" + failure_gridsst_staging: "${{ steps.declare-failure.outputs.failure_gridsst_staging }}" + failure_hhmi_staging: "${{ steps.declare-failure.outputs.failure_hhmi_staging }}" + failure_jupyter-health_staging: "${{ steps.declare-failure.outputs.failure_jupyter-health_staging }}" + failure_jupyter-meets-the-earth_staging: "${{ steps.declare-failure.outputs.failure_jupyter-meets-the-earth_staging }}" + failure_kitware_staging: "${{ steps.declare-failure.outputs.failure_kitware_staging }}" + failure_leap_staging: "${{ steps.declare-failure.outputs.failure_leap_staging }}" + failure_maap_staging: "${{ steps.declare-failure.outputs.failure_maap_staging }}" + failure_nasa-cryo_staging: "${{ steps.declare-failure.outputs.failure_nasa-cryo_staging }}" + failure_nasa-ghg_staging: "${{ 
steps.declare-failure.outputs.failure_nasa-ghg_staging }}" + failure_nasa-veda_staging: "${{ steps.declare-failure.outputs.failure_nasa-veda_staging }}" + failure_nmfs-openscapes_staging: "${{ steps.declare-failure.outputs.failure_nmfs-openscapes_staging }}" + failure_openscapes_staging: "${{ steps.declare-failure.outputs.failure_openscapes_staging }}" + failure_opensci_staging: "${{ steps.declare-failure.outputs.failure_opensci_staging }}" + failure_pangeo-hubs_staging: "${{ steps.declare-failure.outputs.failure_pangeo-hubs_staging }}" + failure_projectpythia_staging: "${{ steps.declare-failure.outputs.failure_projectpythia_staging }}" + failure_queensu_staging: "${{ steps.declare-failure.outputs.failure_queensu_staging }}" + failure_smithsonian_staging: "${{ steps.declare-failure.outputs.failure_smithsonian_staging }}" + failure_strudel_staging: "${{ steps.declare-failure.outputs.failure_strudel_staging }}" + failure_ubc-eoas_staging: "${{ steps.declare-failure.outputs.failure_ubc-eoas_staging }}" + failure_utoronto_staging: "${{ steps.declare-failure.outputs.failure_utoronto_staging }}" + failure_utoronto_r-staging: "${{ steps.declare-failure.outputs.failure_utoronto_r-staging }}" + failure_victor_staging: "${{ steps.declare-failure.outputs.failure_victor_staging }}" + + steps: + - uses: actions/checkout@v4 + + - name: Setup deploy for ${{ matrix.jobs.cluster_name }} cluster + uses: ./.github/actions/setup-deploy + with: + provider: ${{ matrix.jobs.provider }} + GCP_KMS_DECRYPTOR_KEY: ${{ secrets.GCP_KMS_DECRYPTOR_KEY }} + + - name: Upgrade ${{ matrix.jobs.hub_name }} hub on cluster ${{ matrix.jobs.cluster_name }} + run: | + deployer deploy ${{ matrix.jobs.cluster_name }} ${{ matrix.jobs.hub_name }} # Retry action: https://github.com/marketplace/actions/retry-step - - name: Run health check for dask-staging hub on cluster ${{ matrix.jobs.cluster_name }} if it exists - if: matrix.jobs.upgrade_staging && matrix.jobs.cluster_name == '2i2c' + - name: Run health 
check against ${{ matrix.jobs.hub_name }} hub on cluster ${{ matrix.jobs.cluster_name}} uses: nick-fields/retry@v3 with: timeout_minutes: 10 - max_attempts: 2 + max_attempts: 3 command: | - deployer run-hub-health-check ${{ matrix.jobs.cluster_name }} dask-staging + deployer run-hub-health-check ${{ matrix.jobs.cluster_name }} ${{ matrix.jobs.hub_name }} - name: Declare failure status id: declare-failure @@ -321,23 +503,24 @@ jobs: run: | import os - name = "${{ matrix.jobs.cluster_name }}".replace(".", "-") + cluster_name = "${{ matrix.jobs.cluster_name }}".replace(".", "-") + hub_name = "${{ matrix.jobs.hub_name }}".replace(".", "-") failure = "${{ job.status == 'failure' }}" output_file = os.getenv("GITHUB_OUTPUT") with open(output_file, "a") as f: - f.write(f"failure_{name}={failure}") + f.write(f"failure_{cluster_name}_{hub_name}={failure}\n") # https://github.com/ravsamhq/notify-slack-action # Needs to be added per job - # FIXME: when https://github.com/integrations/slack/issues/1563 gets implemented, + # When https://github.com/integrations/slack/issues/1563 gets implemented, # we can use that instead - name: Report Status if: always() uses: ravsamhq/notify-slack-action@v2 - # Warning: there are multiple "Report Status" steps in this file (one per job). - # Make sure they are all updated with: + # Warning: there are multiple "Report Status" steps in this file (one per job). + # Make sure they are all updated notify_when: "failure" status: ${{ job.status }} # required # Message should look like: "Hey @author! Deploy and test hubs failed for "Add new hub - 9305e08". @@ -346,48 +529,51 @@ jobs: env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_GHA_FAILURES_WEBHOOK_URL }} - # This jobs reduces the initially planned prod-hub-matrix-jobs deployments by - # filtering out any deployment to a cluster with a failed support-and-staging - # job. 
- filter-generate-jobs: + # This job further reduces prod-jobs by filtering out any prod hub deployment + # to a cluster with a failed staging hub job. + filter-failed-staging: runs-on: ubuntu-latest - needs: [generate-jobs, upgrade-support-and-staging] + needs: [reset-jobs, upgrade-staging] if: | !cancelled() && (github.event_name == 'push' && contains(github.ref, 'main')) && - needs.generate-jobs.result == 'success' && - needs.generate-jobs.outputs.prod-hub-matrix-jobs != '[]' - + needs.reset-jobs.result == 'success' && + needs.reset-jobs.outputs.prod-jobs != '[]' outputs: - prod-hub-matrix-jobs: ${{ steps.filter-jobs.outputs.prod-hub-matrix-jobs }} - + prod-jobs: ${{ steps.filter-jobs.outputs.filtered-prod-jobs }} steps: # This Python script filters out any prod hub deployment job from running - # later based on if its part of a cluster where support/staging upgrade + # later based on if its part of a cluster where staging hub upgrade # just failed. Data is injected to the script before its executed via # string literals as rendered GitHub workflow expressions. 
- - name: Filter prod deploy jobs to run based on failures in support/staging + - name: Filter prod deploy jobs to run based on failures in staging id: filter-jobs shell: python run: | import os import json - jobs = json.loads(r"""${{ needs.generate-jobs.outputs.prod-hub-matrix-jobs }}""") - outputs = json.loads(r"""${{ toJson(needs.upgrade-support-and-staging.outputs) }}""") + prod_jobs = json.loads(r"""${{ needs.reset-jobs.outputs.prod-jobs }}""") + outputs = json.loads(r"""${{ toJson(needs.upgrade-staging.outputs) }}""") try: - filtered_jobs = [ - job - for job in jobs - if outputs[f"failure_{job['cluster_name'].replace('.', '-')}"] != "true" - ] + filtered_prod_jobs = [] + for prod_job in prod_jobs: + failed_jobs = { + k: v + for k, v in outputs.items() + if prod_job["cluster_name"] in k + and v == "true" + } + + if len(failed_jobs) == 0: + filtered_prod_jobs.append(prod_job) except KeyError: - print(f"The {cluster_name} cluster wasn't found in the `upgrade-support-and-staging.outputs` list. Please add it before continuing!") + print("A prod job entry is missing its 'cluster_name' key. Please add it before continuing!") output_file = os.getenv("GITHUB_OUTPUT") with open(output_file, "a") as f: - f.write(f"prod-hub-matrix-jobs={json.dumps(filtered_jobs)}") + f.write(f"prod-jobs={json.dumps(filtered_prod_jobs)}") # https://github.com/ravsamhq/notify-slack-action # Needs to be added per job @@ -408,24 +594,22 @@ jobs: env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_GHA_FAILURES_WEBHOOK_URL }} # This job upgrades production hubs on clusters in parallel, if required. This - # job needs both the `filter-generate-jobs` to have completed to provide its - # output `filtered-prod-hub-matrix-jobs`. It is a list of dictionaries with - # the keys cluster_name, provider, and hub_name for each production hub that - # requires an upgrade and didn't have a failed support-and-staging-upgrade job - # run as part of this workflow. 
- upgrade-prod-hubs: + # job needs the `filter-failed-staging` to have completed to provide its + # output `prod-jobs`. It is a list of dictionaries with the keys cluster_name, + # provider, and hub_name for each production hub that requires an upgrade and + # didn't have a failed staging job. + upgrade-prod: runs-on: ubuntu-latest - needs: [filter-generate-jobs] + needs: [filter-failed-staging] if: | !cancelled() && (github.event_name == 'push' && contains(github.ref, 'main')) && - needs.filter-generate-jobs.result == 'success' && - needs.filter-generate-jobs.outputs.prod-hub-matrix-jobs != '[]' + needs.filter-failed-staging.result == 'success' && + needs.filter-failed-staging.outputs.prod-jobs != '[]' strategy: - # Don't stop other deployments if one fails fail-fast: false matrix: - jobs: ${{ fromJson(needs.filter-generate-jobs.outputs.prod-hub-matrix-jobs) }} + jobs: ${{ fromJson(needs.filter-failed-staging.outputs.prod-jobs) }} steps: - uses: actions/checkout@v4 diff --git a/deployer/commands/generate/helm_upgrade/decision.py b/deployer/commands/generate/helm_upgrade/decision.py index ba76ea6d0a..9c50e96e3b 100644 --- a/deployer/commands/generate/helm_upgrade/decision.py +++ b/deployer/commands/generate/helm_upgrade/decision.py @@ -10,6 +10,7 @@ from rich.table import Table from ruamel.yaml import YAML +from deployer.utils.file_acquisition import find_absolute_path_to_cluster_file from deployer.utils.rendering import print_colour yaml = YAML(typ="safe", pure=True) @@ -26,7 +27,7 @@ def discover_modified_common_files(modified_paths): Returns: upgrade_support_on_all_clusters (bool): Whether or not all clusters should have - their support chart upgraded since has changes + their support chart upgraded since it has changes upgrade_all_hubs_on_all_clusters (bool): Whether or not all hubs on all clusters should be upgraded since a core piece of infrastructure has changed """ @@ -62,6 +63,33 @@ def discover_modified_common_files(modified_paths): return 
upgrade_support_on_all_clusters, upgrade_all_hubs_on_all_clusters +def filter_out_staging_hubs(all_hub_matrix_jobs): + """Separate staging hubs from prod hubs in hub matrix jobs. + + Args: + all_hub_matrix_jobs (list[dict]): A list of dictionaries representing matrix + jobs to upgrade deployed hubs as identified by the generate_hub_matrix_jobs + function. + + Returns: + staging_hub_matrix_jobs (list[dict]): A list of dictionaries representing + matrix jobs to upgrade staging hubs on clusters that require it. + prod_hub_matrix_jobs (list[dict]): A list of dictionaries representing matrix + jobs to upgrade all production hubs, i.e., those without "staging" in their + name. + """ + # Separate the jobs for hubs with "staging" in their name (including "dask-staging") + # from those without staging in their name + staging_hub_matrix_jobs = [ + job for job in all_hub_matrix_jobs if "staging" in job["hub_name"] + ] + prod_hub_matrix_jobs = [ + job for job in all_hub_matrix_jobs if "staging" not in job["hub_name"] + ] + + return staging_hub_matrix_jobs, prod_hub_matrix_jobs + + def generate_hub_matrix_jobs( cluster_file, cluster_config, @@ -88,7 +116,7 @@ def generate_hub_matrix_jobs( redeployed. added_or_modified_files (set[str]): A set of all added or modified files provided in a GitHub Pull Requests - pr_labels (list, optional): A list of PR labels + pr_labels (list, optional): A list of PR labels. Defaults to None. upgrade_all_hubs_on_this_cluster (bool, optional): If True, generates jobs to upgrade all hubs on the given cluster. This is triggered when the cluster.yaml file itself has been modified. Defaults to False. @@ -102,9 +130,9 @@ def generate_hub_matrix_jobs( deployed to that cluster, and the reason that hub needs to be redeployed. 
""" if pr_labels and "deployer:skip-deploy" in pr_labels: - return [] + return [], [] if pr_labels and "deployer:skip-deploy-hubs" in pr_labels: - return [] + return [], [] # Empty list to store all the matrix job definitions in matrix_jobs = [] @@ -151,7 +179,9 @@ def generate_hub_matrix_jobs( ) matrix_jobs.append(matrix_job) - return matrix_jobs + staging_hub_matrix_jobs, prod_hub_matrix_jobs = filter_out_staging_hubs(matrix_jobs) + + return staging_hub_matrix_jobs, prod_hub_matrix_jobs def generate_support_matrix_jobs( @@ -180,7 +210,7 @@ def generate_support_matrix_jobs( cluster to be redeployed. added_or_modified_files (set[str]): A set of all added or modified files provided in a GitHub Pull Requests - pr_labels (list, optional): A list of PR labels + pr_labels (list, optional): A list of PR labels. Defaults to None. upgrade_support_on_this_cluster (bool, optional): If True, generates jobs to update the support chart on the given cluster. This is triggered when the cluster.yaml file itself is modified. Defaults to False. 
@@ -200,19 +230,13 @@ def generate_support_matrix_jobs( { "cluster_name": 2i2c, "provider": "gcp", - "reason_for_support_redeploy": "Support helm chart has been modified", - "upgrade_support": True, + "reason_for_redeploy": "Support helm chart has been modified", }, ] """ if pr_labels and "deployer:skip-deploy" in pr_labels: return [] - # Rename dictionary key - cluster_info["reason_for_support_redeploy"] = cluster_info.pop( - "reason_for_redeploy" - ) - # Empty list to store the matrix definitions in matrix_jobs = [] @@ -223,15 +247,14 @@ def generate_support_matrix_jobs( # We know we're upgrading support on all clusters, so just add the cluster # name to the list of matrix jobs and move on matrix_job = cluster_info.copy() - matrix_job["upgrade_support"] = True if upgrade_support_on_all_clusters: if pr_labels and "deployer:deploy-support" in pr_labels: - matrix_job["reason_for_support_redeploy"] = ( + matrix_job["reason_for_redeploy"] = ( "deployer:deploy-support label detected" ) else: - matrix_job["reason_for_support_redeploy"] = ( + matrix_job["reason_for_redeploy"] = ( "Support helm chart has been modified" ) @@ -247,8 +270,7 @@ def generate_support_matrix_jobs( if intersection: matrix_job = cluster_info.copy() - matrix_job["upgrade_support"] = True - matrix_job["reason_for_support_redeploy"] = ( + matrix_job["reason_for_redeploy"] = ( "Following helm chart values files were modified: " + ", ".join([path.name for path in intersection]) ) @@ -260,176 +282,34 @@ def generate_support_matrix_jobs( return matrix_jobs -def move_staging_hubs_to_staging_matrix( - all_hub_matrix_jobs, support_and_staging_matrix_jobs -): - """This function's first argument is a list of dictionary jobs calculated for - hubs by the generate_hub_matrix_job function and filters them based on whether - "staging" appears in the "hub_name" field or not. The list of production hub jobs, - those without "staging" in their name, are returned unchanged as the first argument. 
- - The second argument is a list of dictionary jobs to upgrade the support chart on - clusters that require it. The filtered list of staging hubs, those with "staging" - in their name, is used to update these jobs with information to upgrade the staging - hub for that cluster. If a job for a cluster matching a staging hub does not already - exist in support_and_staging_matrix_jobs, one is created that *doesn't* also upgrade - the support chart since this is the reason the job doesn't exist in the first place. - - Updated support_and_staging_matrix_jobs with the following properties are returned - as the second argument. Note: string representations of booleans are required to be - recognised by the GitHub Actions runner. - - { - "cluster_name": str, - "provider": str, - "upgrade_support": bool, - "reason_for_support_redeploy_: str, - "upgrade_staging": bool, - "reason_for_staging_redeploy_: str, - } - - Args: - all_hub_matrix_jobs (list[dict]): A list of dictionaries representing matrix - jobs to upgrade deployed hubs as identified by the generate_hub_matrix_jobs - function. - support_and_staging_matrix_jobs (list[dict]): A list of dictionaries - representing matrix jobs to upgrade the support chart for clusters as - identified by the generate_support_matrix_jobs function. - - Returns: - prod_hub_matrix_jobs (list[dict]): A list of dictionaries representing matrix - jobs to upgrade all production hubs, i.e., those without "staging" in their - name. - support_and_staging_matrix_jobs (list[dict]): A list of dictionaries representing - matrix jobs to upgrade the support chart and staging hub on clusters that - require it. 
- """ - # Separate the jobs for hubs with "staging" in their name (including "dask-staging") - # from those without staging in their name - staging_hub_jobs = [ - job for job in all_hub_matrix_jobs if "staging" in job["hub_name"] - ] - prod_hub_matrix_jobs = [ - job for job in all_hub_matrix_jobs if "staging" not in job["hub_name"] - ] - - # Loop over each job for a staging hub - for staging_job in staging_hub_jobs: - # Find a job in support_and_staging_matrix_jobs that is for the same cluster as - # the current staging hub job - job_idx = next( - ( - idx - for (idx, job) in enumerate(support_and_staging_matrix_jobs) - if staging_job["cluster_name"] == job["cluster_name"] - ), - None, - ) - - if job_idx is not None: - # Update the matching job in support_and_staging_matrix_jobs to hold - # information related to upgrading the staging hub - support_and_staging_matrix_jobs[job_idx]["upgrade_staging"] = True - support_and_staging_matrix_jobs[job_idx]["reason_for_staging_redeploy"] = ( - staging_job["reason_for_redeploy"] - ) - else: - # A job with a matching cluster name doesn't exist, this is because its - # support chart doesn't need upgrading. We create a new job in that will - # upgrade the staging deployment for this cluster, but not the support - # chart. - new_job = { - "cluster_name": staging_job["cluster_name"], - "provider": staging_job["provider"], - "upgrade_staging": True, - "reason_for_staging_redeploy": staging_job["reason_for_redeploy"], - "upgrade_support": False, - "reason_for_support_redeploy": "", - } - support_and_staging_matrix_jobs.append(new_job) - - return prod_hub_matrix_jobs, support_and_staging_matrix_jobs - - -def ensure_support_staging_jobs_have_correct_keys( - support_and_staging_matrix_jobs, prod_hub_matrix_jobs -): - """This function ensures that all entries in support_and_staging_matrix_jobs have - the expected upgrade_staging and reason_for_staging_redeploy keys, even if they are - set to false/empty. 
- - Args: - support_and_staging_matrix_jobs (list[dict]): A list of dictionaries - representing jobs to upgrade the support chart and staging hub on clusters - that require it. - prod_hub_matrix_jobs (list[dict]): A list of dictionaries representing jobs to - upgrade production hubs that require it. - - Returns: - support_and_staging_matrix_jobs (list[dict]): Updated to ensure each entry has - the upgrade_staging and reason_for_staging_redeploy keys, even if they are - false/empty. - """ - # For each job listed in support_and_staging_matrix_jobs, ensure it has the - # upgrade_staging key present, even if we just set it to False - for job in support_and_staging_matrix_jobs: - if "upgrade_staging" not in job.keys(): - # Get a list of prod hubs running on the same cluster this staging job will - # run on - hubs_on_this_cluster = [ - hub["hub_name"] - for hub in prod_hub_matrix_jobs - if hub["cluster_name"] == job["cluster_name"] - ] - if hubs_on_this_cluster: - # There are prod hubs on this cluster that require an upgrade, and so we - # also upgrade staging - job["upgrade_staging"] = True - job["reason_for_staging_redeploy"] = ( - "Following prod hubs require redeploy: " - + ", ".join(hubs_on_this_cluster) - ) - else: - # There are no prod hubs on this cluster that require an upgrade, so we - # do not upgrade staging - job["upgrade_staging"] = False - job["reason_for_staging_redeploy"] = "" - - return support_and_staging_matrix_jobs - - def assign_staging_jobs_for_missing_clusters( - support_and_staging_matrix_jobs, prod_hub_matrix_jobs + staging_hub_matrix_jobs, prod_hub_matrix_jobs ): """Ensure that for each cluster listed in prod_hub_matrix_jobs, there is an - associated job in support_and_staging_matrix_jobs. This is our last-hope catch-all + associated job in staging_hub_matrix_jobs. This is our last-hope catch-all to ensure there are no prod hub jobs trying to run without an associated - support/staging job. + staging job. 
    Args:
-        support_and_staging_matrix_jobs (list[dict]): A list of dictionaries
-            representing jobs to upgrade the support chart and staging hub on clusters
-            that require it.
-        prod_hub_matrix_jobs (list[dict]): A list of dictionaries representing jobs to
-            upgrade production hubs that require it.
+        staging_hub_matrix_jobs (list[dict]): A list of dictionaries representing
+            jobs to upgrade staging hubs on clusters that require it.
+        prod_hub_matrix_jobs (list[dict]): A list of dictionaries representing
+            jobs to upgrade production hubs that require it.

     Returns:
-        support_and_staging_matrix_jobs (list[dict]): Updated to ensure any clusters
+        staging_hub_matrix_jobs (list[dict]): Updated to ensure any clusters
             missing present in prod_hub_matrix_jobs but missing from
-        support_and_staging_matrix_jobs now have an associated support/staging job.
+        staging_hub_matrix_jobs now have an associated staging job.
     """
     prod_hub_clusters = {job["cluster_name"] for job in prod_hub_matrix_jobs}
-    support_staging_clusters = {
-        job["cluster_name"] for job in support_and_staging_matrix_jobs
-    }
-    missing_clusters = prod_hub_clusters.difference(support_staging_clusters)
+    staging_hub_clusters = {job["cluster_name"] for job in staging_hub_matrix_jobs}
+    missing_clusters = prod_hub_clusters.difference(staging_hub_clusters)

     if missing_clusters:
-        # Generate support/staging jobs for clusters that don't have them but do have
-        # prod hub jobs. We assume they are missing because neither the support chart
-        # nor staging hub needed an upgrade. We set upgrade_support to False. However,
-        # if prod hubs need upgrading, then we should upgrade staging so set that to
-        # True.
+        # Generate staging jobs for clusters that don't have them but do have
+        # prod hub jobs. We assume they are missing because the staging hubs
+        # didn't need an upgrade. However if prod hubs need upgrading, then we
+        # should also upgrade staging hubs.
for missing_cluster in missing_clusters: provider = next( ( @@ -445,53 +325,75 @@ def assign_staging_jobs_for_missing_clusters( if hub["cluster_name"] == missing_cluster ] - new_job = { - "cluster_name": missing_cluster, - "provider": provider, - "upgrade_support": False, - "reason_for_support_redeploy": "", - "upgrade_staging": True, - "reason_for_staging_redeploy": ( - "Following prod hubs require redeploy: " + ", ".join(prod_hubs) - ), - } - support_and_staging_matrix_jobs.append(new_job) + cluster_file = find_absolute_path_to_cluster_file(missing_cluster) + with open(cluster_file) as f: + cluster_config = yaml.load(f) + + staging_hubs = [ + hub["name"] + for hub in cluster_config.get("hubs") + if "staging" in hub["name"] + ] - return support_and_staging_matrix_jobs + for staging_hub in staging_hubs: + new_job = { + "cluster_name": missing_cluster, + "provider": provider, + "hub_name": staging_hub, + "reason_for_redeploy": ( + "Following prod hubs require redeploy: " + ", ".join(prod_hubs) + ), + } + staging_hub_matrix_jobs.append(new_job) + return staging_hub_matrix_jobs -def pretty_print_matrix_jobs(prod_hub_matrix_jobs, support_and_staging_matrix_jobs): + +def pretty_print_matrix_jobs( + support_matrix_jobs, staging_hub_matrix_jobs, prod_hub_matrix_jobs +): # Construct table for support chart upgrades - support_table = Table(title="Support chart and Staging hub upgrades") + support_table = Table(title="Support chart upgrades") support_table.add_column("Cloud Provider") support_table.add_column("Cluster Name") - support_table.add_column("Upgrade Support?") - support_table.add_column("Reason for Support Redeploy") - support_table.add_column("Upgrade Staging?") - support_table.add_column("Reason for Staging Redeploy") + support_table.add_column("Reason for Redeploy") # Add rows - for job in support_and_staging_matrix_jobs: + for job in support_matrix_jobs: support_table.add_row( job["provider"], job["cluster_name"], - "Yes" if job["upgrade_support"] else "No", - 
job["reason_for_support_redeploy"], - "Yes" if job["upgrade_staging"] else "No", - job["reason_for_staging_redeploy"], + job["reason_for_redeploy"], + end_section=True, + ) + + # Construct table for staging hub upgrades + staging_hub_table = Table(title="Staging hub upgrades") + staging_hub_table.add_column("Cloud Provider") + staging_hub_table.add_column("Cluster Name") + staging_hub_table.add_column("Hub Name") + staging_hub_table.add_column("Reason for Redeploy") + + # Add rows + for job in staging_hub_matrix_jobs: + staging_hub_table.add_row( + job["provider"], + job["cluster_name"], + job["hub_name"], + job["reason_for_redeploy"], end_section=True, ) # Construct table for prod hub upgrades - hub_table = Table(title="Prod hub upgrades") - hub_table.add_column("Cloud Provider") - hub_table.add_column("Cluster Name") - hub_table.add_column("Hub Name") - hub_table.add_column("Reason for Redeploy") + prod_hub_table = Table(title="Prod hub upgrades") + prod_hub_table.add_column("Cloud Provider") + prod_hub_table.add_column("Cluster Name") + prod_hub_table.add_column("Hub Name") + prod_hub_table.add_column("Reason for Redeploy") # Add rows for job in prod_hub_matrix_jobs: - hub_table.add_row( + prod_hub_table.add_row( job["provider"], job["cluster_name"], job["hub_name"], @@ -501,4 +403,5 @@ def pretty_print_matrix_jobs(prod_hub_matrix_jobs, support_and_staging_matrix_jo console = Console() console.print(support_table) - console.print(hub_table) + console.print(staging_hub_table) + console.print(prod_hub_table) diff --git a/deployer/commands/generate/helm_upgrade/jobs.py b/deployer/commands/generate/helm_upgrade/jobs.py index ccce6fdadb..b545e6332b 100644 --- a/deployer/commands/generate/helm_upgrade/jobs.py +++ b/deployer/commands/generate/helm_upgrade/jobs.py @@ -11,10 +11,8 @@ from .decision import ( assign_staging_jobs_for_missing_clusters, discover_modified_common_files, - ensure_support_staging_jobs_have_correct_keys, generate_hub_matrix_jobs, 
generate_support_matrix_jobs, - move_staging_hubs_to_staging_matrix, pretty_print_matrix_jobs, ) @@ -59,8 +57,9 @@ def helm_upgrade_jobs( cluster_files = get_all_cluster_yaml_files() # Empty lists to store job definitions in + support_matrix_jobs = [] + staging_hub_matrix_jobs = [] prod_hub_matrix_jobs = [] - support_and_staging_matrix_jobs = [] for cluster_file in cluster_files: # Read in the cluster.yaml file @@ -92,20 +91,20 @@ def helm_upgrade_jobs( upgrade_support_on_this_cluster = False # Generate a job matrix of all hubs that need upgrading on this cluster - prod_hub_matrix_jobs.extend( - generate_hub_matrix_jobs( - cluster_file, - cluster_config, - cluster_info, - set(changed_filepaths), - pr_labels, - upgrade_all_hubs_on_this_cluster=upgrade_all_hubs_on_this_cluster, - upgrade_all_hubs_on_all_clusters=upgrade_all_hubs_on_all_clusters, - ) + staging_hubs, prod_hubs = generate_hub_matrix_jobs( + cluster_file, + cluster_config, + cluster_info, + set(changed_filepaths), + pr_labels, + upgrade_all_hubs_on_this_cluster=upgrade_all_hubs_on_this_cluster, + upgrade_all_hubs_on_all_clusters=upgrade_all_hubs_on_all_clusters, ) + staging_hub_matrix_jobs.extend(staging_hubs) + prod_hub_matrix_jobs.extend(prod_hubs) # Generate a job matrix for support chart upgrades - support_and_staging_matrix_jobs.extend( + support_matrix_jobs.extend( generate_support_matrix_jobs( cluster_file, cluster_config, @@ -118,21 +117,13 @@ def helm_upgrade_jobs( ) # Clean up the matrix jobs - ( - prod_hub_matrix_jobs, - support_and_staging_matrix_jobs, - ) = move_staging_hubs_to_staging_matrix( - prod_hub_matrix_jobs, support_and_staging_matrix_jobs - ) - support_and_staging_matrix_jobs = ensure_support_staging_jobs_have_correct_keys( - support_and_staging_matrix_jobs, prod_hub_matrix_jobs - ) - support_and_staging_matrix_jobs = assign_staging_jobs_for_missing_clusters( - support_and_staging_matrix_jobs, prod_hub_matrix_jobs + staging_hub_matrix_jobs = 
assign_staging_jobs_for_missing_clusters( + staging_hub_matrix_jobs, prod_hub_matrix_jobs ) - # Pretty print the jobs using rich - pretty_print_matrix_jobs(prod_hub_matrix_jobs, support_and_staging_matrix_jobs) + pretty_print_matrix_jobs( + support_matrix_jobs, staging_hub_matrix_jobs, prod_hub_matrix_jobs + ) # The existence of the CI environment variable is an indication that we are running # in an GitHub Actions workflow @@ -145,15 +136,14 @@ def helm_upgrade_jobs( if ci_env: # Add these matrix jobs as output variables for use in another job with open(output_file, "a") as f: - f.write(f"prod-hub-matrix-jobs={json.dumps(prod_hub_matrix_jobs)}\n") - f.write( - f"support-and-staging-matrix-jobs={json.dumps(support_and_staging_matrix_jobs)}\n" - ) + f.write(f"support-jobs={json.dumps(support_matrix_jobs)}\n") + f.write(f"staging-jobs={json.dumps(staging_hub_matrix_jobs)}\n") + f.write(f"prod-jobs={json.dumps(prod_hub_matrix_jobs)}\n") - # Don't bother generating a comment if both of the matrices are empty - if support_and_staging_matrix_jobs or prod_hub_matrix_jobs: + # Don't bother generating a comment if all of the matrices are empty + if support_matrix_jobs or staging_hub_matrix_jobs or prod_hub_matrix_jobs: # Generate Markdown tables from the job matrices and write them to a file # for use in another job create_markdown_comment( - support_and_staging_matrix_jobs, prod_hub_matrix_jobs + support_matrix_jobs, staging_hub_matrix_jobs, prod_hub_matrix_jobs ) diff --git a/deployer/utils/rendering.py b/deployer/utils/rendering.py index 6e98bfffc4..6c8760698e 100644 --- a/deployer/utils/rendering.py +++ b/deployer/utils/rendering.py @@ -36,15 +36,17 @@ def print_colour(msg: str, colour="green"): print(msg) -def create_markdown_comment(support_staging_matrix, prod_matrix): +def create_markdown_comment(support_matrix, staging_matrix, prod_matrix): """Convert a list of dictionaries into a Markdown formatted table for posting to GitHub as comments. 
This function will write the Markdown content to a file to allow a GitHub Actions to upload it as an artifact and reuse the content in another workflow. Args: - support_staging_matrix (list[dict]): The support of staging jobs to be converted - into a Markdown formatted table + support_matrix (list[dict]): The support jobs to be converted into a Markdown + formatted table + staging_matrix (list[dict]): The staging jobs to be converted into a Markdown + formatted table prod_matrix (list[dict]): The production jobs to be converted into a Markdown formatted table """ @@ -52,56 +54,57 @@ def create_markdown_comment(support_staging_matrix, prod_matrix): column_converter = { "cluster_name": "Cluster Name", "provider": "Cloud Provider", - "upgrade_support": "Upgrade Support?", - "reason_for_support_redeploy": "Reason for Support Redeploy", - "upgrade_staging": "Upgrade Staging?", - "reason_for_staging_redeploy": "Reason for Staging Redeploy", "hub_name": "Hub Name", "reason_for_redeploy": "Reason for Redeploy", } - # A dictionary to convert row values when they are Boolean - boolean_converter = { - True: "Yes", - False: "No", - } - # === To reliably convert a list of dictionaries into a Markdown table, the keys # === must be consistent across each dictionary in the list as they will become the # === columns of the table. Moreover, we want the columns to be in 'sensible' order # === when a human reads this table; therefore, we reformat the inputted jobs. 
- # Only execute if support_staging_matrix is not an empty list - if support_staging_matrix: - # Format the Support and Staging matrix jobs - formatted_support_staging_matrix = [] - for entry in support_staging_matrix: + # Only execute if support_matrix is not an empty list + if support_matrix: + # Format the Support matrix jobs + formatted_support_matrix = [] + for entry in support_matrix: + formatted_entry = { + column_converter["provider"]: entry["provider"], + column_converter["cluster_name"]: entry["cluster_name"], + column_converter["reason_for_redeploy"]: entry["reason_for_redeploy"], + } + formatted_support_matrix.append(formatted_entry) + + # Generate a Markdown table + support_md_table = ( + markdown_table(formatted_support_matrix) + .set_params(row_sep="markdown", quote=False) + .get_markdown() + ) + else: + support_md_table = [] + + # Only execute if staging_matrix is not an empty list + if staging_matrix: + # Format the Staging Hubs matrix jobs + formatted_staging_matrix = [] + for entry in staging_matrix: formatted_entry = { column_converter["provider"]: entry["provider"], column_converter["cluster_name"]: entry["cluster_name"], - column_converter["upgrade_support"]: boolean_converter[ - entry["upgrade_support"] - ], - column_converter["reason_for_support_redeploy"]: entry[ - "reason_for_support_redeploy" - ], - column_converter["upgrade_staging"]: boolean_converter[ - entry["upgrade_staging"] - ], - column_converter["reason_for_staging_redeploy"]: entry[ - "reason_for_staging_redeploy" - ], + column_converter["hub_name"]: entry["hub_name"], + column_converter["reason_for_redeploy"]: entry["reason_for_redeploy"], } - formatted_support_staging_matrix.append(formatted_entry) + formatted_staging_matrix.append(formatted_entry) # Generate a Markdown table - support_staging_md_table = ( - markdown_table(formatted_support_staging_matrix) + staging_md_table = ( + markdown_table(formatted_staging_matrix) .set_params(row_sep="markdown", quote=False) 
.get_markdown()
         )
     else:
-        support_staging_md_table = []
+        staging_md_table = []

     # Only execute if prod_matrix is not an empty list
     if prod_matrix:
@@ -129,9 +132,13 @@ def create_markdown_comment(support_staging_matrix, prod_matrix):
     comment_body = f"""
 Merging this PR will trigger the following deployment actions.

-### Support and Staging deployments
+### Support deployments
+
+{support_md_table if bool(support_md_table) else 'No support upgrades will be triggered'}
+
+### Staging deployments

-{support_staging_md_table if bool(support_staging_md_table) else 'No support or staging upgrades will be triggered'}
+{staging_md_table if bool(staging_md_table) else 'No staging hub upgrades will be triggered'}

 ### Production deployments

diff --git a/docs/hub-deployment-guide/runbooks/phase3/initial-hub-setup.md b/docs/hub-deployment-guide/runbooks/phase3/initial-hub-setup.md
index 4a03a0d031..671b787681 100644
--- a/docs/hub-deployment-guide/runbooks/phase3/initial-hub-setup.md
+++ b/docs/hub-deployment-guide/runbooks/phase3/initial-hub-setup.md
@@ -162,13 +162,13 @@ All of the following steps must be followed in order to consider phase 3.1 compl

    If Dask gateway will be needed, then choose a `basehub`, and follow the guide on [how to enable dask-gateway on an existing hub](howto:features:daskhub).

-1. **Add the new cluster to CI/CD**
+1. **Add the new cluster and staging hub to CI/CD**

    ```{important}
-   This step is only applicable if the hub is the first hub being deployed to a cluster.
+   This step is only applicable if the hub is the first hub being deployed to a cluster **or** has `staging` in its name.
``` - To ensure the new cluster and its hubs are appropriately handled by our CI/CD system, please add it as an entry in the following places: + To ensure the new cluster and its hubs are appropriately handled by our CI/CD system, please add it as an entry in the following places in the [`deploy-hubs.yaml`](https://github.com/2i2c-org/infrastructure/blob/HEAD/.github/workflows/deploy-hubs.yaml) GitHub Actions workflow file: - The [`deploy-hubs.yaml`](https://github.com/2i2c-org/infrastructure/blob/008ae2c1deb3f5b97d0c334ed124fa090df1f0c6/.github/workflows/deploy-hubs.yaml#L121) GitHub workflow has a job named [`upgrade-support-and-staging`](https://github.com/2i2c-org/infrastructure/blob/18f5a4f8f39ed98c2f5c99091ae9f19a1075c988/.github/workflows/deploy-hubs.yaml#L128-L166) that needs to list of clusters being automatically deployed by our CI/CD system. Add an entry for the new cluster here. diff --git a/docs/reference/ci-cd/hub-deploy.md b/docs/reference/ci-cd/hub-deploy.md index d6b562c103..3637607ba0 100644 --- a/docs/reference/ci-cd/hub-deploy.md +++ b/docs/reference/ci-cd/hub-deploy.md @@ -7,7 +7,8 @@ You can learn more about this workflow in our blog post [Multiple JupyterHubs, m The best place to learn about the latest state of our *automatic* hub deployment is to look at [the `deploy-hubs.yaml` GitHub Actions workflow file](https://github.com/2i2c-org/infrastructure/tree/HEAD/.github/workflows/deploy-hubs.yaml). -This workflow file depends on a locally defined action that [sets up access to a given cluster](https://github.com/2i2c-org/infrastructure/blob/main/.github/actions/setup-deploy/action.yaml) and itself contains four main jobs, detailed below. +This workflow file depends on a locally defined action that [sets up access to a given cluster](https://github.com/2i2c-org/infrastructure/blob/main/.github/actions/setup-deploy/action.yaml) and itself contains a range of jobs, the most relevant ones of which are detailed below. 
+There are also some filtering/optimisation jobs which are not discussed here. ## Main hub deployment workflow @@ -15,54 +16,41 @@ This workflow file depends on a locally defined action that [sets up access to a ### 1. `generate-jobs`: Generate Helm upgrade jobs The first job takes a list of files that have been added/modified as part of a Pull Request and pipes them into the [`generate-helm-upgrade-jobs` sub-command](https://github.com/2i2c-org/infrastructure/blob/main/deployer/helm_upgrade_decision.py) of the [deployer module](https://github.com/2i2c-org/infrastructure/tree/main/deployer). -This sub-command uses a set of functions to calculate which hubs on which clusters require a helm upgrade, alongside whether the support chart and staging hub on that cluster should also be upgraded. -If any production hubs require an upgrade, the upgrade of the staging hub is a requirement. +This sub-command uses a set of functions to calculate which hubs on which clusters require a helm upgrade, alongside whether the support chart and staging hub(s) on that cluster should also be upgraded. +If any production hubs require an upgrade, the upgrade of the staging hub(s) is a requirement. This job provides the following outputs: -- Two JSON objects that can be read by later GitHub Actions jobs to define matrix jobs. - These JSON objects detail: which clusters require their support chart and/or staging hub to be upgraded, and which production hubs require an upgrade. +- Three JSON objects that can be read by later GitHub Actions jobs to define matrix jobs. + These JSON objects detail: which clusters require their support chart to be upgraded, which staging hub(s) require an upgrade, and which production hubs require an upgrade. - The above JSON objects are also rendered as human-readable tables using [`rich`](https://github.com/Textualize/rich). 
-````{admonition} Some special cased filepaths +```{admonition} Some special cased filepaths While the aim of this workflow is to only upgrade the pieces of the infrastructure that require it with every change, some changes do require us to redeploy everything. - If a cluster's `cluster.yaml` file has been modified, we upgrade the support chart and **all** hubs on **that** cluster. This is because we cannot tell what has been changed without inspecting the diff of the file. - If any of the `basehub` or `daskhub` Helm charts have additions/modifications in their paths, we redeploy **all** hubs across **all** clusters. -- If the support Helm chart has additions/modifications in its path, we redeploy the support chart on **all** clusters. -- If the deployer module has additions/modifications in its path, then we redeploy **all** hubs on **all** clusters. - -```{attention} -Right now, we redeploy everything when the deployer changes since the deployer undertakes some tasks that generates config related to authentication. -This may change in the future as we move towards the deployer becoming a separable, stand-alone package. +- If the `support` Helm chart has additions/modifications in its path, we redeploy the support chart on **all** clusters. +- If the `deployer` module has additions/modifications in its path, then we redeploy **all** hubs on **all** clusters. ``` -```` -### 2. `upgrade-support-and-staging`: Upgrade support and staging hub Helm charts on clusters that require it +### 2. `upgrade-support`: Upgrade support Helm chart on clusters that require it -The next job reads in one of the JSON objects detailed above that defines which clusters need their support chart and/or staging hub upgrading. -*Note that it is not a requirement for both the support chart and staging hub to be upgraded during this job.* +The next job reads in one of the JSON objects detailed above that defines which clusters need their support chart upgrading. 
A matrix job is set up that parallelises over all the clusters defined in the JSON object. -For each cluster, the support chart is first upgraded (if required) followed by the staging hub (if required). +For each cluster, the support chart is upgraded (if required). +We set an output variable from this job to determine if any support chart upgrades fail for a cluster. +We then use these outputs to filter out the failed clusters and prevent further deployments to them, without impairing deployments to unrelated clusters. -```{note} -The 2i2c cluster is a special case here as it has three staging hubs: one running the `basehub` Helm chart and another running the `daskhub` Helm chart. -We therefore run extra steps for the 2i2c cluster to upgrade these hubs (if required). -``` +### 3. `upgrade-staging`: Upgrade Helm chart for staging hub(s) in parallel +Next we deploy the staging hub(s) on a cluster. We use staging hubs as [canary deployments](https://sre.google/workbook/canarying-releases/) and prevent deploying production hubs if a staging deployment fails. -Hence, the last step of this job is to set an output variable that stores if the job completed successfully or failed. - -### 3. `filter-generate-jobs`: Filter out jobs for clusters whose support/staging job failed +Similarly to `upgrade-support`, the last step of this job is to set an output variable that stores if the job completed successfully or failed. -This job is an optimisation job. -While we do want to prevent all production hubs on Cluster X from being upgraded if its support/staging job fails, we **don't** want to prevent the production hubs on Cluster Y from being upgraded because the support/staging job for Cluster X failed. +### 4. 
`upgrade-prod`: Upgrade Helm chart for production hubs in parallel -This job reads in the production hub job definitions generated in job 1 and the support/staging success/failure variables set in job 2, then proceeds to filter out the productions hub upgrade jobs that were due to be run on a cluster whose support/staging job failed. - -### 4. `upgrade-prod-hubs`: Upgrade Helm chart for production hubs in parallel - -This last job deploys all production hubs that require it in parallel to the clusters that successfully completed job 2. +This last job deploys all production hubs that require it in parallel to the clusters that successfully completed a staging upgrade. (cicd/hub/pr-comment)= ## Posting the deployment plan as a comment on a Pull Request @@ -82,7 +70,6 @@ This workflow downloads the artifacts uploaded by `generate-jobs` and then uses - Either update an existing comment or create a new comment on the PR posting the Markdown tables downloaded as an artifact. ```{admonition} Why we're using artifacts and separate workflow files - Any secrets used by GitHub Actions are not available to Pull Requests that come from forks by default to protect against malicious code being executed with privileged access. `generate-jobs` needs to run in the PR context in order to establish which files are added/modified, but the required secrets would not be available for the rest of the workflow that would post a comment to the PR. To overcome this in a secure manner, we upload the required information (the body of the comment to be posted and the number of the PR the comment should be posted to) as artifacts. 
diff --git a/tests/test-clusters/cluster3/cluster.yaml b/tests/test-clusters/cluster3/cluster.yaml new file mode 100644 index 0000000000..62e3ee8282 --- /dev/null +++ b/tests/test-clusters/cluster3/cluster.yaml @@ -0,0 +1,15 @@ +name: cluster3 +provider: gcp +support: + helm_chart_values_files: + - support.values.yaml +hubs: + - name: staging1 + helm_chart_values_files: + - staging1.values.yaml + - name: staging2 + helm_chart_values_files: + - staging2.values.yaml + - name: prod + helm_chart_values_files: + - prod.values.yaml diff --git a/tests/test_helm_upgrade_decision.py b/tests/test_helm_upgrade_decision.py index 571bec7487..98c7df68a9 100644 --- a/tests/test_helm_upgrade_decision.py +++ b/tests/test_helm_upgrade_decision.py @@ -7,10 +7,9 @@ from deployer.commands.generate.helm_upgrade.decision import ( assign_staging_jobs_for_missing_clusters, discover_modified_common_files, - ensure_support_staging_jobs_have_correct_keys, + filter_out_staging_hubs, generate_hub_matrix_jobs, generate_support_matrix_jobs, - move_staging_hubs_to_staging_matrix, ) from deployer.utils.file_acquisition import get_all_cluster_yaml_files @@ -24,6 +23,7 @@ def test_get_all_cluster_yaml_files(): expected_cluster_files = { clusters_path.joinpath("cluster1/cluster.yaml"), clusters_path.joinpath("cluster2/cluster.yaml"), + clusters_path.joinpath("cluster3/cluster.yaml"), } with mock.patch( @@ -35,7 +35,42 @@ def test_get_all_cluster_yaml_files(): assert isinstance(result_cluster_files, set) -def test_generate_hub_matrix_jobs_one_hub(): +def test_generate_hub_matrix_jobs_one_staging_hub(): + cluster_file = root_path.joinpath("tests/test-clusters/cluster1/cluster.yaml") + with open(cluster_file) as f: + cluster_config = yaml.load(f) + + cluster_info = { + "cluster_name": cluster_config.get("name", {}), + "provider": cluster_config.get("provider", {}), + "reason_for_redeploy": "", + } + + modified_file = { + root_path.joinpath("tests/test-clusters/cluster1/staging.values.yaml"), + } + + 
expected_matrix_jobs = [ + { + "provider": "gcp", + "cluster_name": "cluster1", + "hub_name": "staging", + "reason_for_redeploy": "Following helm chart values files were modified: staging.values.yaml", + } + ] + + result_staging_matrix_jobs, result_prod_matrix_jobs = generate_hub_matrix_jobs( + cluster_file, cluster_config, cluster_info, modified_file + ) + + case.assertCountEqual(result_staging_matrix_jobs, expected_matrix_jobs) + assert result_prod_matrix_jobs == [] + assert isinstance(result_staging_matrix_jobs, list) + assert isinstance(result_prod_matrix_jobs, list) + assert isinstance(result_staging_matrix_jobs[0], dict) + + +def test_generate_hub_matrix_jobs_one_prod_hub(): cluster_file = root_path.joinpath("tests/test-clusters/cluster1/cluster.yaml") with open(cluster_file) as f: cluster_config = yaml.load(f) @@ -59,13 +94,15 @@ def test_generate_hub_matrix_jobs_one_hub(): } ] - result_matrix_jobs = generate_hub_matrix_jobs( + result_staging_matrix_jobs, result_prod_matrix_jobs = generate_hub_matrix_jobs( cluster_file, cluster_config, cluster_info, modified_file ) - case.assertCountEqual(result_matrix_jobs, expected_matrix_jobs) - assert isinstance(result_matrix_jobs, list) - assert isinstance(result_matrix_jobs[0], dict) + case.assertCountEqual(result_prod_matrix_jobs, expected_matrix_jobs) + assert result_staging_matrix_jobs == [] + assert isinstance(result_staging_matrix_jobs, list) + assert isinstance(result_prod_matrix_jobs, list) + assert isinstance(result_prod_matrix_jobs[0], dict) def test_generate_hub_matrix_jobs_many_hubs(): @@ -99,7 +136,7 @@ def test_generate_hub_matrix_jobs_many_hubs(): }, ] - result_matrix_jobs = generate_hub_matrix_jobs( + _, result_matrix_jobs = generate_hub_matrix_jobs( cluster_file, cluster_config, cluster_info, @@ -130,13 +167,16 @@ def test_generate_hub_matrix_jobs_all_hubs(): bool_options = [(True, False), (False, True), (True, True)] for reason, bool_option in zip(reasons, bool_options): - expected_matrix_jobs = [ + 
expected_staging_matrix_jobs = [ { "provider": "gcp", "cluster_name": "cluster1", "hub_name": "staging", "reason_for_redeploy": reason, - }, + } + ] + + expected_prod_matrix_jobs = [ { "provider": "gcp", "cluster_name": "cluster1", @@ -157,7 +197,7 @@ def test_generate_hub_matrix_jobs_all_hubs(): }, ] - result_matrix_jobs = generate_hub_matrix_jobs( + result_staging_matrix_jobs, result_prod_matrix_jobs = generate_hub_matrix_jobs( cluster_file, cluster_config, cluster_info, @@ -166,9 +206,12 @@ def test_generate_hub_matrix_jobs_all_hubs(): upgrade_all_hubs_on_all_clusters=bool_option[1], ) - case.assertCountEqual(result_matrix_jobs, expected_matrix_jobs) - assert isinstance(result_matrix_jobs, list) - assert isinstance(result_matrix_jobs[0], dict) + case.assertCountEqual(result_staging_matrix_jobs, expected_staging_matrix_jobs) + case.assertCountEqual(result_prod_matrix_jobs, expected_prod_matrix_jobs) + assert isinstance(result_staging_matrix_jobs, list) + assert isinstance(result_prod_matrix_jobs, list) + assert isinstance(result_staging_matrix_jobs[0], dict) + assert isinstance(result_prod_matrix_jobs[0], dict) def test_generate_hub_matrix_jobs_skip_deploy_label(): @@ -188,13 +231,12 @@ def test_generate_hub_matrix_jobs_skip_deploy_label(): pr_labels = ["unrelated1", "deployer:skip-deploy", "unrelated2"] - expected_matrix_jobs = [] - - result_matrix_jobs = generate_hub_matrix_jobs( + result_staging_jobs, result_prod_jobs = generate_hub_matrix_jobs( cluster_file, cluster_config, cluster_info, modified_file, pr_labels ) - case.assertCountEqual(result_matrix_jobs, expected_matrix_jobs) + case.assertCountEqual(result_staging_jobs, []) + case.assertCountEqual(result_prod_jobs, []) def test_generate_support_matrix_jobs_one_cluster(): @@ -216,8 +258,7 @@ def test_generate_support_matrix_jobs_one_cluster(): { "provider": "gcp", "cluster_name": "cluster1", - "upgrade_support": True, - "reason_for_support_redeploy": "Following helm chart values files were modified: 
support.values.yaml", + "reason_for_redeploy": "Following helm chart values files were modified: support.values.yaml", } ] @@ -253,8 +294,7 @@ def test_generate_support_matrix_jobs_all_clusters(): { "provider": "gcp", "cluster_name": "cluster1", - "upgrade_support": True, - "reason_for_support_redeploy": reason, + "reason_for_redeploy": reason, } ] @@ -328,7 +368,7 @@ def test_discover_modified_common_files_support_helm_chart(): assert not upgrade_all_hubs -def test_move_staging_hubs_to_staging_matrix_job_exists(): +def test_filter_out_staging_hubs_job_exists(): input_hub_matrix_jobs = [ { "cluster_name": "cluster1", @@ -343,16 +383,16 @@ def test_move_staging_hubs_to_staging_matrix_job_exists(): "reason_for_redeploy": "cluster.yaml file was modified", }, ] - input_support_staging_matrix_jobs = [ + + expected_staging_matrix_jobs = [ { "cluster_name": "cluster1", "provider": "gcp", - "upgrade_support": True, - "reason_for_support_redeploy": "cluster.yaml file was modified", + "hub_name": "staging", + "reason_for_redeploy": "cluster.yaml file was modified", } ] - - expected_hub_matrix_jobs = [ + expected_prod_hub_matrix_jobs = [ { "cluster_name": "cluster1", "provider": "gcp", @@ -360,31 +400,19 @@ def test_move_staging_hubs_to_staging_matrix_job_exists(): "reason_for_redeploy": "cluster.yaml file was modified", }, ] - expected_support_staging_matrix_jobs = [ - { - "cluster_name": "cluster1", - "provider": "gcp", - "upgrade_support": True, - "reason_for_support_redeploy": "cluster.yaml file was modified", - "upgrade_staging": True, - "reason_for_staging_redeploy": "cluster.yaml file was modified", - } - ] ( - result_hub_matrix_jobs, - result_support_staging_matrix_jobs, - ) = move_staging_hubs_to_staging_matrix( - input_hub_matrix_jobs, input_support_staging_matrix_jobs - ) + result_staging_matrix_jobs, + result_prod_hub_matrix_jobs, + ) = filter_out_staging_hubs(input_hub_matrix_jobs) + + case.assertCountEqual(result_staging_matrix_jobs, 
expected_staging_matrix_jobs) + case.assertCountEqual(result_prod_hub_matrix_jobs, expected_prod_hub_matrix_jobs) - case.assertCountEqual(result_hub_matrix_jobs, expected_hub_matrix_jobs) - case.assertCountEqual( - result_support_staging_matrix_jobs, expected_support_staging_matrix_jobs - ) +def test_filter_out_staging_hubs_job_does_not_exist(): + clusters_path = root_path.joinpath("tests/test-clusters") -def test_move_staging_hubs_to_staging_matrix_job_does_not_exist(): input_hub_matrix_jobs = [ { "cluster_name": "cluster1", @@ -399,106 +427,69 @@ def test_move_staging_hubs_to_staging_matrix_job_does_not_exist(): "reason_for_redeploy": "cluster.yaml file was modified", }, ] - input_support_staging_matrix_jobs = [] - expected_hub_matrix_jobs = [ + expected_staging_matrix_jobs = [ { "cluster_name": "cluster1", "provider": "gcp", - "hub_name": "hub1", + "hub_name": "staging", "reason_for_redeploy": "cluster.yaml file was modified", - }, + } ] - expected_support_staging_matrix_jobs = [ + expected_prod_hub_matrix_jobs = [ { "cluster_name": "cluster1", "provider": "gcp", - "upgrade_support": False, - "reason_for_support_redeploy": "", - "upgrade_staging": True, - "reason_for_staging_redeploy": "cluster.yaml file was modified", - } + "hub_name": "hub1", + "reason_for_redeploy": "cluster.yaml file was modified", + }, ] - ( - result_hub_matrix_jobs, - result_support_staging_matrix_jobs, - ) = move_staging_hubs_to_staging_matrix( - input_hub_matrix_jobs, input_support_staging_matrix_jobs - ) + with mock.patch( + "deployer.utils.file_acquisition.CONFIG_CLUSTERS_PATH", clusters_path + ): + ( + result_staging_matrix_jobs, + result_prod_hub_matrix_jobs, + ) = filter_out_staging_hubs(input_hub_matrix_jobs) - case.assertCountEqual(result_hub_matrix_jobs, expected_hub_matrix_jobs) - case.assertCountEqual( - result_support_staging_matrix_jobs, expected_support_staging_matrix_jobs - ) + case.assertCountEqual(result_staging_matrix_jobs, expected_staging_matrix_jobs) + 
case.assertCountEqual(result_prod_hub_matrix_jobs, expected_prod_hub_matrix_jobs) -def test_ensure_support_staging_jobs_have_correct_keys_hubs_exist(): - input_support_staging_jobs = [ - { - "cluster_name": "cluster1", - "provider": "gcp", - "upgrade_support": False, - "reason_for_support_upgrade": "", - } - ] +def test_assign_staging_jobs_for_missing_clusters_is_missing(): + clusters_path = root_path.joinpath("tests/test-clusters") - input_hub_jobs = [ + input_prod_jobs = [ { - "cluster_name": "cluster1", "provider": "gcp", + "cluster_name": "cluster1", "hub_name": "hub1", - "reason_for_redeploy": "", - } + }, ] - expected_support_staging_jobs = [ + expected_staging_jobs = [ { - "cluster_name": "cluster1", "provider": "gcp", - "upgrade_support": False, - "reason_for_support_upgrade": "", - "upgrade_staging": True, - "reason_for_staging_redeploy": "Following prod hubs require redeploy: hub1", - } - ] - - result_support_staging_jobs = ensure_support_staging_jobs_have_correct_keys( - input_support_staging_jobs, input_hub_jobs - ) - - case.assertCountEqual(result_support_staging_jobs, expected_support_staging_jobs) - - -def test_ensure_support_staging_jobs_have_correct_keys_hubs_dont_exist(): - input_support_staging_jobs = [ - { "cluster_name": "cluster1", - "provider": "gcp", - "upgrade_support": False, - "reason_for_support_upgrade": "", + "hub_name": "staging", + "reason_for_redeploy": "Following prod hubs require redeploy: hub1", } ] - expected_support_staging_jobs = [ - { - "cluster_name": "cluster1", - "provider": "gcp", - "upgrade_support": False, - "reason_for_support_upgrade": "", - "upgrade_staging": False, - "reason_for_staging_redeploy": "", - } - ] + with mock.patch( + "deployer.utils.file_acquisition.CONFIG_CLUSTERS_PATH", clusters_path + ): + result_staging_jobs = assign_staging_jobs_for_missing_clusters( + [], input_prod_jobs + ) - result_support_staging_jobs = ensure_support_staging_jobs_have_correct_keys( - input_support_staging_jobs, [] - ) + 
case.assertCountEqual(result_staging_jobs, expected_staging_jobs) - case.assertCountEqual(result_support_staging_jobs, expected_support_staging_jobs) +def test_assign_staging_jobs_for_missing_clusters_is_present(): + clusters_path = root_path.joinpath("tests/test-clusters") -def test_assign_staging_jobs_for_missing_clusters_is_missing(): input_prod_jobs = [ { "provider": "gcp", @@ -507,57 +498,66 @@ def test_assign_staging_jobs_for_missing_clusters_is_missing(): }, ] - expected_support_staging_jobs = [ + input_staging_jobs = [ { "provider": "gcp", "cluster_name": "cluster1", - "upgrade_support": False, - "reason_for_support_redeploy": "", - "upgrade_staging": True, - "reason_for_staging_redeploy": "Following prod hubs require redeploy: hub1", + "hub_name": "staging", + "reason_for_redeploy": "Following prod hubs require redeploy: hub1", } ] - result_support_staging_jobs = assign_staging_jobs_for_missing_clusters( - [], input_prod_jobs - ) + expected_staging_jobs = [ + { + "provider": "gcp", + "cluster_name": "cluster1", + "hub_name": "staging", + "reason_for_redeploy": "Following prod hubs require redeploy: hub1", + } + ] + + with mock.patch( + "deployer.utils.file_acquisition.CONFIG_CLUSTERS_PATH", clusters_path + ): + result_staging_jobs = assign_staging_jobs_for_missing_clusters( + input_staging_jobs, input_prod_jobs + ) - case.assertCountEqual(result_support_staging_jobs, expected_support_staging_jobs) + case.assertCountEqual(result_staging_jobs, expected_staging_jobs) -def test_assign_staging_jobs_for_missing_clusters_is_present(): +def test_assign_staging_jobs_for_missing_clusters_is_missing_many_staging(): + clusters_path = root_path.joinpath("tests/test-clusters") + input_prod_jobs = [ { "provider": "gcp", - "cluster_name": "cluster1", - "hub_name": "hub1", + "cluster_name": "cluster3", + "hub_name": "prod", }, ] - input_support_staging_jobs = [ + expected_staging_jobs = [ { "provider": "gcp", - "cluster_name": "cluster1", - "upgrade_support": False, - 
"reason_for_support_redeploy": "", - "upgrade_staging": True, - "reason_for_staging_redeploy": "Following prod hubs require redeploy: hub1", - } - ] - - expected_support_staging_jobs = [ + "cluster_name": "cluster3", + "hub_name": "staging1", + "reason_for_redeploy": "Following prod hubs require redeploy: prod", + }, { "provider": "gcp", - "cluster_name": "cluster1", - "upgrade_support": False, - "reason_for_support_redeploy": "", - "upgrade_staging": True, - "reason_for_staging_redeploy": "Following prod hubs require redeploy: hub1", - } + "cluster_name": "cluster3", + "hub_name": "staging2", + "reason_for_redeploy": "Following prod hubs require redeploy: prod", + }, ] - result_support_staging_jobs = assign_staging_jobs_for_missing_clusters( - input_support_staging_jobs, input_prod_jobs - ) + with mock.patch( + "deployer.utils.file_acquisition.CONFIG_CLUSTERS_PATH", clusters_path + ): + result_staging_jobs = assign_staging_jobs_for_missing_clusters( + [], input_prod_jobs + ) + print(result_staging_jobs) - case.assertCountEqual(result_support_staging_jobs, expected_support_staging_jobs) + case.assertCountEqual(result_staging_jobs, expected_staging_jobs)