From 2f9965107821f572403c2ec9b7b7018f34b050f9 Mon Sep 17 00:00:00 2001 From: Neal DeBuhr Date: Sat, 8 Jun 2024 14:32:38 +0000 Subject: [PATCH] Clean up quality scanning, permissions provisioning, and cert handling --- .github/workflows/quality.yml | 23 -------------- README.md | 58 +++++++++++++++++++++------------- deploy/templates/certbot.yaml | 38 ++++++++++++++++++++-- deploy/templates/keycloak.yaml | 36 ++------------------- prepare/chart/values.yaml | 6 ++++ provision/cert-manager/iam.tf | 18 +++++++++++ provision/gke-beta/gke.tf | 3 ++ provision/gke/gke.tf | 3 ++ roles/provisioner.yaml | 21 ++++++++++++ 9 files changed, 125 insertions(+), 81 deletions(-) delete mode 100644 .github/workflows/quality.yml create mode 100644 provision/cert-manager/iam.tf create mode 100644 roles/provisioner.yaml diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml deleted file mode 100644 index 683a63d..0000000 --- a/.github/workflows/quality.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: quality - -on: - push: - branches: - - master - workflow_dispatch: - -jobs: - quality: - runs-on: ubuntu-latest - steps: - - name: code checkout - uses: actions/checkout@v2 - with: - # Disabling shallow clone is recommended for improving relevancy of reporting - fetch-depth: 0 - - - name: sonarcloud scan - uses: sonarsource/sonarcloud-github-action@master - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git a/README.md b/README.md index ee25b6a..fcb0556 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,6 @@ [![Build Workflow](https://github.com/ndebuhr/cloud-native-workstation/workflows/build/badge.svg)](https://github.com/ndebuhr/sim/actions) [![Deploy Workflow](https://github.com/ndebuhr/cloud-native-workstation/workflows/deploy/badge.svg)](https://github.com/ndebuhr/sim/actions) -[![Sonarcloud 
Status](https://sonarcloud.io/api/project_badges/measure?project=cloud-native-workstation&metric=alert_status)](https://sonarcloud.io/dashboard?id=cloud-native-workstation) [![Readme Standard](https://img.shields.io/badge/readme%20style-standard-brightgreen.svg)](https://github.com/RichardLitt/standard-readme) [![MIT License](https://img.shields.io/badge/license-MIT-yellow.svg)](https://opensource.org/licenses/MIT) @@ -85,25 +84,25 @@ git submodule update ### Google Kubernetes Service -If you would like to provision a new Kubernetes cluster on Google Kubernetes Engine to run your workstation, follow the steps below. -1. Create a Cloud Native Workstation role in Google Cloud Platform with the following permissions: - 1. compute.instanceGroupManagers.get - 1. container.clusters.create - 1. container.clusters.delete - 1. container.clusters.get - 1. container.clusters.update - 1. container.operations.get -1. Create a new service account and assign the Cloud Native Workstation and Service Account User roles -1. Generate a service account key -1. Set the GCP authentication environment variable - ```bash - export GOOGLE_APPLICATION_CREDENTIALS=YOUR_KEY_FILE.json - ``` -1. Set the GCP project environment variable - ```bash - export GOOGLE_PROJECT=YOUR_PROJECT - ``` -1. Navigate to the desired provisioning directory - either [provision/gke](provision/gke) or [provision/gke-with-gpu](provision/gke-with-gpu). The [gke](provision/gke) specification creates a "normal" cluster with a single node pool. The [gke-with-gpu](provision/gke-with-gpu) specification adds Nvidia T4 GPU capabilities to the Jupyter component, for AI/ML/GPU workloads. If you do not want to enable the Jupyter component, or want it but for non-AI/ML/GPU workloads, then use the [gke](provision/gke) specification. The [gke](provision/gke) specification is recommended for most users. 
Once you've navigated to the desired infrastructure specification directory, provision with: +If you would like to provision a new Kubernetes cluster on Google Kubernetes Engine to run your workstation, set the `GOOGLE_PROJECT` environment variable, then follow the steps below: +```bash +gcloud iam roles create workstation_provisioner \ + --project=$GOOGLE_PROJECT \ + --file=roles/provisioner.yaml +gcloud iam service-accounts create workstation-provisioner \ + --display-name="Workstation Provisioner" +gcloud projects add-iam-policy-binding $GOOGLE_PROJECT \ + --member="serviceAccount:workstation-provisioner@$GOOGLE_PROJECT.iam.gserviceaccount.com" \ + --role="projects/$GOOGLE_PROJECT/roles/workstation_provisioner" +gcloud projects add-iam-policy-binding $GOOGLE_PROJECT \ + --member="serviceAccount:workstation-provisioner@$GOOGLE_PROJECT.iam.gserviceaccount.com" \ + --role="roles/iam.serviceAccountUser" +gcloud iam service-accounts keys create workstation-provisioner.json \ + --iam-account="workstation-provisioner@$GOOGLE_PROJECT.iam.gserviceaccount.com" +``` +Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of the newly created key. + +Navigate to the desired provisioning directory - either [provision/gke](provision/gke) or [provision/gke-with-gpu](provision/gke-with-gpu). The [gke](provision/gke) specification creates a "normal" cluster with a single node pool. The [gke-with-gpu](provision/gke-with-gpu) specification adds Nvidia T4 GPU capabilities to the Jupyter component, for AI/ML/GPU workloads. If you do not want to enable the Jupyter component, or want it but for non-AI/ML/GPU workloads, then use the [gke](provision/gke) specification. The [gke](provision/gke) specification is recommended for most users. Once you've navigated to the desired infrastructure specification directory, provision with: 1. 
Using the default zone (us-central1-a) and cluster name (cloud-native-workstation): ``` terraform init @@ -190,10 +189,17 @@ kubectl config set-context --current --namespace cloud-native-workstation ## Prepare SSL -Secure SSL setup is required. There are two options for SSL certificates: -1. Automated SSL certificate generation using Let's Encrypt, Certbot, and the DNS01 challenge with Google Cloud DNS +Secure SSL setup is required. There are three options for SSL certificates: +1. Cert Manager certificate provisioning and management, on top of Google Kubernetes Engine +1. Automated SSL certificate generation using Let's Encrypt, Certbot, and the DNS01 challenge 1. Bring your own certificate +### Cert Manager with GKE + +1. Use Terraform to provision the resources in [provision/cert-manager](provision/cert-manager) + +Later, during the Helm installation, make sure `certbot.enabled` is `true` and `certbot.type` is `cert-manager-google` in the [deployment Helm values](deploy/values.yaml), and that `cert-manager.enabled` is `true` in the [preparation Helm values](prepare/chart/values.yaml). + ### Certbot with Google Cloud Platform DNS 1. In Google Cloud Platform, create a Cloud DNS zone for your domain @@ -331,6 +337,14 @@ helm install workstation-prerequisites . -n kube-system cd ../.. ``` +If using Cert Manager for TLS certificates: +```bash +kubectl annotate serviceaccount workstation-prerequisites-cert-manager \ + --namespace=kube-system \ + --overwrite \ + "iam.gke.io/gcp-service-account=workstation-cert-manager@$GOOGLE_PROJECT.iam.gserviceaccount.com" +``` + ### CRDs installation Constraint templates provide policy-based workstation controls and security. If you choose not to install these constraint templates, ensure `policies.enabled` is set to `false` in the [helm values](deploy/values.yaml). 
Install with: diff --git a/deploy/templates/certbot.yaml b/deploy/templates/certbot.yaml index b90ad04..9622789 100644 --- a/deploy/templates/certbot.yaml +++ b/deploy/templates/certbot.yaml @@ -15,7 +15,7 @@ spec: {{ include "tolerations" .Values.certbot | indent 6 }} initContainers: - name: certbot - image: certbot/dns-google:v1.22.0 + image: certbot/dns-google:v2.10.0 command: ["/bin/sh", "-c"] args: - | @@ -56,7 +56,7 @@ spec: emptyDir: {} restartPolicy: OnFailure --- -apiVersion: batch/v1beta1 +apiVersion: batch/v1 kind: CronJob metadata: name: certbot @@ -419,4 +419,38 @@ rules: - create - apply - patch +{{ if eq .Values.certbot.type "cert-manager-google" }} +--- +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: cloud-dns-issuer +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + privateKeySecretRef: + name: cloud-dns-issuer-key + solvers: + - dns01: + cloudDNS: + project: {{ .Values.certbot.project }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: workstation +spec: + secretName: workstation-tls + issuerRef: + kind: ClusterIssuer + name: cloud-dns-issuer + dnsNames: + - keycloak.{{ .Values.domain }} + - oauth2-proxy.{{ .Values.domain }} + {{ $root := . }} + {{ $backends := include "backends" . 
| split "," }} + {{ range $backend := $backends }} + - "{{ $backend }}.{{ $root.Values.domain }}" + {{ end }} +{{ end }} {{ end }} \ No newline at end of file diff --git a/deploy/templates/keycloak.yaml b/deploy/templates/keycloak.yaml index 4337dbe..17fb180 100644 --- a/deploy/templates/keycloak.yaml +++ b/deploy/templates/keycloak.yaml @@ -194,6 +194,7 @@ rules: - create - update - watch + - delete - apiGroups: - batch resources: @@ -417,30 +418,6 @@ spec: - name: "OAUTH2_PROXY_UPSTREAMS" value: "file:///dev/null" --- -apiVersion: autoscaling/v2beta2 -kind: HorizontalPodAutoscaler -metadata: - name: oauth2-proxy -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: oauth2-proxy - minReplicas: 1 - maxReplicas: 3 - metrics: - - type: Object - object: - metric: - name: requests-per-second - describedObject: - apiVersion: networking.k8s.io/v1 - kind: Ingress - name: oauth2-proxy - target: - type: Value - value: 5k ---- apiVersion: v1 kind: Service metadata: @@ -473,15 +450,6 @@ spec: app: ingress-nginx - namespaceSelector: {} egress: - - to: - - podSelector: - matchLabels: - app: ingress-nginx - - namespaceSelector: {} - - to: - - podSelector: - matchLabels: - app: keycloak - component: keycloak + - {} {{ end }} {{ end }} \ No newline at end of file diff --git a/prepare/chart/values.yaml b/prepare/chart/values.yaml index d2db5c7..8cf9638 100644 --- a/prepare/chart/values.yaml +++ b/prepare/chart/values.yaml @@ -1,3 +1,8 @@ +# Cert Manager for management of TLS certificates (only used if certbot.enabled is true and certbot.type is cert-manager-google) +cert-manager: + enabled: false + installCRDs: true +# Elastic file storage driver aws-efs-csi-driver: enabled: false controller: @@ -10,6 +15,7 @@ aws-efs-csi-driver: provisioningMode: efs-ap fileSystemId: fs-00000000000000000 directoryPerms: "777" +# Nginx ingress system ingress-nginx: controller: ingressClassResource: diff --git a/provision/cert-manager/iam.tf b/provision/cert-manager/iam.tf new file mode 100644 index 
0000000..d6c6b06
--- /dev/null
+++ b/provision/cert-manager/iam.tf
@@ -0,0 +1,26 @@
+# Identity that cert-manager uses to solve ACME DNS01 challenges in Cloud DNS.
+# With no arguments, the data source resolves the provider's configured project.
+data "google_project" "project" {}
+
+# Google service account referenced by the iam.gke.io/gcp-service-account
+# annotation on the cert-manager Kubernetes service account.
+resource "google_service_account" "cert_manager" {
+  account_id   = "workstation-cert-manager"
+  display_name = "Workstation Cert Manager"
+}
+
+# Project-level Cloud DNS administration for the cert-manager service account.
+resource "google_project_iam_member" "cert_manager_dns_admin" {
+  project = data.google_project.project.project_id
+  role    = "roles/dns.admin"
+  member  = "serviceAccount:${google_service_account.cert_manager.email}"
+}
+
+# Workload Identity binding, granted on the cert-manager service account itself
+# rather than on the whole project, so the Kubernetes service account can
+# impersonate only this one Google service account.
+resource "google_service_account_iam_member" "cert_manager_workload_identity_user" {
+  service_account_id = google_service_account.cert_manager.name
+  role               = "roles/iam.workloadIdentityUser"
+  member             = "serviceAccount:${data.google_project.project.project_id}.svc.id.goog[kube-system/workstation-prerequisites-cert-manager]"
+}
diff --git a/provision/gke-beta/gke.tf b/provision/gke-beta/gke.tf
index 500fb92..3c8618d 100644
--- a/provision/gke-beta/gke.tf
+++ b/provision/gke-beta/gke.tf
@@ -7,6 +7,11 @@ resource "google_container_cluster" "primary" {
   initial_node_count    = 1
   enable_shielded_nodes = true
   resource_labels       = var.labels
+  # Workload Identity pool that lets Kubernetes service accounts act as Google service accounts.
+  # NOTE(review): relies on a `google_project` data source named `project` in this module - confirm.
+  workload_identity_config {
+    workload_pool = "${data.google_project.project.project_id}.svc.id.goog"
+  }
   cluster_autoscaling {
     enabled             = false
     autoscaling_profile = "OPTIMIZE_UTILIZATION"
diff --git a/provision/gke/gke.tf b/provision/gke/gke.tf
index d7732d2..a9bd451 100644
--- a/provision/gke/gke.tf
+++ b/provision/gke/gke.tf
@@ -6,6 +6,11 @@ resource "google_container_cluster" "primary" {
   initial_node_count    = 1
   enable_shielded_nodes = true
   resource_labels       = var.labels
+  # Workload Identity pool that lets Kubernetes service accounts act as Google service accounts.
+  # NOTE(review): relies on a `google_project` data source named `project` in this module - confirm.
+  workload_identity_config {
+    workload_pool = "${data.google_project.project.project_id}.svc.id.goog"
+  }
   network_policy {
     enabled = true
   }
diff --git a/roles/provisioner.yaml b/roles/provisioner.yaml
new file mode 100644
index 0000000..0ba359c
--- /dev/null
+++ b/roles/provisioner.yaml
@@ -0,0 +1,28 @@
+# Custom IAM role definition passed to `gcloud iam roles create --file`.
+title: "Workstation Provisioner"
+description: "Role for automated Terraform provisioning of Cloud-Native Workstation systems"
+# Launch stage uses the upper-case enum names (ALPHA, BETA, GA, ...).
+stage: BETA
+includedPermissions:
+# GKE cluster lifecycle
+- compute.instanceGroupManagers.get
+- container.clusters.create
+- container.clusters.delete
+- container.clusters.get
+- container.clusters.update
+- container.operations.get
+# Service accounts, keys, and service-account-level IAM bindings (Workload Identity)
+- iam.serviceAccountKeys.create
+- iam.serviceAccountKeys.get
+- iam.serviceAccounts.create
+- iam.serviceAccounts.delete
+- iam.serviceAccounts.actAs
+- iam.serviceAccounts.get
+- iam.serviceAccounts.getIamPolicy
+- iam.serviceAccounts.list
+- iam.serviceAccounts.setIamPolicy
+- iam.serviceAccounts.update
+# Project-level IAM bindings (e.g. roles/dns.admin for cert-manager)
+- resourcemanager.projects.get
+- resourcemanager.projects.getIamPolicy
+- resourcemanager.projects.setIamPolicy