diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..5207c037 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,43 @@ +name: Lint Charts + +on: + pull_request: + paths: charts/** + +jobs: + lint-test: + runs-on: ubuntu-latest + environment: Helm Charts + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Set up Helm + uses: azure/setup-helm@v3.4 + with: + version: v3.16.3 + + # Python is required because `ct lint` runs Yamale (https://github.com/23andMe/Yamale) and + # yamllint (https://github.com/adrienverge/yamllint) which require Python + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Set up chart-testing + uses: helm/chart-testing-action@v2.6.1 + with: + version: v3.11.0 + + - name: Run chart-testing (list-changed) + id: list-changed + run: | + changed=$(ct list-changed --config ct.yaml) + if [[ -n "$changed" ]]; then + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Run chart-testing (lint) + run: ct lint --config ct.yaml diff --git a/.github/workflows/test-operator-wandb.yaml b/.github/workflows/test-operator-wandb.yaml new file mode 100644 index 00000000..a75754cf --- /dev/null +++ b/.github/workflows/test-operator-wandb.yaml @@ -0,0 +1,71 @@ +name: Test operator-wandb Chart + +on: + pull_request: + paths: charts/operator-wandb/** + +jobs: + test: + name: Test Chart + strategy: + fail-fast: false + matrix: + k8s-version: ["v1.32.1", "v1.31.4", "v1.30.8"] + configuration: ["default", "separate-pods", "runs-v2-bufstream"] + runs-on: ubuntu-latest + environment: Helm Charts + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Set up Helm + uses: azure/setup-helm@v3.4 + with: + version: v3.16.3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Set up chart-testing + uses: helm/chart-testing-action@v2.6.1 + 
with: + version: v3.11.0 + + - name: Run chart-testing (list-changed) + id: list-changed + run: | + changed=$(ct list-changed --config ct.yaml) + if [[ -n "$changed" ]]; then + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Create kind cluster + uses: helm/kind-action@v1.12.0 + with: + version: v0.26.0 + cluster_name: chart-testing-${{ matrix.k8s-version }}-${{ matrix.configuration }} + node_image: kindest/node:${{ matrix.k8s-version }} + if: env.ACT || steps.list-changed.outputs.changed == 'true' + + - name: Install Minio + run: kubectl --context kind-chart-testing-${{ matrix.k8s-version }}-${{ matrix.configuration }} apply -f test-configs/minio/default.yaml + if: env.ACT || steps.list-changed.outputs.changed == 'true' + + - name: Wait for Minio to be Ready + run: kubectl --context kind-chart-testing-${{ matrix.k8s-version }}-${{ matrix.configuration }} -n minio wait --for=condition=Ready pod/minio --timeout=300s + if: env.ACT || steps.list-changed.outputs.changed == 'true' + + - name: Run chart-testing (install) + env: + LICENSE: ${{ secrets.LICENSE }} + if: steps.list-changed.outputs.changed == 'true' + run: | + ct install \ + --charts ./charts/operator-wandb \ + --config ct.yaml \ + --helm-extra-args '--kube-context kind-chart-testing-${{ matrix.k8s-version }}-${{ matrix.configuration }} --timeout 600s' \ + --helm-extra-set-args "--values test-configs/operator-wandb/${{ matrix.configuration }}.yaml --set=license=$LICENSE" diff --git a/.github/workflows/test-operator.yaml b/.github/workflows/test-operator.yaml new file mode 100644 index 00000000..ee234ed0 --- /dev/null +++ b/.github/workflows/test-operator.yaml @@ -0,0 +1,54 @@ +# TODO(dpanzella): Uncomment this file when ready for automated tests of the operator chart + +#name: Test operator Chart +# +#on: +# pull_request: +# paths: charts/operator/** +# +#jobs: +# test: +# runs-on: ubuntu-latest +# environment: Helm Charts +# steps: +# - name: Checkout +# uses: actions/checkout@v2 +# with: +# 
fetch-depth: 0 +# +# - name: Set up Helm +# uses: azure/setup-helm@v3.4 +# with: +# version: v3.16.3 +# +# # Python is required because `ct lint` runs Yamale (https://github.com/23andMe/Yamale) and +# # yamllint (https://github.com/adrienverge/yamllint) which require Python +# - name: Set up Python +# uses: actions/setup-python@v4 +# with: +# python-version: "3.10" +# +# - name: Set up chart-testing +# uses: helm/chart-testing-action@v2.6.1 +# with: +# version: v3.11.0 +# +# - name: Run chart-testing (list-changed) +# id: list-changed +# run: | +# changed=$(ct list-changed --config ct.yaml) +# if [[ -n "$changed" ]]; then +# echo "changed=true" >> "$GITHUB_OUTPUT" +# fi +# +# - name: Create kind cluster +# uses: helm/kind-action@v1.8.0 +# with: +# version: v0.20.0 +# if: env.ACT || steps.list-changed.outputs.changed == 'true' +# +# - name: Run chart-testing (install) +# env: +# LICENSE: ${{ secrets.LICENSE }} +# if: steps.list-changed.outputs.changed == 'true' +# run: ct install --charts ./charts/operator --config ct.yaml --helm-extra-set-args --set=license=$LICENSE diff --git a/.github/workflows/lint-test.yaml b/.github/workflows/test-wandb.yaml similarity index 91% rename from .github/workflows/lint-test.yaml rename to .github/workflows/test-wandb.yaml index ff32a20b..8cf9dbb2 100644 --- a/.github/workflows/lint-test.yaml +++ b/.github/workflows/test-wandb.yaml @@ -1,9 +1,11 @@ -name: Lint and Test Charts +name: Test wandb Chart -on: pull_request +on: + pull_request: + paths: charts/wandb/** jobs: - lint-test: + test: runs-on: ubuntu-latest environment: Helm Charts steps: @@ -37,9 +39,6 @@ jobs: echo "::set-output name=changed::true" fi - - name: Run chart-testing (lint) - run: ct lint --config ct.yaml - - name: Create kind cluster uses: helm/kind-action@v1.8.0 with: diff --git a/charts/operator-wandb/Chart.yaml b/charts/operator-wandb/Chart.yaml index 5f5dbcbf..c144d371 100644 --- a/charts/operator-wandb/Chart.yaml +++ b/charts/operator-wandb/Chart.yaml @@ -2,7 
+2,7 @@ apiVersion: v2 name: operator-wandb description: A Helm chart for deploying W&B to Kubernetes type: application -version: 0.25.0 +version: 0.25.1 appVersion: 1.0.0 icon: https://wandb.ai/logo.svg diff --git a/charts/operator-wandb/charts/bufstream/config/bufstream.yaml.tpl b/charts/operator-wandb/charts/bufstream/config/bufstream.yaml.tpl index 7d05b936..da2eeb21 100644 --- a/charts/operator-wandb/charts/bufstream/config/bufstream.yaml.tpl +++ b/charts/operator-wandb/charts/bufstream/config/bufstream.yaml.tpl @@ -24,7 +24,7 @@ storage: {{- end }} {{- if .Values.storage.s3.accessKeyId }} access_key_id: - string: {{ .Values.storage.s3.accessKeyId | quote }} + string: {{ tpl .Values.storage.s3.accessKeyId . | quote }} secret_access_key: path: /config/secrets/storage/secret_access_key {{- end }} @@ -40,7 +40,7 @@ storage: etcd: addresses: {{- range .Values.metadata.etcd.addresses }} - - host: {{ .host | quote }} + - host: {{ tpl .host $ | quote }} port: {{ .port }} {{- end }} {{- with .Values.metadata.etcd.tls }} diff --git a/charts/operator-wandb/charts/bufstream/templates/deployment.yaml b/charts/operator-wandb/charts/bufstream/templates/deployment.yaml index a2f24e0b..21d7bd05 100644 --- a/charts/operator-wandb/charts/bufstream/templates/deployment.yaml +++ b/charts/operator-wandb/charts/bufstream/templates/deployment.yaml @@ -250,9 +250,9 @@ spec: {{- if and (eq .Values.storage.use "s3") (.Values.storage.s3.accessKeyId) }} - name: storage secret: - secretName: {{ .Values.storage.s3.secretName }} + secretName: {{ tpl .Values.storage.s3.secretName . }} items: - - key: secret_access_key + - key: {{ tpl (default "secret_access_key" .Values.storage.s3.secretKeyName) . 
}} path: secret_access_key {{- end }} {{- if and (eq .Values.storage.use "gcs") (.Values.storage.gcs.secretName) }} diff --git a/charts/operator-wandb/charts/mysql/templates/statefulset.yaml b/charts/operator-wandb/charts/mysql/templates/statefulset.yaml index 438e0931..c2b2a1be 100644 --- a/charts/operator-wandb/charts/mysql/templates/statefulset.yaml +++ b/charts/operator-wandb/charts/mysql/templates/statefulset.yaml @@ -59,17 +59,17 @@ spec: {{- include "mysql.extraEnv" (dict "global" .Values.global "local" .Values) | nindent 12 }} {{- include "wandb.extraEnvFrom" (dict "root" $ "local" .) | nindent 12 }} livenessProbe: - exec: - command: - - sh - - -c - - "mysqladmin ping -u root -p${MYSQL_ROOT_PASSWORD}" + tcpSocket: + port: 3306 readinessProbe: - exec: - command: - - sh - - -c - - "mysqladmin ping -u root -p${MYSQL_ROOT_PASSWORD}" + tcpSocket: + port: 3306 + startupProbe: + initialDelaySeconds: 20 + periodSeconds: 5 + failureThreshold: 60 + tcpSocket: + port: 3306 volumeMounts: - name: data mountPath: /var/lib/mysql diff --git a/charts/operator-wandb/charts/mysql/values.yaml b/charts/operator-wandb/charts/mysql/values.yaml index b779d139..6e9e61d2 100644 --- a/charts/operator-wandb/charts/mysql/values.yaml +++ b/charts/operator-wandb/charts/mysql/values.yaml @@ -29,9 +29,9 @@ pod: annotations: {} securityContext: runAsNonRoot: true - runAsUser: 1000 + runAsUser: 999 runAsGroup: 0 - fsGroup: 1000 + fsGroup: 999 fsGroupChangePolicy: "OnRootMismatch" seccompProfile: type: "" 
diff --git a/charts/operator-wandb/templates/_redis.tpl b/charts/operator-wandb/templates/_redis.tpl index 9c29f6e2..412b117f 100644 --- a/charts/operator-wandb/templates/_redis.tpl +++ b/charts/operator-wandb/templates/_redis.tpl @@ -5,7 +5,7 @@ Return name of secret where redis information is stored {{- if .Values.global.redis.secret.secretName -}} {{ .Values.global.redis.secret.secretName }} {{- else -}} - {{- print .Release.Name "-redis-secret" -}} + {{- print .Release.Name "-redis" -}} {{- end -}} {{- end -}} diff --git a/charts/operator-wandb/templates/redis.yaml b/charts/operator-wandb/templates/redis.yaml index 7330fa1e..f3f3adc5 100644 --- a/charts/operator-wandb/templates/redis.yaml +++ b/charts/operator-wandb/templates/redis.yaml @@ -1,16 +1,14 @@ -{{- if not .Values.redis.install }} -{{- $secretName := (include "wandb.redis.passwordSecret" .) }} +{{- if not .Values.global.redis.secret.secretName }} apiVersion: v1 kind: Secret metadata: - name: "{{ $secretName }}" + name: "{{ include "wandb.redis.passwordSecret" . }}" labels: {{- include "wandb.commonLabels" . | nindent 4 }} data: REDIS_PASSWORD: {{ include "wandb.redis.password" . | b64enc }} REDIS_CA_CERT: {{ include "wandb.redis.caCert" . | b64enc }} {{- end }} - --- apiVersion: v1 kind: ConfigMap diff --git a/charts/operator-wandb/templates/tests/test-connection.yaml b/charts/operator-wandb/templates/tests/test-connection.yaml new file mode 100644 index 00000000..a7010b65 --- /dev/null +++ b/charts/operator-wandb/templates/tests/test-connection.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "wandb.fullname" . }}-test-connection" + labels: + {{- include "wandb.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wandb-verify + image: python:3.10 + env: + - name: WANDB_BASE_URL + value: "http://{{ .Release.Name }}-app:8080" + - name: WANDB_API_KEY + value: "test-api-key" + # wandb verify is terribly flaky; there's a PR up to fix it, until then we try 3 times + command: + - sh + - -c + - "pip install wandb==0.17.8 && ((sleep 30 && wandb verify) || (sleep 30 && wandb verify) || (sleep 30 && wandb verify))" + restartPolicy: Never diff --git a/charts/operator-wandb/values.yaml b/charts/operator-wandb/values.yaml index 310a1622..a546ab20 100644 --- a/charts/operator-wandb/values.yaml +++ b/charts/operator-wandb/values.yaml @@ -122,7 +122,7 @@ global: redis: host: "" port: 6379 - password: "" + password: "redis-password" parameters: {} caCert: "" secret: @@ -323,7 +323,9 @@ redis: nameOverride: "redis" architecture: standalone auth: - enabled: false + enabled: true + existingSecret: '{{ include "wandb.redis.passwordSecret" . }}' + existingSecretPasswordKey: '{{ .Values.global.redis.secret.secretKey }}' metrics: enabled: false service: @@ -526,7 +528,7 @@ glue: "{{ .Release.Name }}-mysql": "secretRef" "{{ .Release.Name }}-bucket-configmap": "configMapRef" "{{ .Release.Name }}-mysql-configmap": "configMapRef" - "{{ .Release.Name }}-redis-secret": "secretRef" + '{{ include "wandb.redis.passwordSecret" . }}': "secretRef" "{{ .Release.Name }}-redis-configmap": "configMapRef" "{{ .Release.Name }}-global-secret": "secretRef" "{{ .Release.Name }}-gorilla-secret": "secretRef" @@ -622,7 +624,7 @@ glue: optional: true - name: redis-ca secret: - secretName: "{{ .Release.Name }}-redis-secret" + secretName: '{{ include "wandb.redis.passwordSecret" . 
}}' items: - key: REDIS_CA_CERT path: redis_ca.pem @@ -717,7 +719,7 @@ api: "{{ .Release.Name }}-mysql": "secretRef" "{{ .Release.Name }}-bucket-configmap": "configMapRef" "{{ .Release.Name }}-mysql-configmap": "configMapRef" - "{{ .Release.Name }}-redis-secret": "secretRef" + '{{ include "wandb.redis.passwordSecret" . }}': "secretRef" "{{ .Release.Name }}-redis-configmap": "configMapRef" "{{ .Release.Name }}-kafka-configmap": "configMapRef" "{{ .Release.Name }}-global-secret": "secretRef" @@ -819,7 +821,7 @@ api: optional: true - name: redis-ca secret: - secretName: "{{ .Release.Name }}-redis-secret" + secretName: '{{ include "wandb.redis.passwordSecret" . }}' items: - key: REDIS_CA_CERT path: redis_ca.pem diff --git a/test-configs/minio/default.yaml b/test-configs/minio/default.yaml new file mode 100644 index 00000000..2c8908e2 --- /dev/null +++ b/test-configs/minio/default.yaml @@ -0,0 +1,51 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: minio # Change this value if you want a different namespace name + labels: + name: minio # Change this value to match metadata.name +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + app: minio + name: minio + namespace: minio +spec: + containers: + - name: minio + image: quay.io/minio/minio:latest + env: + - name: MINIO_ROOT_USER + value: "minio" + - name: MINIO_ROOT_PASSWORD + value: "minio123" + command: + - /bin/bash + - -c + args: + - mkdir -p /data/bucket && minio server /data --console-address :9090 + ports: + - containerPort: 9000 + name: minio + volumeMounts: + - mountPath: /data + name: localvolume # + volumes: + - name: localvolume + hostPath: + path: /mnt/minio/data + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Service +metadata: + name: minio + namespace: minio # Change this value to match the namespace metadata.name +spec: + selector: + app: minio + ports: + - port: 9000 + name: minio diff --git a/test-configs/operator-wandb/default.yaml 
b/test-configs/operator-wandb/default.yaml new file mode 100644 index 00000000..832857b1 --- /dev/null +++ b/test-configs/operator-wandb/default.yaml @@ -0,0 +1,53 @@ +global: + bucket: + provider: "s3" + name: "minio.minio.svc.cluster.local:9000/bucket" + region: "us-east-1" + accessKey: "minio" + secretKey: "minio123" + +app: + extraEnv: + GLOBAL_ADMIN_API_KEY: "test-api-key" + GORILLA_INSECURE_ALLOW_API_KEY_ADMIN_ACCESS: "true" + resources: + requests: + cpu: "100m" + memory: "128Mi" + +parquet: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +weave: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +console: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +ingress: + install: false + create: false + +mysql: + install: true + resources: + requests: + cpu: "100m" + memory: "128Mi" +redis: + install: true + auth: + enabled: true + resources: + requests: + cpu: "100m" + memory: "128Mi" diff --git a/test-configs/operator-wandb/runs-v2-bufstream.yaml b/test-configs/operator-wandb/runs-v2-bufstream.yaml new file mode 100644 index 00000000..959e08cb --- /dev/null +++ b/test-configs/operator-wandb/runs-v2-bufstream.yaml @@ -0,0 +1,114 @@ +global: + bucket: + provider: "s3" + name: "minio.minio.svc.cluster.local:9000/bucket" + region: "us-east-1" + accessKey: "minio" + secretKey: "minio123" + + beta: + glue: + enabled: true + api: + enabled: true + bufstream: + enabled: true +app: + extraEnv: + GLOBAL_ADMIN_API_KEY: "test-api-key" + GORILLA_INSECURE_ALLOW_API_KEY_ADMIN_ACCESS: "true" + GORILLA_RUN_STORE_ONPREM_MIGRATE_CREATE_RUN_TABLES: 'true' + GORILLA_RUN_STORE_ONPREM_MIGRATE_CREATE_RUN_STORE: 'true' + GORILLA_RUN_STORE_ONPREM_MIGRATE_SHADOW_RUN_UPDATES: 'true' + GORILLA_RUN_STORE_ONPREM_MIGRATE_DISABLE_READS: 'false' + GORILLA_RUN_STORE_ONPREM_MIGRATE_FLAT_RUNS_MIGRATOR: 'true' + GORILLA_STATSD_HOST: datadog.datadog + GORILLA_STATSD_PORT: "8125" + resources: + requests: + cpu: "100m" + memory: "128Mi" + +api: + containers: + api: + 
resources: + requests: + cpu: "100m" + memory: "128Mi" + +bufstream: + install: true + zone: "kind" + storage: + use: s3 + s3: + accessKeyId: "minio" + secretName: '{{ (include "wandb.bucket" . | fromYaml).secretName }}' + secretKeyName: '{{ (include "wandb.bucket" . | fromYaml).secretKeyName }}' + bucket: bucket + region: "us-east-1" + forcePathStyle: true + endpoint: http://minio.minio.svc.cluster.local:9000 + metadata: + use: etcd + etcd: + addresses: + - host: "{{ .Release.Name }}-etcd.{{ .Release.Namespace }}.svc.cluster.local" + port: 2379 + bufstream: + deployment: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +flat-run-fields-updater: + install: true + resources: + requests: + cpu: "100m" + memory: "128Mi" + +glue: + containers: + glue: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +parquet: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +weave: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +console: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +ingress: + install: false + create: false + +mysql: + install: true + resources: + requests: + cpu: "100m" + memory: "128Mi" +redis: + install: true + resources: + requests: + cpu: "100m" + memory: "128Mi" diff --git a/test-configs/operator-wandb/separate-pods.yaml b/test-configs/operator-wandb/separate-pods.yaml new file mode 100644 index 00000000..b50cc949 --- /dev/null +++ b/test-configs/operator-wandb/separate-pods.yaml @@ -0,0 +1,72 @@ +global: + bucket: + provider: "s3" + name: "minio.minio.svc.cluster.local:9000/bucket" + region: "us-east-1" + accessKey: "minio" + secretKey: "minio123" + + beta: + glue: + enabled: true + api: + enabled: true +app: + extraEnv: + GLOBAL_ADMIN_API_KEY: "test-api-key" + GORILLA_INSECURE_ALLOW_API_KEY_ADMIN_ACCESS: "true" + resources: + requests: + cpu: "100m" + memory: "128Mi" + +api: + containers: + api: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +glue: + containers: + glue: + resources: + requests: 
+ cpu: "100m" + memory: "128Mi" + +parquet: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +weave: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +console: + resources: + requests: + cpu: "100m" + memory: "128Mi" + +ingress: + install: false + create: false + +mysql: + install: true + resources: + requests: + cpu: "100m" + memory: "128Mi" +redis: + install: true + resources: + requests: + cpu: "100m" + memory: "128Mi"