From e99f1ce754240bd775c454aabc2d2e7ea9a39164 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 14 Jan 2025 18:37:09 +0000 Subject: [PATCH 1/6] KEP-2170: Deploy JobSet in kubeflow-system namespace Signed-off-by: Andrey Velichkevich --- manifests/v2/base/manager/kustomization.yaml | 2 - .../kustomization.yaml | 0 .../torch_distributed.yaml} | 0 .../manager/jobset_manager_config.yaml | 3 ++ .../v2/overlays/manager/kustomization.yaml | 39 +++++++++++++++++++ .../{standalone => manager}/namespace.yaml | 2 + .../manager/patches/jobset_config_patch.yaml | 21 ++++++++++ .../patches/jobset_remove_namespace.yaml} | 4 +- .../overlays/only-manager/kustomization.yaml | 18 --------- .../kustomization.yaml | 2 +- .../v2/overlays/standalone/kustomization.yaml | 19 --------- 11 files changed, 69 insertions(+), 41 deletions(-) rename manifests/v2/base/runtimes/{pre-training => pretraining}/kustomization.yaml (100%) rename manifests/v2/base/runtimes/{pre-training/torch-distributed.yaml => pretraining/torch_distributed.yaml} (100%) create mode 100644 manifests/v2/overlays/manager/jobset_manager_config.yaml create mode 100644 manifests/v2/overlays/manager/kustomization.yaml rename manifests/v2/overlays/{standalone => manager}/namespace.yaml (57%) create mode 100644 manifests/v2/overlays/manager/patches/jobset_config_patch.yaml rename manifests/v2/overlays/{only-manager/namespace.yaml => manager/patches/jobset_remove_namespace.yaml} (50%) delete mode 100644 manifests/v2/overlays/only-manager/kustomization.yaml rename manifests/v2/overlays/{only-runtimes => runtimes}/kustomization.yaml (66%) delete mode 100644 manifests/v2/overlays/standalone/kustomization.yaml diff --git a/manifests/v2/base/manager/kustomization.yaml b/manifests/v2/base/manager/kustomization.yaml index a62e9473d9..7394a6d059 100644 --- a/manifests/v2/base/manager/kustomization.yaml +++ b/manifests/v2/base/manager/kustomization.yaml @@ -1,4 +1,2 @@ resources: - manager.yaml -# TODO (andreyvelich): Move it to overlays once we copy the JobSet manifests. -namespace: kubeflow-system diff --git a/manifests/v2/base/runtimes/pre-training/kustomization.yaml b/manifests/v2/base/runtimes/pretraining/kustomization.yaml similarity index 100% rename from manifests/v2/base/runtimes/pre-training/kustomization.yaml rename to manifests/v2/base/runtimes/pretraining/kustomization.yaml diff --git a/manifests/v2/base/runtimes/pre-training/torch-distributed.yaml b/manifests/v2/base/runtimes/pretraining/torch_distributed.yaml similarity index 100% rename from manifests/v2/base/runtimes/pre-training/torch-distributed.yaml rename to manifests/v2/base/runtimes/pretraining/torch_distributed.yaml diff --git a/manifests/v2/overlays/manager/jobset_manager_config.yaml b/manifests/v2/overlays/manager/jobset_manager_config.yaml new file mode 100644 index 0000000000..0842f50ff6 --- /dev/null +++ b/manifests/v2/overlays/manager/jobset_manager_config.yaml @@ -0,0 +1,3 @@ +apiVersion: config.jobset.x-k8s.io/v1alpha1 +kind: Configuration +namespace: kubeflow-system diff --git a/manifests/v2/overlays/manager/kustomization.yaml b/manifests/v2/overlays/manager/kustomization.yaml new file mode 100644 index 0000000000..bf37ae448e --- /dev/null +++ b/manifests/v2/overlays/manager/kustomization.yaml @@ -0,0 +1,39 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +# Namespace where all resources are deployed. +namespace: kubeflow-system + +resources: + - namespace.yaml + - ../../base/crds + - ../../base/manager + - ../../base/rbac + - ../../base/webhook + - https://github.com/kubernetes-sigs/jobset/releases/download/v0.7.2/manifests.yaml + +# Update the Kubeflow Training manager image tag. +images: + - name: kubeflow/training-operator-v2 + newTag: latest + +# Disable postfix for Secret and ConfigMap +generatorOptions: + disableNameSuffixHash: true + +# Secret for the Kubeflow Training webhook. +secretGenerator: + - name: training-operator-v2-webhook-cert + namespace: kubeflow-system + +# Config for the JobSet manager. +# TODO (andreyvelich): Remove this when this is released: https://github.com/kubernetes-sigs/jobset/issues/720 +configMapGenerator: + - files: + - jobset_manager_config.yaml + name: jobset-manager-config + +# Add required patches. +patchesStrategicMerge: + - patches/jobset_remove_namespace.yaml # Remove namespace from the JobSet release manifests. + - patches/jobset_config_patch.yaml # Add custom manager config to the JobSet. diff --git a/manifests/v2/overlays/standalone/namespace.yaml b/manifests/v2/overlays/manager/namespace.yaml similarity index 57% rename from manifests/v2/overlays/standalone/namespace.yaml rename to manifests/v2/overlays/manager/namespace.yaml index 6bfc4968bd..ad69cc64ed 100644 --- a/manifests/v2/overlays/standalone/namespace.yaml +++ b/manifests/v2/overlays/manager/namespace.yaml @@ -2,3 +2,5 @@ apiVersion: v1 kind: Namespace metadata: name: kubeflow-system + labels: + app.kubernetes.io/instance: system diff --git a/manifests/v2/overlays/manager/patches/jobset_config_patch.yaml b/manifests/v2/overlays/manager/patches/jobset_config_patch.yaml new file mode 100644 index 0000000000..c8e5fdfef6 --- /dev/null +++ b/manifests/v2/overlays/manager/patches/jobset_config_patch.yaml @@ -0,0 +1,21 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jobset-controller-manager + namespace: jobset-system +spec: + template: + spec: + containers: + - name: manager + args: + - "--config=/jobset_manager_config.yaml" + volumeMounts: + - name: jobset-manager-config + mountPath: /jobset_manager_config.yaml + subPath: jobset_manager_config.yaml + readOnly: true + volumes: + - name: jobset-manager-config + configMap: + name: jobset-manager-config diff --git a/manifests/v2/overlays/only-manager/namespace.yaml b/manifests/v2/overlays/manager/patches/jobset_remove_namespace.yaml similarity index 50% rename from manifests/v2/overlays/only-manager/namespace.yaml rename to manifests/v2/overlays/manager/patches/jobset_remove_namespace.yaml index 6bfc4968bd..1e7a99903e 100644 --- a/manifests/v2/overlays/only-manager/namespace.yaml +++ b/manifests/v2/overlays/manager/patches/jobset_remove_namespace.yaml @@ -1,4 +1,6 @@ +--- +$patch: delete apiVersion: v1 kind: Namespace metadata: - name: kubeflow-system + name: jobset-system diff --git a/manifests/v2/overlays/only-manager/kustomization.yaml b/manifests/v2/overlays/only-manager/kustomization.yaml deleted file mode 100644 index b6f81239d8..0000000000 --- a/manifests/v2/overlays/only-manager/kustomization.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - namespace.yaml - - ../../base/crds - - ../../base/manager - - ../../base/rbac - - ../../base/webhook - # TODO (andreyvelich): JobSet should support kubeflow-system namespace. - - https://github.com/kubernetes-sigs/jobset/releases/download/v0.6.0/manifests.yaml -images: - - name: kubeflow/training-operator-v2 - newTag: latest -secretGenerator: - - name: training-operator-v2-webhook-cert - namespace: kubeflow-system - options: - disableNameSuffixHash: true diff --git a/manifests/v2/overlays/only-runtimes/kustomization.yaml b/manifests/v2/overlays/runtimes/kustomization.yaml similarity index 66% rename from manifests/v2/overlays/only-runtimes/kustomization.yaml rename to manifests/v2/overlays/runtimes/kustomization.yaml index 41fb29b783..970726d8c8 100644 --- a/manifests/v2/overlays/only-runtimes/kustomization.yaml +++ b/manifests/v2/overlays/runtimes/kustomization.yaml @@ -1,4 +1,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - ../../base/runtimes/pre-training + - ../../base/runtimes/pretraining diff --git a/manifests/v2/overlays/standalone/kustomization.yaml b/manifests/v2/overlays/standalone/kustomization.yaml deleted file mode 100644 index 2a59e17ed4..0000000000 --- a/manifests/v2/overlays/standalone/kustomization.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - namespace.yaml - - ../../base/crds - - ../../base/manager - - ../../base/rbac - - ../../base/webhook - - ../../base/runtimes/pre-training - # TODO (andreyvelich): JobSet should support kubeflow-system namespace. - - https://github.com/kubernetes-sigs/jobset/releases/download/v0.6.0/manifests.yaml -images: - - name: kubeflow/training-operator-v2 - newTag: latest -secretGenerator: - - name: training-operator-v2-webhook-cert - namespace: kubeflow-system - options: - disableNameSuffixHash: true From de9c1e203bc166106f2a227b9b06395ddfc89ed6 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 14 Jan 2025 18:40:56 +0000 Subject: [PATCH 2/6] Remove namespace from base Signed-off-by: Andrey Velichkevich --- manifests/v2/base/rbac/kustomization.yaml | 2 -- manifests/v2/base/runtimes/pretraining/kustomization.yaml | 2 +- manifests/v2/base/webhook/kustomization.yaml | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/manifests/v2/base/rbac/kustomization.yaml b/manifests/v2/base/rbac/kustomization.yaml index e9fca6afba..25a37bf74f 100644 --- a/manifests/v2/base/rbac/kustomization.yaml +++ b/manifests/v2/base/rbac/kustomization.yaml @@ -2,5 +2,3 @@ resources: - role.yaml - role_binding.yaml - service_account.yaml -# TODO (andreyvelich): Move it to overlays once we copy the JobSet manifests. -namespace: kubeflow-system diff --git a/manifests/v2/base/runtimes/pretraining/kustomization.yaml b/manifests/v2/base/runtimes/pretraining/kustomization.yaml index 1fb6985131..6facf87216 100644 --- a/manifests/v2/base/runtimes/pretraining/kustomization.yaml +++ b/manifests/v2/base/runtimes/pretraining/kustomization.yaml @@ -1,4 +1,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - torch-distributed.yaml + - torch_distributed.yaml diff --git a/manifests/v2/base/webhook/kustomization.yaml b/manifests/v2/base/webhook/kustomization.yaml index 1ea670ceef..5723808d02 100644 --- a/manifests/v2/base/webhook/kustomization.yaml +++ b/manifests/v2/base/webhook/kustomization.yaml @@ -10,5 +10,3 @@ patches: kind: ValidatingWebhookConfiguration configurations: - kustomizeconfig.yaml -# TODO (andreyvelich): Move it to overlays once we copy the JobSet manifests. -namespace: kubeflow-system From 2d0fa6a9744e1c6a9c766889d2f95b28c9691ad7 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Tue, 14 Jan 2025 18:46:48 +0000 Subject: [PATCH 3/6] Remove label from namespace Signed-off-by: Andrey Velichkevich --- manifests/v2/overlays/manager/namespace.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/manifests/v2/overlays/manager/namespace.yaml b/manifests/v2/overlays/manager/namespace.yaml index ad69cc64ed..6bfc4968bd 100644 --- a/manifests/v2/overlays/manager/namespace.yaml +++ b/manifests/v2/overlays/manager/namespace.yaml @@ -2,5 +2,3 @@ apiVersion: v1 kind: Namespace metadata: name: kubeflow-system - labels: - app.kubernetes.io/instance: system From 6021349dc3740d979e7acfe54647f57d1f3b35d2 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Wed, 15 Jan 2025 23:52:47 +0000 Subject: [PATCH 4/6] Create third-party dir for JobSet Signed-off-by: Andrey Velichkevich --- .../v2/overlays/manager/kustomization.yaml | 20 +++---------------- .../jobset}/jobset_manager_config.yaml | 0 .../v2/third-party/jobset/kustomization.yaml | 19 ++++++++++++++++++ .../jobset}/patches/jobset_config_patch.yaml | 0 .../patches/jobset_remove_namespace.yaml | 0 5 files changed, 22 insertions(+), 17 deletions(-) rename manifests/v2/{overlays/manager => third-party/jobset}/jobset_manager_config.yaml (100%) create mode 100644 manifests/v2/third-party/jobset/kustomization.yaml rename manifests/v2/{overlays/manager => third-party/jobset}/patches/jobset_config_patch.yaml (100%) rename manifests/v2/{overlays/manager => third-party/jobset}/patches/jobset_remove_namespace.yaml (100%) diff --git a/manifests/v2/overlays/manager/kustomization.yaml b/manifests/v2/overlays/manager/kustomization.yaml index bf37ae448e..8e3b9873df 100644 --- a/manifests/v2/overlays/manager/kustomization.yaml +++ b/manifests/v2/overlays/manager/kustomization.yaml @@ -10,30 +10,16 @@ resources: - ../../base/manager - ../../base/rbac - ../../base/webhook - - https://github.com/kubernetes-sigs/jobset/releases/download/v0.7.2/manifests.yaml + - ../../third-party/jobset # Comment this line if JobSet is installed on the Kubernetes cluster. # Update the Kubeflow Training manager image tag. images: - name: kubeflow/training-operator-v2 newTag: latest -# Disable postfix for Secret and ConfigMap -generatorOptions: - disableNameSuffixHash: true - # Secret for the Kubeflow Training webhook. secretGenerator: - name: training-operator-v2-webhook-cert namespace: kubeflow-system - -# Config for the JobSet manager. -# TODO (andreyvelich): Remove this when this is released: https://github.com/kubernetes-sigs/jobset/issues/720 -configMapGenerator: - - files: - - jobset_manager_config.yaml - name: jobset-manager-config - -# Add required patches. -patchesStrategicMerge: - - patches/jobset_remove_namespace.yaml # Remove namespace from the JobSet release manifests. - - patches/jobset_config_patch.yaml # Add custom manager config to the JobSet. + options: + disableNameSuffixHash: true diff --git a/manifests/v2/overlays/manager/jobset_manager_config.yaml b/manifests/v2/third-party/jobset/jobset_manager_config.yaml similarity index 100% rename from manifests/v2/overlays/manager/jobset_manager_config.yaml rename to manifests/v2/third-party/jobset/jobset_manager_config.yaml diff --git a/manifests/v2/third-party/jobset/kustomization.yaml b/manifests/v2/third-party/jobset/kustomization.yaml new file mode 100644 index 0000000000..6722d86e97 --- /dev/null +++ b/manifests/v2/third-party/jobset/kustomization.yaml @@ -0,0 +1,19 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - https://github.com/kubernetes-sigs/jobset/releases/download/v0.7.2/manifests.yaml + +# Config for the JobSet manager. +# TODO (andreyvelich): Remove this when this is released: https://github.com/kubernetes-sigs/jobset/issues/720 +configMapGenerator: + - name: jobset-manager-config + files: + - jobset_manager_config.yaml + options: + disableNameSuffixHash: true + +# Add required patches. +patchesStrategicMerge: + - patches/jobset_remove_namespace.yaml # Remove namespace from the JobSet release manifests. + - patches/jobset_config_patch.yaml # Add custom manager config to the JobSet. diff --git a/manifests/v2/overlays/manager/patches/jobset_config_patch.yaml b/manifests/v2/third-party/jobset/patches/jobset_config_patch.yaml similarity index 100% rename from manifests/v2/overlays/manager/patches/jobset_config_patch.yaml rename to manifests/v2/third-party/jobset/patches/jobset_config_patch.yaml diff --git a/manifests/v2/overlays/manager/patches/jobset_remove_namespace.yaml b/manifests/v2/third-party/jobset/patches/jobset_remove_namespace.yaml similarity index 100% rename from manifests/v2/overlays/manager/patches/jobset_remove_namespace.yaml rename to manifests/v2/third-party/jobset/patches/jobset_remove_namespace.yaml From ce42837300077aaae6b95781cef55050fb5414f8 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sun, 26 Jan 2025 18:30:33 +0000 Subject: [PATCH 5/6] Bump JobSet to v0.7.3 Signed-off-by: Andrey Velichkevich --- manifests/v2/third-party/jobset/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/v2/third-party/jobset/kustomization.yaml b/manifests/v2/third-party/jobset/kustomization.yaml index 6722d86e97..415624afde 100644 --- a/manifests/v2/third-party/jobset/kustomization.yaml +++ b/manifests/v2/third-party/jobset/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - https://github.com/kubernetes-sigs/jobset/releases/download/v0.7.2/manifests.yaml + - https://github.com/kubernetes-sigs/jobset/releases/download/v0.7.3/manifests.yaml # Config for the JobSet manager. # TODO (andreyvelich): Remove this when this is released: https://github.com/kubernetes-sigs/jobset/issues/720 From c78d9809c9a341a59b91250948f1179cfb9e8511 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Sun, 26 Jan 2025 18:36:33 +0000 Subject: [PATCH 6/6] Drop namespace from JobSet config Signed-off-by: Andrey Velichkevich --- manifests/v2/third-party/jobset/jobset_manager_config.yaml | 1 - manifests/v2/third-party/jobset/kustomization.yaml | 1 - 2 files changed, 2 deletions(-) diff --git a/manifests/v2/third-party/jobset/jobset_manager_config.yaml b/manifests/v2/third-party/jobset/jobset_manager_config.yaml index 0842f50ff6..ac9957d47d 100644 --- a/manifests/v2/third-party/jobset/jobset_manager_config.yaml +++ b/manifests/v2/third-party/jobset/jobset_manager_config.yaml @@ -1,3 +1,2 @@ apiVersion: config.jobset.x-k8s.io/v1alpha1 kind: Configuration -namespace: kubeflow-system diff --git a/manifests/v2/third-party/jobset/kustomization.yaml b/manifests/v2/third-party/jobset/kustomization.yaml index 415624afde..139fa7300d 100644 --- a/manifests/v2/third-party/jobset/kustomization.yaml +++ b/manifests/v2/third-party/jobset/kustomization.yaml @@ -5,7 +5,6 @@ resources: - https://github.com/kubernetes-sigs/jobset/releases/download/v0.7.3/manifests.yaml # Config for the JobSet manager. -# TODO (andreyvelich): Remove this when this is released: https://github.com/kubernetes-sigs/jobset/issues/720 configMapGenerator: - name: jobset-manager-config files: