add benchmark for network topology with requiredDuringSchedulingIgnoredDuringExecution (#112)

Signed-off-by: Dmitry Shmulevich <[email protected]>
NVIDIA · Dec 11, 2024 · 3609dfe · 3609dfe
1 parent 39ed040
commit 3609dfe
Show file tree

Hide file tree

Showing 7 changed files with 328 additions and 40 deletions.
diff --git a/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml b/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml
@@ -0,0 +1,67 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: jobset.x-k8s.io/v1alpha2
+kind: JobSet
+metadata:
+  name: "{{._NAME_}}"
+  namespace: default
+spec:
+  # Declare the JobSet successful once the jobs of the "workers"
+  # replicatedJob (the only replicatedJob defined here) have finished.
+  successPolicy:
+    operator: All
+    targetReplicatedJobs:
+    - workers
+  replicatedJobs:
+  - name: workers
+    replicas: 1
+    template:
+      spec:
+        backoffLimit: 0
+        completions: {{.replicas}}  # integer field: keep unquoted
+        parallelism: {{.replicas}}  # integer field: keep unquoted
+        completionMode: NonIndexed
+        template:
+          metadata:
+            labels:
+              app: "{{._NAME_}}"  # quoted so the rendered label is always a string
+            annotations:
+              pod-complete.stage.kwok.x-k8s.io/delay: "{{.ttl}}"
+              pod-complete.stage.kwok.x-k8s.io/jitter-delay: "{{.ttl}}"
+          spec:
+            schedulerName: default-scheduler
+            affinity:
+              podAffinity:  # hard rule: pods sharing this app label must share one accelerator domain
+                requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                      - "{{._NAME_}}"  # must match the pod label "app" set above
+                  topologyKey: network.topology.kubernetes.io/accelerator
+            containers:
+            - name: test
+              image: ubuntu
+              imagePullPolicy: IfNotPresent
+              resources:
+                limits:
+                  cpu: 100m
+                  memory: 250M
+                  nvidia.com/gpu: "8"
+                requests:
+                  cpu: 100m
+                  memory: 250M
+                  nvidia.com/gpu: "8"
diff --git a/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml b/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml
@@ -53,7 +53,7 @@ spec:
                             operator: In
                             values:
                               - {{._NAME_}}
-                      topologyKey: net-layer-2
+                      topologyKey: network.topology.kubernetes.io/spine
                   - weight: 90
                     podAffinityTerm:
                       labelSelector:
@@ -62,7 +62,7 @@ spec:
                             operator: In
                             values:
                               - {{._NAME_}}
-                      topologyKey: net-layer-1
+                      topologyKey: network.topology.kubernetes.io/block
             containers:
             - name: test
               image: ubuntu

diff --git a/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml b/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml
@@ -51,7 +51,7 @@ spec:
                           operator: In
                           values:
                             - {{._NAME_}}
-                    topologyKey: net-layer-2
+                    topologyKey: network.topology.kubernetes.io/spine
                 - weight: 90
                   podAffinityTerm:
                     labelSelector:
@@ -60,7 +60,7 @@ spec:
                           operator: In
                           values:
                             - {{._NAME_}}
-                    topologyKey: net-layer-1
+                    topologyKey: network.topology.kubernetes.io/block
           schedulerName: runai-scheduler
           containers:
           - image: runai/mpi-worker:latest

diff --git a/resources/benchmarks/nwtopo/workflows/config-jobset-acc.yaml b/resources/benchmarks/nwtopo/workflows/config-jobset-acc.yaml
@@ -0,0 +1,23 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: config-jobset
+tasks:
+- type: RegisterObj
+  id: register
+  params:
+    nameFormat: 'jobset{{._ENUM_}}'
+    podCount: '{{.replicas}}'
+    podNameFormat: '{{._NAME_}}-workers-[0-9]+-.+'  # presumably a regex; "workers" is the replicatedJob name in the template
+    template: 'resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml'
diff --git a/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml b/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml
@@ -0,0 +1,145 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: config-nw-topo-nodes
+description: |
+  Create a 12-node cluster with a tree-like network topology
+  and two accelerator domains (nvl1 and nvl2):
+            __________ sw31 __________
+           /            |             \
+       sw21            sw22            sw23
+       /  \            /  \            /  \
+   sw11    sw12    sw13    sw14    sw15    sw16
+    /\      /\      /\      /\      /\      /\
+  n1  n2  n3  n4  n5  n6  n7  n8  n9 n10 n11 n12
+  |___|___|___|___|___|   |___|___|___|___|___|
+            |                       |
+           nvl1                    nvl2
+
+  Jobs are deployed by a separate workflow. With required pod affinity on the
+  accelerator label, all pods of a job must land in one domain (nvl1 or nvl2).
+tasks:
+- type: Configure
+  id: configure
+  params:
+    timeout: 5m
+    nodes:  # labels are listed from the tree root (datacenter) down to the accelerator domain
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n1
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/block: sw11
+        network.topology.kubernetes.io/accelerator: nvl1
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n2
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/block: sw11
+        network.topology.kubernetes.io/accelerator: nvl1
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n3
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/block: sw12
+        network.topology.kubernetes.io/accelerator: nvl1
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n4
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/block: sw12
+        network.topology.kubernetes.io/accelerator: nvl1
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n5
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/block: sw13
+        network.topology.kubernetes.io/accelerator: nvl1
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n6
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/block: sw13
+        network.topology.kubernetes.io/accelerator: nvl1
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n7
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/block: sw14
+        network.topology.kubernetes.io/accelerator: nvl2
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n8
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/block: sw14
+        network.topology.kubernetes.io/accelerator: nvl2
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n9
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/block: sw15
+        network.topology.kubernetes.io/accelerator: nvl2
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n10
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/block: sw15
+        network.topology.kubernetes.io/accelerator: nvl2
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n11
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/block: sw16
+        network.topology.kubernetes.io/accelerator: nvl2
+    - count: 1
+      type: dgxa100.80g
+      labels:
+        node-id: n12
+        nvidia.com/gpu.count: "8"
+        network.topology.kubernetes.io/datacenter: sw31
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/block: sw16
+        network.topology.kubernetes.io/accelerator: nvl2