Merge pull request #806 from jkottiku/master

Power stress & performance conf.
ROCm · Sep 10, 2024 · 9fa1e36 · 9fa1e36
1 parent 6e3d83b
commit 9fa1e36
Show file tree

Hide file tree

Showing 2 changed files with 333 additions and 0 deletions.
diff --git a/rvs/conf/MI300X-HF/gst_single.conf b/rvs/conf/MI300X-HF/gst_single.conf
@@ -0,0 +1,269 @@
+# ################################################################################
+# #
+# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
+# #
+# # MIT LICENSE:
+# # Permission is hereby granted, free of charge, to any person obtaining a copy of
+# # this software and associated documentation files (the "Software"), to deal in
+# # the Software without restriction, including without limitation the rights to
+# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# # of the Software, and to permit persons to whom the Software is furnished to do
+# # so, subject to the following conditions:
+# #
+# # The above copyright notice and this permission notice shall be included in all
+# # copies or substantial portions of the Software.
+# #
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# # SOFTWARE.
+# #
+# ###############################################################################
+
+# GST test - gst-1215Tflops-4K4K8K-rand-fp8
+#
+# Preconditions:
+#   Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g
+#   option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify
+#   all the GPUs IDs separated by white space
+#   Set matrices sizes to 4864 * 4096 * 8192
+#   Set matrix data type as fp8 real number
+#   Set matrix data initialization method as random integer
+#   Set copy_matrix to false (the matrices will be copied to GPUs only once)
+#   Set target stress GFLOPS as 1215000 (1215 TFLOPS)
+#
+# Expected result:
+#   The test on each GPU passes (TRUE) if the GPU achieves 1215 TFLOPS or more
+#   within the test duration of 15 seconds after ramp-up duration of 5 seconds.
+#   Otherwise, the test on that GPU fails (FALSE).
+
+actions:
+- name: gst-1215Tflops-4K4K8K-rand-fp8
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 1215000  # pass threshold in GFLOPS (1215 TFLOPS)
+  matrix_size_a: 4864  # matrix sizes: 4864 * 4096 * 8192
+  matrix_size_b: 4096
+  matrix_size_c: 8192
+  matrix_init: rand  # random integer initialization
+  data_type: fp8_r  # fp8 real number
+  lda: 8320
+  ldb: 8320
+  ldc: 4992
+  ldd: 4992
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-981Tflops-4K4K8K-trig-fp8
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 981000  # pass threshold in GFLOPS (981 TFLOPS)
+  matrix_size_a: 4864  # matrix sizes: 4864 * 4096 * 8192
+  matrix_size_b: 4096
+  matrix_size_c: 8192
+  matrix_init: trig  # matrix data initialization method
+  data_type: fp8_r  # fp8 real number
+  lda: 8320
+  ldb: 8320
+  ldc: 4992
+  ldd: 4992
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-639Tflops-4K4K8K-rand-fp16
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 639000  # pass threshold in GFLOPS (639 TFLOPS)
+  matrix_size_a: 4864  # matrix sizes: 4864 * 4096 * 8192
+  matrix_size_b: 4096
+  matrix_size_c: 8192
+  matrix_init: rand  # random integer initialization
+  data_type: fp16_r  # fp16 matrix data type
+  lda: 8320
+  ldb: 8320
+  ldc: 4992
+  ldd: 4992
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-523Tflops-4K4K8K-trig-fp16
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 523000  # pass threshold in GFLOPS (523 TFLOPS)
+  matrix_size_a: 4864  # matrix sizes: 4864 * 4096 * 8192
+  matrix_size_b: 4096
+  matrix_size_c: 8192
+  matrix_init: trig  # matrix data initialization method
+  data_type: fp16_r  # fp16 matrix data type
+  lda: 8320
+  ldb: 8320
+  ldc: 4992
+  ldd: 4992
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-581Tflops-4K4K8K-rand-bf16
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 581000  # pass threshold in GFLOPS (581 TFLOPS)
+  matrix_size_a: 4864  # matrix sizes: 4864 * 4096 * 8192
+  matrix_size_b: 4096
+  matrix_size_c: 8192
+  matrix_init: rand  # random integer initialization
+  data_type: bf16_r  # bf16 matrix data type
+  lda: 8320
+  ldb: 8320
+  ldc: 4992
+  ldd: 4992
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-552Tflops-4K4K8K-trig-bf16
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 552000  # pass threshold in GFLOPS (552 TFLOPS)
+  matrix_size_a: 4864  # matrix sizes: 4864 * 4096 * 8192
+  matrix_size_b: 4096
+  matrix_size_c: 8192
+  matrix_init: trig  # matrix data initialization method
+  data_type: bf16_r  # bf16 matrix data type
+  lda: 8320
+  ldb: 8320
+  ldc: 4992
+  ldd: 4992
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-100Tflops-3K-trig-sgemm
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 100000  # pass threshold in GFLOPS (100 TFLOPS)
+  matrix_size_a: 3072  # matrix sizes: 3072 * 3072 * 3072
+  matrix_size_b: 3072
+  matrix_size_c: 3072
+  matrix_init: trig  # matrix data initialization method
+  ops_type: sgemm  # gemm operation type
+  lda: 3072
+  ldb: 3072
+  ldc: 3072
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-100Tflops-3K-rand-sgemm
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 100000  # pass threshold in GFLOPS (100 TFLOPS)
+  matrix_size_a: 3072  # matrix sizes: 3072 * 3072 * 3072
+  matrix_size_b: 3072
+  matrix_size_c: 3072
+  matrix_init: rand  # random integer initialization
+  ops_type: sgemm  # gemm operation type
+  lda: 3072
+  ldb: 3072
+  ldc: 3072
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-70Tflops-8K-trig-dgemm
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 70000  # pass threshold in GFLOPS (70 TFLOPS)
+  matrix_size_a: 8192  # matrix sizes: 8192 * 8192 * 8192
+  matrix_size_b: 8192
+  matrix_size_c: 8192
+  matrix_init: trig  # matrix data initialization method
+  ops_type: dgemm  # gemm operation type
+  lda: 8192
+  ldb: 8192
+  ldc: 8192
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
+- name: gst-70Tflops-8K-rand-dgemm
+  device: all
+  module: gst
+  log_interval: 3000
+  ramp_interval: 5000  # 5 second ramp-up (value in ms)
+  duration: 15000  # 15 second test duration (value in ms)
+  hot_calls: 1000
+  copy_matrix: false  # matrices are copied to the GPUs only once
+  target_stress: 70000  # pass threshold in GFLOPS (70 TFLOPS)
+  matrix_size_a: 8192  # matrix sizes: 8192 * 8192 * 8192
+  matrix_size_b: 8192
+  matrix_size_c: 8192
+  matrix_init: rand  # random integer initialization
+  ops_type: dgemm  # gemm operation type
+  lda: 8192
+  ldb: 8192
+  ldc: 8192
+  transa: 1
+  transb: 0
+  alpha: 1
+  beta: 0
+
diff --git a/rvs/conf/MI300X-HF/iet_stress.conf b/rvs/conf/MI300X-HF/iet_stress.conf
@@ -0,0 +1,64 @@
+# ################################################################################
+# #
+# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
+# #
+# # MIT LICENSE:
+# # Permission is hereby granted, free of charge, to any person obtaining a copy of
+# # this software and associated documentation files (the "Software"), to deal in
+# # the Software without restriction, including without limitation the rights to
+# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# # of the Software, and to permit persons to whom the Software is furnished to do
+# # so, subject to the following conditions:
+# #
+# # The above copyright notice and this permission notice shall be included in all
+# # copies or substantial portions of the Software.
+# #
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# # SOFTWARE.
+# #
+# ###############################################################################
+
+# IET stress test
+#
+# Preconditions:
+#   Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g
+#   option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify
+#   all the GPUs IDs separated by white space
+#   Set parallel execution to true (gemm workload execution on all GPUs in parallel)
+#   Set gemm operation type as dgemm.
+#   Set matrix_size to 28000.
+#   Test duration set to 10 mins.
+#   Target power set to 850W for each GPU.
+#
+# Run test with:
+#   cd bin
+#   ./rvs -c conf/MI300X-HF/iet_stress.conf
+#
+# Expected result:
+#   The test on each GPU passes (TRUE) if the GPU achieves power target of 850W.
+#
+
+actions:
+- name: iet-stress-850W-dgemm-true
+  device: all
+  module: iet
+  parallel: true  # run the gemm workload on all GPUs in parallel
+  duration: 600000  # 10 minute test duration (value in ms)
+  ramp_interval: 10000
+  sample_interval: 5000
+  log_interval: 5000
+  target_power: 850  # power target in watts for each GPU
+  matrix_size: 28000
+  ops_type: dgemm  # gemm operation type
+  lda: 28000
+  ldb: 28000
+  ldc: 28000
+  alpha: 1
+  beta: 1
+  matrix_init: hiprand
+