Skip to content

Commit

Permalink
Merge pull request #806 from jkottiku/master
Browse files Browse the repository at this point in the history
Power stress & performance conf.
  • Loading branch information
jkottiku committed Sep 10, 2024
1 parent 6e3d83b commit 9fa1e36
Show file tree
Hide file tree
Showing 2 changed files with 333 additions and 0 deletions.
269 changes: 269 additions & 0 deletions rvs/conf/MI300X-HF/gst_single.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
# ################################################################################
# #
# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
# #
# # MIT LICENSE:
# # Permission is hereby granted, free of charge, to any person obtaining a copy of
# # this software and associated documentation files (the "Software"), to deal in
# # the Software without restriction, including without limitation the rights to
# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# # of the Software, and to permit persons to whom the Software is furnished to do
# # so, subject to the following conditions:
# #
# # The above copyright notice and this permission notice shall be included in all
# # copies or substantial portions of the Software.
# #
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# # SOFTWARE.
# #
# ###############################################################################

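# GST test - gst-1215Tflops-4K4K8K-rand-fp8
# (The notes below describe this first action. The other actions in this file
# use the same timing settings with different data types, initialization
# methods, matrix sizes and target GFLOPS.)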
#
# Preconditions:
# Set device to all. To run rvs on only a subset of GPUs, first run rvs with the -g
# option, collect the GPU IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify
# all the GPU IDs separated by white space.
# Set matrix sizes to 4864 * 4096 * 8192
# Set matrix data type as fp8 real number
# Set matrix data initialization method as random integer
# Set copy_matrix to false (the matrices will be copied to GPUs only once)
# Set target stress GFLOPS as 1215000 (1215 TFLOPS)
#
# Expected result:
# The test on each GPU passes (TRUE) if the GPU achieves 1215 TFLOPS or more
# within the test duration of 15 seconds, after a ramp-up duration of 5 seconds.
# Otherwise the test on that GPU fails (FALSE).

actions:
# FP8 GEMM stress with randomly initialized matrices.
# Target: 1215000 GFLOPS (1215 TFLOPS) on every GPU.
- name: gst-1215Tflops-4K4K8K-rand-fp8
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5 second ramp-up
  duration: 15000       # 15 second test duration
  hot_calls: 1000
  copy_matrix: false    # matrices are copied to the GPUs only once
  target_stress: 1215000
  matrix_size_a: 4864
  matrix_size_b: 4096
  matrix_size_c: 8192
  matrix_init: rand
  data_type: fp8_r      # fp8 real number
  # ld* values are larger than the matrix sizes above.
  lda: 8320
  ldb: 8320
  ldc: 4992
  ldd: 4992
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# FP8 GEMM stress with "trig" matrix initialization.
# Target: 981000 GFLOPS (981 TFLOPS) on every GPU.
- name: gst-981Tflops-4K4K8K-trig-fp8
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 981000
  matrix_size_a: 4864
  matrix_size_b: 4096
  matrix_size_c: 8192
  matrix_init: trig
  data_type: fp8_r
  # ld* values are larger than the matrix sizes above.
  lda: 8320
  ldb: 8320
  ldc: 4992
  ldd: 4992
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# FP16 GEMM stress with randomly initialized matrices.
# Target: 639000 GFLOPS (639 TFLOPS) on every GPU.
- name: gst-639Tflops-4K4K8K-rand-fp16
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 639000
  matrix_size_a: 4864
  matrix_size_b: 4096
  matrix_size_c: 8192
  matrix_init: rand
  data_type: fp16_r
  # ld* values are larger than the matrix sizes above.
  lda: 8320
  ldb: 8320
  ldc: 4992
  ldd: 4992
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# FP16 GEMM stress with "trig" matrix initialization.
# Target: 523000 GFLOPS (523 TFLOPS) on every GPU.
- name: gst-523Tflops-4K4K8K-trig-fp16
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 523000
  matrix_size_a: 4864
  matrix_size_b: 4096
  matrix_size_c: 8192
  matrix_init: trig
  data_type: fp16_r
  # ld* values are larger than the matrix sizes above.
  lda: 8320
  ldb: 8320
  ldc: 4992
  ldd: 4992
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# BF16 GEMM stress with randomly initialized matrices.
# Target: 581000 GFLOPS (581 TFLOPS) on every GPU.
- name: gst-581Tflops-4K4K8K-rand-bf16
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 581000
  matrix_size_a: 4864
  matrix_size_b: 4096
  matrix_size_c: 8192
  matrix_init: rand
  data_type: bf16_r
  # ld* values are larger than the matrix sizes above.
  lda: 8320
  ldb: 8320
  ldc: 4992
  ldd: 4992
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# BF16 GEMM stress with "trig" matrix initialization.
# Target: 552000 GFLOPS (552 TFLOPS) on every GPU.
- name: gst-552Tflops-4K4K8K-trig-bf16
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 552000
  matrix_size_a: 4864
  matrix_size_b: 4096
  matrix_size_c: 8192
  matrix_init: trig
  data_type: bf16_r
  # ld* values are larger than the matrix sizes above.
  lda: 8320
  ldb: 8320
  ldc: 4992
  ldd: 4992
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# SGEMM stress on 3072 x 3072 x 3072 matrices with "trig" initialization.
# Target: 100000 GFLOPS (100 TFLOPS) on every GPU.
- name: gst-100Tflops-3K-trig-sgemm
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 100000
  matrix_size_a: 3072
  matrix_size_b: 3072
  matrix_size_c: 3072
  matrix_init: trig
  ops_type: sgemm
  # ld* values equal the matrix sizes above; no ldd is set for this action.
  lda: 3072
  ldb: 3072
  ldc: 3072
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# SGEMM stress on 3072 x 3072 x 3072 matrices with random initialization.
# Target: 100000 GFLOPS (100 TFLOPS) on every GPU.
- name: gst-100Tflops-3K-rand-sgemm
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 100000
  matrix_size_a: 3072
  matrix_size_b: 3072
  matrix_size_c: 3072
  matrix_init: rand
  ops_type: sgemm
  # ld* values equal the matrix sizes above; no ldd is set for this action.
  lda: 3072
  ldb: 3072
  ldc: 3072
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# DGEMM stress on 8192 x 8192 x 8192 matrices with "trig" initialization.
# Target: 70000 GFLOPS (70 TFLOPS) on every GPU.
- name: gst-70Tflops-8K-trig-dgemm
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 70000
  matrix_size_a: 8192
  matrix_size_b: 8192
  matrix_size_c: 8192
  matrix_init: trig
  ops_type: dgemm
  # ld* values equal the matrix sizes above; no ldd is set for this action.
  lda: 8192
  ldb: 8192
  ldc: 8192
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

# DGEMM stress on 8192 x 8192 x 8192 matrices with random initialization.
# Target: 70000 GFLOPS (70 TFLOPS) on every GPU.
- name: gst-70Tflops-8K-rand-dgemm
  device: all
  module: gst
  log_interval: 3000    # presumably milliseconds, like duration - confirm
  ramp_interval: 5000   # 5000 ms ramp-up
  duration: 15000       # 15000 ms test duration
  hot_calls: 1000
  copy_matrix: false
  target_stress: 70000
  matrix_size_a: 8192
  matrix_size_b: 8192
  matrix_size_c: 8192
  matrix_init: rand
  ops_type: dgemm
  # ld* values equal the matrix sizes above; no ldd is set for this action.
  lda: 8192
  ldb: 8192
  ldc: 8192
  transa: 1
  transb: 0
  alpha: 1
  beta: 0

64 changes: 64 additions & 0 deletions rvs/conf/MI300X-HF/iet_stress.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# ################################################################################
# #
# # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
# #
# # MIT LICENSE:
# # Permission is hereby granted, free of charge, to any person obtaining a copy of
# # this software and associated documentation files (the "Software"), to deal in
# # the Software without restriction, including without limitation the rights to
# # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# # of the Software, and to permit persons to whom the Software is furnished to do
# # so, subject to the following conditions:
# #
# # The above copyright notice and this permission notice shall be included in all
# # copies or substantial portions of the Software.
# #
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# # SOFTWARE.
# #
# ###############################################################################

# IET stress test
#
# Preconditions:
# Set device to all. To run rvs on only a subset of GPUs, first run rvs with the -g
# option, collect the GPU IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify
# all the GPU IDs separated by white space.
# Set parallel execution to true (gemm workload execution on all GPUs in parallel)
# Set gemm operation type as dgemm.
# Set matrix_size to 28000.
# Test duration set to 10 mins.
# Target power set to 850W for each GPU.
#
# Run test with:
# cd bin
# ./rvs -c conf/MI300X-HF/iet_stress.conf
#
# Expected result:
# The test on each GPU passes (TRUE) if the GPU achieves power target of 850W.
#

actions:
# IET power stress: dgemm workload run on all GPUs in parallel.
# Target: 850 W on each GPU over a 10 minute test.
- name: iet-stress-850W-dgemm-true
  device: all
  module: iet
  parallel: true          # gemm workload executes on all GPUs in parallel
  duration: 600000        # 10 minutes
  # The three intervals below are presumably milliseconds, like duration - confirm.
  ramp_interval: 10000
  sample_interval: 5000
  log_interval: 5000
  target_power: 850       # watts, per GPU
  matrix_size: 28000
  ops_type: dgemm
  # ld* values equal matrix_size.
  lda: 28000
  ldb: 28000
  ldc: 28000
  alpha: 1
  beta: 1
  matrix_init: hiprand

0 comments on commit 9fa1e36

Please sign in to comment.