From 2a4cd36ad65ea271da523d3ee80652883c83484a Mon Sep 17 00:00:00 2001 From: seungrokjung Date: Sun, 6 Apr 2025 15:05:31 +0000 Subject: [PATCH] vllm rocm 6.3.1 deepseek r1, v3 Signed-off-by: seungrokjung --- benchmark/vllm/README.md | 10 +++----- models.json | 36 +++++++++++++++++++++++++++ scripts/vllm/config.csv | 10 ++++++++ scripts/vllm/vllm_benchmark_report.sh | 6 ++++- 4 files changed, 55 insertions(+), 7 deletions(-) diff --git a/benchmark/vllm/README.md b/benchmark/vllm/README.md index 4bb91b8..864334e 100644 --- a/benchmark/vllm/README.md +++ b/benchmark/vllm/README.md @@ -14,7 +14,7 @@ This Docker image packages vLLM with PyTorch for an AMD Instinct™ MI300X accelerator. It includes: - ✅ ROCm™ 6.3.1 -- ✅ vLLM 0.7.3 +- ✅ vLLM 0.7.4 - ✅ PyTorch 2.7.0 (dev nightly) - ✅ hipBLASLt 0.13 @@ -184,6 +184,7 @@ cd MAD/scripts/vllm | | amd/Mistral-7B-v0.1-FP8-KV | [Mistral 7B](https://huggingface.co/amd/Mistral-7B-v0.1-FP8-KV) | | | amd/dbrx-instruct-FP8-KV | [DBRX Instruct](https://huggingface.co/amd/dbrx-instruct-FP8-KV) | | | amd/c4ai-command-r-plus-FP8-KV | [C4AI Command R+ 08-2024](https://huggingface.co/amd/c4ai-command-r-plus-FP8-KV) | +| | deepseek-ai/DeepSeek-R1 | [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) | | $num_gpu | 1 or 8 | Number of GPUs | | $datatype | float16, float8 | Data type | @@ -303,13 +304,10 @@ owners and are only mentioned for informative purposes.    ## Changelog ---------- This release note summarizes notable changes since the previous docker release. - -- The vLLM version number was incremented from 0.6.6 to 0.7.3. - -- Improved fp8 throughput performance with HipblasLT 0.13 +- The vLLM version number was incremented from 0.7.3 to 0.7.4. 
- The float8 data type benchmark test was added to include the following models: -Llama 3.1 8B Instruct +DeepSeek-R1, DeepSeek-V3 ## Support ---------- diff --git a/models.json b/models.json index 0d0ed06..c59b227 100644 --- a/models.json +++ b/models.json @@ -478,6 +478,42 @@ "args": "--model_repo amd/c4ai-command-r-plus-FP8-KV --test_option latency,throughput --num_gpu 8 --datatype float8 --tunableop off" }, + { + "name": "pyt_vllm_deepseek-v3_fp8", + "url": "", + "dockerfile": "docker/pyt_vllm", + "scripts": "scripts/vllm/run.sh", + "data": "huggingface", + "n_gpus": "-1", + "owner": "mad.support@amd.com", + "training_precision": "", + "multiple_results": "perf_DeepSeek-V3.csv", + "tags": [ + "pyt", + "vllm" + ], + "timeout": -1, + "args": + "--model_repo deepseek-ai/DeepSeek-V3 --test_option latency,throughput --num_gpu 8 --datatype float8 --tunableop off" + }, + { + "name": "pyt_vllm_deepseek-r1_fp8", + "url": "", + "dockerfile": "docker/pyt_vllm", + "scripts": "scripts/vllm/run.sh", + "data": "huggingface", + "n_gpus": "-1", + "owner": "mad.support@amd.com", + "training_precision": "", + "multiple_results": "perf_DeepSeek-R1.csv", + "tags": [ + "pyt", + "vllm" + ], + "timeout": -1, + "args": + "--model_repo deepseek-ai/DeepSeek-R1 --test_option latency,throughput --num_gpu 8 --datatype float8 --tunableop off" + }, { "name": "pyt_train_llama-3.1-8b", "url": "", diff --git a/scripts/vllm/config.csv b/scripts/vllm/config.csv index 6e9dbc8..b54705d 100644 --- a/scripts/vllm/config.csv +++ b/scripts/vllm/config.csv @@ -138,3 +138,13 @@ deepseek-ai/deepseek-moe-16b-chat,128,128,4000,4000,NA,NA,NA,0.9,10,FALSE deepseek-ai/deepseek-moe-16b-chat,128,2048,3000,3000,NA,NA,NA,0.9,10,FALSE deepseek-ai/deepseek-moe-16b-chat,2048,128,3000,3000,NA,NA,NA,0.9,10,FALSE deepseek-ai/deepseek-moe-16b-chat,2048,2048,1500,1500,NA,NA,NA,0.9,10,FALSE + +deepseek-ai/DeepSeek-R1,128,128,4000,4000,32768,32768,32768,0.9,1,FALSE 
+deepseek-ai/DeepSeek-R1,128,2048,1500,1500,32768,32768,32768,0.9,1,FALSE +deepseek-ai/DeepSeek-R1,2048,128,500,1500,32768,32768,32768,0.9,1,FALSE +deepseek-ai/DeepSeek-R1,2048,2048,500,1000,32768,32768,32768,0.9,1,FALSE + +deepseek-ai/DeepSeek-V3,128,128,4000,4000,32768,32768,32768,0.9,1,FALSE +deepseek-ai/DeepSeek-V3,128,2048,1500,1500,32768,32768,32768,0.9,1,FALSE +deepseek-ai/DeepSeek-V3,2048,128,500,1500,32768,32768,32768,0.9,1,FALSE +deepseek-ai/DeepSeek-V3,2048,2048,500,1000,32768,32768,32768,0.9,1,FALSE diff --git a/scripts/vllm/vllm_benchmark_report.sh b/scripts/vllm/vllm_benchmark_report.sh index c3b8b21..686f2b4 100755 --- a/scripts/vllm/vllm_benchmark_report.sh +++ b/scripts/vllm/vllm_benchmark_report.sh @@ -64,7 +64,11 @@ fi if [[ $datatype == "float16" ]]; then DTYPE=" --dtype float16 " elif [[ $datatype == "float8" ]]; then - DTYPE=" --dtype float16 --quantization fp8 --kv-cache-dtype fp8 " + if [[ $model_name == "DeepSeek-R1" ]] || [[ $model_name == "DeepSeek-V3" ]] ; then + DTYPE=" --dtype float16 --quantization fp8 --max-model-len 32768 " + else + DTYPE=" --dtype float16 --quantization fp8 --kv-cache-dtype fp8 " + fi fi OPTION_LATENCY=" --gpu-memory-utilization 0.9 "