From 6a06234a45746ce4af7473856940d28e80774382 Mon Sep 17 00:00:00 2001
From: sstamenk
Date: Tue, 21 Oct 2025 15:35:21 +0200
Subject: [PATCH 1/3] Enable bitsandbytes quantization on warp size 32 AMD GPUs

Signed-off-by: sstamenk
---
 vllm/platforms/rocm.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index b2ec40849446..61b2435eb8c8 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -202,6 +202,9 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
+    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]
 
     @classmethod
     def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> "_Backend":

From 9fdecf574db3a2cb102ce4d9d311c6e23ca3babb Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Mon, 10 Nov 2025 10:52:22 +0100
Subject: [PATCH 2/3] Enable bitsandbytes unit tests on Radeon

---
 tests/models/quantization/test_bitsandbytes.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 24220978534c..808ffd89f47f 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -10,13 +10,14 @@
 
 from tests.quantization.utils import is_quant_method_supported
 from vllm.platforms import current_platform
+from vllm.platforms.rocm import on_gfx9
 
 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close
 
 pytestmark = pytest.mark.skipif(
-    current_platform.is_rocm(),
-    reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
+    current_platform.is_rocm() and on_gfx9(),
+    reason="bitsandbytes quantization not supported on Instinct (warp size 64 limitation)",
 )
 
 models_4bit_to_test = [

From 9b3214abf3079043bb58b15af21691321513156a Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Mon, 10 Nov 2025 10:54:14 +0100
Subject: [PATCH 3/3] Update comment

---
 vllm/platforms/rocm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index d5fd2a402a67..376e61451d00 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -200,7 +200,7 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
-    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    # bitsandbytes quantization not supported on Instinct (warp size 64 limitation)
     if not on_gfx9():
         supported_quantization += ["bitsandbytes"]
 