From 6a06234a45746ce4af7473856940d28e80774382 Mon Sep 17 00:00:00 2001
From: sstamenk
Date: Tue, 21 Oct 2025 15:35:21 +0200
Subject: [PATCH 1/3] Enable bitsandbytes quantization on warp size 32 AMD GPUs

Signed-off-by: sstamenk
---
 vllm/platforms/rocm.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index b2ec40849446..61b2435eb8c8 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -202,6 +202,9 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
+    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]
 
     @classmethod
     def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> "_Backend":

From 9fdecf574db3a2cb102ce4d9d311c6e23ca3babb Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Mon, 10 Nov 2025 10:52:22 +0100
Subject: [PATCH 2/3] Enable bitsandbytes unit tests on Radeon

---
 tests/models/quantization/test_bitsandbytes.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 24220978534c..808ffd89f47f 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -10,13 +10,14 @@
 
 from tests.quantization.utils import is_quant_method_supported
 from vllm.platforms import current_platform
+from vllm.platforms.rocm import on_gfx9
 
 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close
 
 pytestmark = pytest.mark.skipif(
-    current_platform.is_rocm(),
-    reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
+    current_platform.is_rocm() and on_gfx9(),
+    reason="bitsandbytes quantization not supported on Instinct (warp size 64 limitation)",
 )
 
 models_4bit_to_test = [

From 9b3214abf3079043bb58b15af21691321513156a Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Mon, 10 Nov 2025 10:54:14 +0100
Subject: [PATCH 3/3] Update comment

---
 vllm/platforms/rocm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index d5fd2a402a67..376e61451d00 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -200,7 +200,7 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
-    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    # bitsandbytes quantization not supported on Instinct (warp size 64 limitation)
     if not on_gfx9():
         supported_quantization += ["bitsandbytes"]
 