
Commit d621cf6 (parent 6c728f7)

Enable bitsandbytes quantization on warp size 32 AMD GPUs

File tree: 1 file changed (+3, −0)


vllm/platforms/rocm.py

Lines changed: 3 additions & 0 deletions
@@ -199,6 +199,9 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
+    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]
 
     @classmethod
     def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> "_Backend":
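The added check means bitsandbytes only becomes a selectable quantization method on ROCm GPUs with a 32-wide wavefront (warp), i.e. non-gfx9 RDNA parts, while gfx9 CDNA accelerators with a 64-wide wavefront continue to reject it. Below is a minimal sketch of how such an architecture gate could be written on a ROCm build of PyTorch; the helper name `on_gfx9` matches the diff, but the body shown here is an illustrative assumption, not the actual vLLM implementation.

```python
import torch


def on_gfx9() -> bool:
    """Illustrative gfx9 check: True for CDNA parts (gfx90a, gfx942, ...),
    which use a 64-wide wavefront; False for warp-size-32 RDNA parts."""
    # gcnArchName is exposed by ROCm builds of PyTorch,
    # e.g. "gfx90a:sramecc+:xnack-" or "gfx1100".
    arch = torch.cuda.get_device_properties(0).gcnArchName
    return arch.split(":")[0].startswith("gfx9")
```

With the gate in place, a warp-size-32 AMD GPU should pass the platform's supported-quantization check, so a pre-quantized bitsandbytes checkpoint can be requested in the usual way. The model name below is only an example, and the exact flags depend on the vLLM release (older versions also required `load_format="bitsandbytes"`):

```python
from vllm import LLM

# Hypothetical usage: any 4-bit bitsandbytes checkpoint would do.
llm = LLM(
    model="unsloth/llama-3-8b-bnb-4bit",
    quantization="bitsandbytes",
)
print(llm.generate("Hello from an RDNA GPU")[0].outputs[0].text)
```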
