support bitsandbytes quantization with qwen model (#10549)

Signed-off-by: Ubuntu <[email protected]>
vllm-project · Nov 23, 2024 · 948c859 · 948c859
1 parent 97814fb
commit 948c859
Showing 1 changed file with 12 additions and 0 deletions.
diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py
@@ -1028,6 +1028,18 @@ class QWenLLM(QWenBaseModel):
     embedding_modules = {}
     embedding_padding_modules = []
 
+    default_bitsandbytes_target_modules = [
+        ".c_attn.",
+        ".c_proj.",
+        ".w1.",
+        ".w2.",
+    ]
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name: (weight_name, index)
+        "w2": ("gate_up_proj", 0),
+        "w1": ("gate_up_proj", 1),
+    }
+
 
 class QWenVL(QWenBaseModel, SupportsMultiModal):
     packed_modules_mapping = {