add vLLM glm4 fix (intel-analytics#12474)

przemekmatusiak · Dec 10, 2024 · 1e8ba3d
1 parent 46514b2
commit 1e8ba3d
Showing 1 changed file with 1 addition and 3 deletions.
diff --git a/python/llm/src/ipex_llm/vllm/xpu/model_convert.py b/python/llm/src/ipex_llm/vllm/xpu/model_convert.py
@@ -94,9 +94,7 @@ def _ipex_llm_load_model(self) -> None:
             from ipex_llm import optimize_model
             import os
             not_convert_last_mlp = os.getenv("IPEX_LLM_NOT_CONVERT_LAST_MLP", None)
-            is_glm4_model = "glm-4" in self.model_config.model.lower()
-            is_codegeex4_model = "codegeex4-all" in self.model_config.model.lower()
-            if not_convert_last_mlp is not None or is_glm4_model or is_codegeex4_model:
+            if not_convert_last_mlp is not None:
                 # only use to avoid nan value in last mlp forward running glm4-9b-chat
                 modules = ["35.mlp", "36.mlp", "37.mlp", "38.mlp", "39.mlp"]
             else: