diff --git a/python/llm/src/ipex_llm/transformers/npu_model.py b/python/llm/src/ipex_llm/transformers/npu_model.py index 2eb54e515b4..c7fc2f18bff 100644 --- a/python/llm/src/ipex_llm/transformers/npu_model.py +++ b/python/llm/src/ipex_llm/transformers/npu_model.py @@ -276,7 +276,8 @@ def optimize_npu_model(cls, *args, **kwargs): model.share_memory() if not pipeline: - if model.config.model_type in ["qwen2", "llama", "minicpm"]: + if (not hasattr(model, 'llm') and + model.config.model_type in ["qwen2", "llama", "minicpm"]): from ipex_llm.transformers.npu_models.convert import optimize_llm_single_process optimize_llm_single_process( llm,