diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py index 9cae68ae16f..3b076e21aa6 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py @@ -326,7 +326,11 @@ def generate( f"Generated tokens ({new_tokens}) exceed named pipeline limitation.") if "eos_token_id" not in new_generate_kwargs: - eos = 0xffffffff + generation_config = GenerationConfig.from_model_config(self.config) + if hasattr(generation_config, "eos_token_id"): + eos = generation_config.eos_token_id + else: + eos = 0xffffffff else: eos = new_generate_kwargs["eos_token_id"] output_tokens = []