diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
index 9cae68ae16f..3b076e21aa6 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -326,7 +326,11 @@ def generate(
                       f"Generated tokens ({new_tokens}) exceed named pipeline limitation.")
 
     if "eos_token_id" not in new_generate_kwargs:
-        eos = 0xffffffff
+        generation_config = GenerationConfig.from_model_config(self.config)
+        # eos_token_id exists on every GenerationConfig (None when unset): test the value.
+        eos = getattr(generation_config, "eos_token_id", None)
+        if eos is None:
+            eos = 0xffffffff
     else:
         eos = new_generate_kwargs["eos_token_id"]
     output_tokens = []