From 54d9a590d4169ceb85066b62753593c5a5f2f4fe Mon Sep 17 00:00:00 2001 From: binbin Deng <108676127+plusbang@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:18:22 +0800 Subject: [PATCH] [NPU]Fix eos_token setting (#12475) --- python/llm/src/ipex_llm/transformers/npu_models/convert.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py index 9cae68ae16f..3b076e21aa6 100644 --- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py +++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py @@ -326,7 +326,11 @@ def generate( f"Generated tokens ({new_tokens}) exceed named pipeline limitation.") if "eos_token_id" not in new_generate_kwargs: - eos = 0xffffffff + generation_config = GenerationConfig.from_model_config(self.config) + if getattr(generation_config, "eos_token_id", None) is not None: + eos = generation_config.eos_token_id + else: + eos = 0xffffffff else: eos = new_generate_kwargs["eos_token_id"] output_tokens = []