diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert.py b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
index 3b076e21aa6..e76619c70ae 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert.py
@@ -321,9 +321,6 @@ def generate(
     new_tokens = new_generate_kwargs['max_new_tokens']
     invalidInputError(input_length + new_tokens <= self.kv_len + 1,
                       "Input plus output tokens should not exceed max_context_len.")
-    # TODO: may optimize this part later
-    invalidInputError(new_tokens < 1024,
-                      f"Generated tokens ({new_tokens}) exceed named pipeline limitation.")
     if "eos_token_id" not in new_generate_kwargs:
         generation_config = GenerationConfig.from_model_config(self.config)
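
For context, after this change only the context-length check remains: generation is rejected when the prompt length plus max_new_tokens would exceed max_context_len (tracked as self.kv_len + 1), while the separate hard cap of 1024 generated tokens for the named pipeline is dropped. Below is a minimal, self-contained sketch of the surviving check; invalidInputError is re-implemented here as a hypothetical stand-in for ipex-llm's internal helper, and check_generation_length is an illustrative wrapper, not a function from the patch.

# Sketch of the validation kept by this patch (assumption: standalone
# re-implementation for illustration, not the actual ipex-llm code).

def invalidInputError(condition: bool, message: str) -> None:
    # Stand-in for ipex-llm's error helper: raise when the input is invalid.
    if not condition:
        raise ValueError(message)

def check_generation_length(input_length: int, new_tokens: int, kv_len: int) -> None:
    # Prompt tokens plus generated tokens must fit within max_context_len,
    # which the NPU model tracks as kv_len + 1.
    invalidInputError(input_length + new_tokens <= kv_len + 1,
                      "Input plus output tokens should not exceed max_context_len.")

# Example: a 900-token prompt generating 124 tokens fits when kv_len is 1023
# (900 + 124 <= 1024); asking for 200 tokens would raise instead.
check_generation_length(input_length=900, new_tokens=124, kv_len=1023)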