From 8a93a598d9ac265882e55432e7aef55c8bff23f4 Mon Sep 17 00:00:00 2001
From: "Wang, Yi"
Date: Thu, 21 Nov 2024 19:15:36 +0800
Subject: [PATCH] fix the issue that len(tokenizer(prompt)["input_ids"]) > prompt_len (#10524)

Signed-off-by: Wang, Yi A
---
 benchmarks/backend_request_func.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 25c8b1bbf3e22..c3fed56e8a956 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -54,6 +54,7 @@ async def async_request_tgi(
             "do_sample": True,
             "temperature": 0.01,  # TGI does not accept 0.0 temperature.
             "top_p": 0.99,  # TGI does not accept 1.0 top_p.
+            "truncate": request_func_input.prompt_len,
             # TGI does not accept ignore_eos flag.
         }
         payload = {