@@ -336,6 +336,7 @@ async def benchmark(
336336 input_requests [0 ].no ,
337337 )
338338 test_history_QA = input_requests [0 ].history_QA
339+ response_format = input_requests [0 ].response_format
339340
340341 test_input = RequestFuncInput (
341342 model = model_id ,
@@ -351,6 +352,7 @@ async def benchmark(
351352 ignore_eos = ignore_eos ,
352353 debug = debug ,
353354 extra_body = extra_body ,
355+ response_format = response_format
354356 )
355357
356358 print ("test_input:" , test_input )
@@ -382,6 +384,7 @@ async def benchmark(
382384 logprobs = logprobs ,
383385 ignore_eos = ignore_eos ,
384386 extra_body = extra_body ,
387+ response_format = response_format
385388 )
386389 profile_output = await request_func (request_func_input = profile_input )
387390 if profile_output .success :
@@ -420,6 +423,7 @@ async def limited_request_func(request_func_input, pbar):
420423 request .no ,
421424 )
422425 history_QA = request .history_QA
426+ response_format = request .response_format
423427
424428 req_model_id , req_model_name = model_id , model_name
425429 if lora_modules :
@@ -440,6 +444,7 @@ async def limited_request_func(request_func_input, pbar):
440444 debug = debug ,
441445 ignore_eos = ignore_eos ,
442446 extra_body = extra_body ,
447+ response_format = response_format
443448 )
444449 tasks .append (asyncio .create_task (limited_request_func (request_func_input = request_func_input , pbar = pbar )))
445450 outputs : list [RequestFuncOutput ] = await asyncio .gather (* tasks )
@@ -455,6 +460,7 @@ async def limited_request_func(request_func_input, pbar):
455460 api_url = base_url + "/stop_profile" ,
456461 output_len = test_output_len ,
457462 logprobs = logprobs ,
463+ response_format = response_format
458464 )
459465 profile_output = await request_func (request_func_input = profile_input )
460466 if profile_output .success :
@@ -982,7 +988,7 @@ def main(args: argparse.Namespace):
982988 if args .result_dir :
983989 file_name = os .path .join (args .result_dir , file_name )
984990 with open (file_name , "w" , encoding = "utf-8" ) as outfile :
985- json .dump (result_json , outfile )
991+ json .dump (result_json , outfile , ensure_ascii = False )
986992 save_to_pytorch_benchmark_format (args , result_json , file_name )
987993
988994
0 commit comments