@@ -360,6 +360,7 @@ def _process_sampling_with_logprob_batch_output(self):
360360 metrics = RequestMetrics (
361361 arrival_time = task .arrival_time ,
362362 inference_start_time = task .inference_start_time ,
363+ model_execute_time = time .time () - task .inference_start_time ,
363364 first_token_time = time .time () - task .inference_start_time ,
364365 time_in_queue = task .schedule_start_time - task .preprocess_end_time ,
365366 preprocess_cost_time = task .preprocess_end_time - task .preprocess_start_time ,
@@ -503,6 +504,7 @@ def _process_batch_output(self):
503504 metrics = RequestMetrics (
504505 arrival_time = task .arrival_time ,
505506 inference_start_time = task .inference_start_time ,
507+ model_execute_time = time .time () - task .inference_start_time ,
506508 first_token_time = time .time () - task .inference_start_time ,
507509 time_in_queue = task .schedule_start_time - task .preprocess_end_time ,
508510 preprocess_cost_time = task .preprocess_end_time - task .preprocess_start_time ,
@@ -514,6 +516,7 @@ def _process_batch_output(self):
514516 else :
515517 metrics = RequestMetrics (
516518 arrival_time = time .time (),
519+ model_execute_time = time .time () - task .inference_start_time ,
517520 request_start_time = task .arrival_time ,
518521 )
519522 self .number_of_output_tokens += len (token_ids )
0 commit comments