From eec305d56551e082209ccbda051ff2fddca940a9 Mon Sep 17 00:00:00 2001
From: songhappy
Date: Tue, 27 Aug 2024 18:57:30 -0700
Subject: [PATCH] log results and models

---
 python/llm/dev/benchmark/all-in-one/run.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/llm/dev/benchmark/all-in-one/run.py b/python/llm/dev/benchmark/all-in-one/run.py
index 53c47677bb6d..c9eceb500eba 100644
--- a/python/llm/dev/benchmark/all-in-one/run.py
+++ b/python/llm/dev/benchmark/all-in-one/run.py
@@ -2062,13 +2062,15 @@ def run_pipeline_parallel_gpu(repo_id,
             lookahead = True
         run_model(model, api, in_out_pairs, conf['local_model_hub'], conf['warm_up'], conf['num_trials'], conf['num_beams'],
                   conf['low_bit'], conf['cpu_embedding'], batch_size, streaming, use_fp16_torch_dtype, lookahead, task, optimize_model)
+        print("-------------------- Finish running model: {} --------------------".format(model))
     df = pd.DataFrame(results, columns=['model', '1st token avg latency (ms)', '2+ avg latency (ms/token)', 'encoder time (ms)',
                                         'input/output tokens', 'batch_size', 'actual input/output tokens', 'num_beams', 'low_bit', 'cpu_embedding',
                                         'model loading time (s)', 'peak mem (GB)', 'streaming', 'use_fp16_torch_dtype'])
+    print("-------------------- Results: {} --------------------".format(results))
     if "pipeline" in api or "deepspeed" in api:
         if torch.distributed.get_rank() == 0:
             df.index += max(line_counter - 1, 0)
             if line_counter == 0:
                 df.to_csv(csv_name, mode='a', encoding='utf-8')
             else:
                 df.to_csv(csv_name, mode='a', header=None, encoding='utf-8')
@@ -2082,3 +2084,4 @@ def run_pipeline_parallel_gpu(repo_id,
         df.to_csv(csv_name, mode='a', header=None, encoding='utf-8')
     line_counter += len(df.index)
     results = []
+
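Note for readers: the patch adds two progress prints around the existing CSV-flush logic in run.py. That logic follows an append-with-header-once pattern: the header row is written only on the first flush (`line_counter == 0`), and later flushes append bare rows so all batches land in one well-formed CSV. Below is a minimal, self-contained sketch of that pattern. It is illustrative only: `flush_results` is a hypothetical helper, the column list is truncated, the sample rows are placeholders, and the index bookkeeping is simplified (the real script offsets with `max(line_counter - 1, 0)`).

```python
import pandas as pd

csv_name = "benchmark_results.csv"  # assumed output path
line_counter = 0                    # data rows written so far

def flush_results(results):
    """Append one batch of result rows, writing the CSV header only once."""
    global line_counter
    df = pd.DataFrame(results, columns=['model',
                                        '1st token avg latency (ms)',
                                        '2+ avg latency (ms/token)'])
    # Continue row numbering across batches instead of restarting at 0.
    df.index += line_counter
    if line_counter == 0:
        # First flush: include the column header.
        df.to_csv(csv_name, mode='a', encoding='utf-8')
    else:
        # Later flushes: bare rows, so the file keeps a single header line.
        df.to_csv(csv_name, mode='a', header=None, encoding='utf-8')
    line_counter += len(df.index)
    # Mirror the patch's console logging for each finished batch.
    print("-------------------- Results: {} --------------------".format(results))

# Hypothetical usage with placeholder latencies (ms):
flush_results([('model-a', 123.4, 45.6)])
flush_results([('model-b', 234.5, 56.7)])
```

Without the `line_counter == 0` guard, each batch would repeat the column names mid-file and break downstream CSV parsing; guarding on the counter rather than on file existence also lets a run keep appending to a CSV left over from an earlier invocation.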