diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml
index 0cd4a9b2fe3..00884dbe21d 100644
--- a/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml
+++ b/python/llm/test/benchmark/stable-version-arc-perf-test-fp8.yaml
@@ -23,14 +23,14 @@ exclude:
   - 'meta-llama/Llama-2-7b-chat-hf:512:8'
   - 'meta-llama/Llama-2-7b-chat-hf:1024:8'
   - 'meta-llama/Llama-2-7b-chat-hf:2048:8'
-#  - 'THUDM/chatglm2-6b:2048:8'
-#  - 'THUDM/chatglm3-6b:2048:8'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
+  - 'THUDM/chatglm2-6b:2048:8'
+  - 'THUDM/chatglm3-6b:2048:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
   - 'Qwen/Qwen-7B-Chat:2048:1'
   - 'Qwen/Qwen-7B-Chat:1024:2'
   - 'Qwen/Qwen-7B-Chat:2048:2'
diff --git a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml
index b1a2b2536c0..cb9f7b30e9c 100644
--- a/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml
+++ b/python/llm/test/benchmark/stable-version-arc-perf-test-sym_int4.yaml
@@ -22,14 +22,14 @@ exclude:
   - 'meta-llama/Llama-2-7b-chat-hf:2048:4'
   - 'meta-llama/Llama-2-7b-chat-hf:1024:8'
   - 'meta-llama/Llama-2-7b-chat-hf:2048:8'
-#  - 'THUDM/chatglm2-6b:2048:8'
-#  - 'THUDM/chatglm3-6b:2048:8'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
+  - 'THUDM/chatglm2-6b:2048:8'
+  - 'THUDM/chatglm3-6b:2048:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:2'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:4'
+  - 'baichuan-inc/Baichuan2-7B-Chat:512:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:1024:8'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048:8'
   - 'Qwen/Qwen-7B-Chat:2048:2'
   - 'Qwen/Qwen-7B-Chat:1024:4'
   - 'Qwen/Qwen-7B-Chat:2048:4'
diff --git a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml
index 80a67d71e3f..bc64ad92305 100644
--- a/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml
+++ b/python/llm/test/benchmark/stable-version-arc-stress-test-fp8.yaml
@@ -17,5 +17,5 @@ test_api:
   - "transformer_int4_gpu" # on Intel GPU
 cpu_embedding: False # whether put embedding to CPU (only avaiable now for gpu win related test_api)
 exclude:
-#  - 'baichuan-inc/Baichuan2-7B-Chat:2048'
+  - 'baichuan-inc/Baichuan2-7B-Chat:2048'
   - 'Qwen/Qwen-7B-Chat:2048'
\ No newline at end of file