PaddlePaddle
diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml
Lines changed: 0 additions & 14 deletions
diff --git a/.github/workflows/ce_job.yml b/.github/workflows/ce_job.yml
Lines changed: 4 additions & 4 deletions
diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml
Lines changed: 0 additions & 20 deletions
diff --git a/.github/workflows/publish_job.yml b/.github/workflows/publish_job.yml
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
Lines changed: 5 additions & 1 deletion
diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
Lines changed: 4 additions & 1 deletion
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
Lines changed: 7 additions & 1 deletion
diff --git a/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml b/benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml
Lines changed: 2 additions & 2 deletions
diff --git a/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml b/benchmarks/yaml/x1-a3b-128k-wint8-h800-tp1.yaml
Lines changed: 2 additions & 2 deletions
@@ -206,20 +206,6 @@ jobs:
           check_service 90
           python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1
 
-          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-            -H "Content-Type: application/json" \
-            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_mtp.yaml\", \"--enable-logprob\": \"False\"}"
-          check_service 180
-          export TEMPLATE=TOKEN_NORMAL
-          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
-
-          curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-            -H "Content-Type: application/json" \
-            -d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_sot.yaml\", \"--enable-logprob\": \"False\"}"
-          check_service 360
-          export TEMPLATE=TOKEN_NORMAL
-          python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
-
           popd
           echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
           '
 
@@ -191,7 +191,7 @@ jobs:
 
           commit_id=${{ github.sha }}
           branch_name=${{ github.ref_name }}
-          target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
+          target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
 
           wget  -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
           push_file=$(realpath bos_tools.py)
@@ -201,7 +201,7 @@ jobs:
           target_path_stripped="${target_path#paddle-qa/}"
           WHEEL_PATH=https://paddle-qa.bj.bcebos.com/${target_path_stripped}/${filename}
 
-          target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
+          target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
           python ${push_file} ${filename} ${target_path_latest}
           target_path_stripped_latest="${target_path_latest#paddle-qa/}"
           WHEEL_PATH_LATEST=https://paddle-qa.bj.bcebos.com/${target_path_stripped_latest}/${filename}
@@ -230,7 +230,7 @@ jobs:
 
           commit_id=${{ github.sha }}
           branch_name=${{ github.ref_name }}
-          target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
+          target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
 
           wget  -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
           push_file=$(realpath bos_tools.py)
@@ -240,7 +240,7 @@ jobs:
           target_path_stripped="${target_path#paddle-qa/}"
           WHEEL_PATH=https://paddle-qa.bj.bcebos.com/${target_path_stripped}/${filename}
 
-          target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
+          target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
           python ${push_file} ${filename} ${target_path_latest}
           target_path_stripped_latest="${target_path_latest#paddle-qa/}"
           WHEEL_PATH_LATEST=https://paddle-qa.bj.bcebos.com/${target_path_stripped_latest}/${filename}
 
@@ -75,23 +75,3 @@ jobs:
       FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
       FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
       MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
-
-  accuracy_test:
-    name: Run Accuracy Tests
-    needs: [clone,build]
-    uses: ./.github/workflows/_accuracy_test.yml
-    with:
-      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
-      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
-      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
-      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
-
-  stable_test:
-    name: Run Stable Tests
-    needs: [clone,build]
-    uses: ./.github/workflows/_stable_test.yml
-    with:
-      DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
-      FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
-      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
-      MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
@@ -377,5 +377,5 @@ jobs:
     with:
       DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
       FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
-      FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
+      FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }}
       MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
@@ -51,6 +51,7 @@ class RequestFuncInput:
     ignore_eos: bool = False
     language: Optional[str] = None
     debug: bool = False
+    response_format: Optional[dict] = None
 
 
 @dataclass
@@ -93,8 +94,11 @@ async def async_request_eb_openai_chat_completions(
             "stream_options": {
                 "include_usage": True,
                 "continuous_usage_stats": True,
-            },
+            }
         }
+        if request_func_input.response_format:
+            payload["response_format"] =request_func_input.response_format
+
         # 超参由yaml传入
         payload.update(request_func_input.hyper_parameters)
 
 
@@ -45,7 +45,8 @@ class SampleRequest:
     json_data: Optional[dict]
     prompt_len: int
     expected_output_len: int
-
+    response_format: Optional[dict] = None
+    
 
 class BenchmarkDataset(ABC):
     """BenchmarkDataset"""
@@ -297,6 +298,7 @@ def sample(
             json_data = entry
             prompt = entry["messages"][-1].get("content", "")
             history_QA = entry.get("messages", [])
+            response_format = entry.get("response_format")
             new_output_len = int(entry.get("max_tokens", 12288))
 
             if enable_multimodal_chat:
@@ -309,6 +311,7 @@ def sample(
                     prompt_len=0,
                     history_QA=history_QA,
                     expected_output_len=new_output_len,
+                    response_format=response_format
                 )
             )
             cnt += 1
 
@@ -336,6 +336,7 @@ async def benchmark(
         input_requests[0].no,
     )
     test_history_QA = input_requests[0].history_QA
+    response_format = input_requests[0].response_format
 
     test_input = RequestFuncInput(
         model=model_id,
@@ -351,6 +352,7 @@ async def benchmark(
         ignore_eos=ignore_eos,
         debug=debug,
         extra_body=extra_body,
+        response_format=response_format
     )
 
     print("test_input:", test_input)
@@ -382,6 +384,7 @@ async def benchmark(
             logprobs=logprobs,
             ignore_eos=ignore_eos,
             extra_body=extra_body,
+            response_format=response_format
         )
         profile_output = await request_func(request_func_input=profile_input)
         if profile_output.success:
@@ -420,6 +423,7 @@ async def limited_request_func(request_func_input, pbar):
             request.no,
         )
         history_QA = request.history_QA
+        response_format = request.response_format
 
         req_model_id, req_model_name = model_id, model_name
         if lora_modules:
@@ -440,6 +444,7 @@ async def limited_request_func(request_func_input, pbar):
             debug=debug,
             ignore_eos=ignore_eos,
             extra_body=extra_body,
+            response_format=response_format
         )
         tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
     outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
@@ -455,6 +460,7 @@ async def limited_request_func(request_func_input, pbar):
             api_url=base_url + "/stop_profile",
             output_len=test_output_len,
             logprobs=logprobs,
+            response_format=response_format
         )
         profile_output = await request_func(request_func_input=profile_input)
         if profile_output.success:
@@ -982,7 +988,7 @@ def main(args: argparse.Namespace):
         if args.result_dir:
             file_name = os.path.join(args.result_dir, file_name)
         with open(file_name, "w", encoding="utf-8") as outfile:
-            json.dump(result_json, outfile)
+            json.dump(result_json, outfile, ensure_ascii=False)
         save_to_pytorch_benchmark_format(args, result_json, file_name)
 
 
 
@@ -1,5 +1,5 @@
-reasoning-parser: ernie_x1
-tool_call_parser: ernie_x1
+reasoning-parser: ernie-x1
+tool_call_parser: ernie-x1
 tensor_parallel_size: 4
 max_model_len: 65536
 max_num_seqs: 128
 
@@ -1,7 +1,7 @@
 tensor_parallel_size: 1
 max_model_len: 131072
 max_num_seqs: 32
-reasoning_parser: ernie_x1
-tool_call_parser: ernie_x1
+reasoning_parser: ernie-x1
+tool_call_parser: ernie-x1
 load_choices: "default_v1"
 quantization: wint8