Skip to content

Commit a7a07eb

Browse files
authored
Merge branch 'develop' into new_add_trace_logger
2 parents ceedfeb + 9eff788 commit a7a07eb

File tree

77 files changed

+3593
-1423
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

77 files changed

+3593
-1423
lines changed

.github/workflows/_base_test.yml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -206,20 +206,6 @@ jobs:
206206
check_service 90
207207
python -m pytest -sv test_max_waiting_time.py || TEST_EXIT_CODE=1
208208
209-
curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
210-
-H "Content-Type: application/json" \
211-
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_mtp.yaml\", \"--enable-logprob\": \"False\"}"
212-
check_service 180
213-
export TEMPLATE=TOKEN_NORMAL
214-
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
215-
216-
curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
217-
-H "Content-Type: application/json" \
218-
-d "{\"--model\": \"/MODELDATA/ernie-4_5-21b-a3b-bf16-paddle\", \"--config\": \"21b_sot.yaml\", \"--enable-logprob\": \"False\"}"
219-
check_service 360
220-
export TEMPLATE=TOKEN_NORMAL
221-
python -m pytest -sv test_seed_usage.py -k "not test_seed_stream" || TEST_EXIT_CODE=1
222-
223209
popd
224210
echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env
225211
'

.github/workflows/ce_job.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ jobs:
191191
192192
commit_id=${{ github.sha }}
193193
branch_name=${{ github.ref_name }}
194-
target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
194+
target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
195195
196196
wget -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
197197
push_file=$(realpath bos_tools.py)
@@ -201,7 +201,7 @@ jobs:
201201
target_path_stripped="${target_path#paddle-qa/}"
202202
WHEEL_PATH=https://paddle-qa.bj.bcebos.com/${target_path_stripped}/${filename}
203203
204-
target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
204+
target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
205205
python ${push_file} ${filename} ${target_path_latest}
206206
target_path_stripped_latest="${target_path_latest#paddle-qa/}"
207207
WHEEL_PATH_LATEST=https://paddle-qa.bj.bcebos.com/${target_path_stripped_latest}/${filename}
@@ -230,7 +230,7 @@ jobs:
230230
231231
commit_id=${{ github.sha }}
232232
branch_name=${{ github.ref_name }}
233-
target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
233+
target_path=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/${commit_id}
234234
235235
wget -q --no-proxy --no-check-certificate https://paddle-qa.bj.bcebos.com/CodeSync/develop/PaddlePaddle/PaddleTest/tools/bos_tools.py
236236
push_file=$(realpath bos_tools.py)
@@ -240,7 +240,7 @@ jobs:
240240
target_path_stripped="${target_path#paddle-qa/}"
241241
WHEEL_PATH=https://paddle-qa.bj.bcebos.com/${target_path_stripped}/${filename}
242242
243-
target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
243+
target_path_latest=paddle-qa/paddle-pipeline/FastDeploy_ActionCE/cu126/SM${COMPILE_ARCH//,/_}/${branch_name}/latest
244244
python ${push_file} ${filename} ${target_path_latest}
245245
target_path_stripped_latest="${target_path_latest#paddle-qa/}"
246246
WHEEL_PATH_LATEST=https://paddle-qa.bj.bcebos.com/${target_path_stripped_latest}/${filename}

.github/workflows/pr_build_and_test.yml

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -75,23 +75,3 @@ jobs:
7575
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
7676
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
7777
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
78-
79-
accuracy_test:
80-
name: Run Accuracy Tests
81-
needs: [clone,build]
82-
uses: ./.github/workflows/_accuracy_test.yml
83-
with:
84-
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
85-
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
86-
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
87-
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"
88-
89-
stable_test:
90-
name: Run Stable Tests
91-
needs: [clone,build]
92-
uses: ./.github/workflows/_stable_test.yml
93-
with:
94-
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
95-
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
96-
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
97-
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"

.github/workflows/publish_job.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,5 +377,5 @@ jobs:
377377
with:
378378
DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate
379379
FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
380-
FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }}
380+
FASTDEPLOY_WHEEL_URL: ${{ needs.build_sm8090.outputs.wheel_path }}
381381
MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData"

benchmarks/backend_request_func.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class RequestFuncInput:
5151
ignore_eos: bool = False
5252
language: Optional[str] = None
5353
debug: bool = False
54+
response_format: Optional[dict] = None
5455

5556

5657
@dataclass
@@ -93,8 +94,11 @@ async def async_request_eb_openai_chat_completions(
9394
"stream_options": {
9495
"include_usage": True,
9596
"continuous_usage_stats": True,
96-
},
97+
}
9798
}
99+
if request_func_input.response_format:
100+
payload["response_format"] =request_func_input.response_format
101+
98102
# 超参由yaml传入
99103
payload.update(request_func_input.hyper_parameters)
100104

benchmarks/benchmark_dataset.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ class SampleRequest:
4545
json_data: Optional[dict]
4646
prompt_len: int
4747
expected_output_len: int
48-
48+
response_format: Optional[dict] = None
49+
4950

5051
class BenchmarkDataset(ABC):
5152
"""BenchmarkDataset"""
@@ -297,6 +298,7 @@ def sample(
297298
json_data = entry
298299
prompt = entry["messages"][-1].get("content", "")
299300
history_QA = entry.get("messages", [])
301+
response_format = entry.get("response_format")
300302
new_output_len = int(entry.get("max_tokens", 12288))
301303

302304
if enable_multimodal_chat:
@@ -309,6 +311,7 @@ def sample(
309311
prompt_len=0,
310312
history_QA=history_QA,
311313
expected_output_len=new_output_len,
314+
response_format=response_format
312315
)
313316
)
314317
cnt += 1

benchmarks/benchmark_serving.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ async def benchmark(
336336
input_requests[0].no,
337337
)
338338
test_history_QA = input_requests[0].history_QA
339+
response_format = input_requests[0].response_format
339340

340341
test_input = RequestFuncInput(
341342
model=model_id,
@@ -351,6 +352,7 @@ async def benchmark(
351352
ignore_eos=ignore_eos,
352353
debug=debug,
353354
extra_body=extra_body,
355+
response_format=response_format
354356
)
355357

356358
print("test_input:", test_input)
@@ -382,6 +384,7 @@ async def benchmark(
382384
logprobs=logprobs,
383385
ignore_eos=ignore_eos,
384386
extra_body=extra_body,
387+
response_format=response_format
385388
)
386389
profile_output = await request_func(request_func_input=profile_input)
387390
if profile_output.success:
@@ -420,6 +423,7 @@ async def limited_request_func(request_func_input, pbar):
420423
request.no,
421424
)
422425
history_QA = request.history_QA
426+
response_format = request.response_format
423427

424428
req_model_id, req_model_name = model_id, model_name
425429
if lora_modules:
@@ -440,6 +444,7 @@ async def limited_request_func(request_func_input, pbar):
440444
debug=debug,
441445
ignore_eos=ignore_eos,
442446
extra_body=extra_body,
447+
response_format=response_format
443448
)
444449
tasks.append(asyncio.create_task(limited_request_func(request_func_input=request_func_input, pbar=pbar)))
445450
outputs: list[RequestFuncOutput] = await asyncio.gather(*tasks)
@@ -455,6 +460,7 @@ async def limited_request_func(request_func_input, pbar):
455460
api_url=base_url + "/stop_profile",
456461
output_len=test_output_len,
457462
logprobs=logprobs,
463+
response_format=response_format
458464
)
459465
profile_output = await request_func(request_func_input=profile_input)
460466
if profile_output.success:
@@ -982,7 +988,7 @@ def main(args: argparse.Namespace):
982988
if args.result_dir:
983989
file_name = os.path.join(args.result_dir, file_name)
984990
with open(file_name, "w", encoding="utf-8") as outfile:
985-
json.dump(result_json, outfile)
991+
json.dump(result_json, outfile, ensure_ascii=False)
986992
save_to_pytorch_benchmark_format(args, result_json, file_name)
987993

988994

benchmarks/yaml/x1-64k-w4a8c8-tp4.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
reasoning-parser: ernie_x1
2-
tool_call_parser: ernie_x1
1+
reasoning-parser: ernie-x1
2+
tool_call_parser: ernie-x1
33
tensor_parallel_size: 4
44
max_model_len: 65536
55
max_num_seqs: 128
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
tensor_parallel_size: 1
22
max_model_len: 131072
33
max_num_seqs: 32
4-
reasoning_parser: ernie_x1
5-
tool_call_parser: ernie_x1
4+
reasoning_parser: ernie-x1
5+
tool_call_parser: ernie-x1
66
load_choices: "default_v1"
77
quantization: wint8

0 commit comments

Comments
 (0)