Skip to content

Commit 28de91b

Browse files
authored
[Graph Optimization] SOT+CUDAGraph support ERNIE4.5T VL 28B / 424B (#4645)
* 45TVL: support SOT + CUDAGraph
* Move unit test from ce_deploy to e2e
* Add test_EB_VL_Lite_sot_serving
* Remove useless line
* Add openai_client
* Fix unit test and reduce computing resources
1 parent 937bcfc commit 28de91b

File tree

3 files changed

+454
-4
lines changed

3 files changed

+454
-4
lines changed

benchmarks/benchmark_serving.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -982,7 +982,7 @@ def main(args: argparse.Namespace):
982982
if args.result_dir:
983983
file_name = os.path.join(args.result_dir, file_name)
984984
with open(file_name, "w", encoding="utf-8") as outfile:
985-
json.dump(result_json, outfile)
985+
json.dump(result_json, outfile, ensure_ascii=False)
986986
save_to_pytorch_benchmark_format(args, result_json, file_name)
987987

988988

fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ def load_state_dict(self, state_dict):
277277
def forward(self, hidden_states: paddle.Tensor, vl_moe_meta: VLMoEMeta):
278278
if self.num_shared_experts > 0:
279279
shared_experts_out = self.shared_experts(hidden_states)
280-
hidden_states, vl_moe_meta.text_input, vl_moe_meta.image_input = text_image_gather_scatter(
280+
hidden_states, text_input, image_input = text_image_gather_scatter(
281281
hidden_states,
282282
vl_moe_meta.text_input,
283283
vl_moe_meta.image_input,
@@ -286,8 +286,8 @@ def forward(self, hidden_states: paddle.Tensor, vl_moe_meta: VLMoEMeta):
286286
vl_moe_meta.image_index,
287287
True,
288288
)
289-
text_out = self.text_fused_moe(vl_moe_meta.text_input)
290-
image_out = self.image_fused_moe(vl_moe_meta.image_input)
289+
text_out = self.text_fused_moe(text_input)
290+
image_out = self.image_fused_moe(image_input)
291291
hidden_states, _, _ = text_image_gather_scatter(
292292
hidden_states,
293293
text_out,

0 commit comments

Comments
 (0)