Merged
42 commits
a7794e8
fix(config): add dsv3 tokenizer file
hushenwei2000 Sep 22, 2025
ed4d341
bugfix: ernie dataset tests (#2627)
Jonathans575 Sep 18, 2025
a75a7a7
remove flashmask checker (#2631)
WYB27 Sep 18, 2025
1bae114
【GPT-OSS】update sliding_attention layer use flashmask (#2606)
xiaoguoguo626807 Sep 19, 2025
e5bd1ab
[Bug] Fix precision of gate and e_score_correction_bias in Glm4Moe (#…
DrownFish19 Sep 19, 2025
f2d82ad
Add Qwen3 download source (#2638)
Ace-To-HYB Sep 19, 2025
fd880dc
[CI] update pytest config (#2620)
Liujie0926 Sep 19, 2025
c12cd01
examples update yaml training config (#2644)
llbdyiu66 Sep 19, 2025
ac31b1f
fix save tensor dtype (#2642)
llbdyiu66 Sep 19, 2025
1f31898
Fix the issue of loading ckpt when retraining (#2645)
Ace-To-HYB Sep 19, 2025
ab9273a
feat(MoE Layer): add unified MoE with DeepEP (part 1)
hushenwei2000 Sep 23, 2025
d13391d
feat(MoE Layer): add unified MoE with DeepEP (part 1)
hushenwei2000 Sep 24, 2025
996421f
feat(MoE Layer): add unified MoE with DeepEP (part 3)
hushenwei2000 Sep 25, 2025
3851143
feat(MoE Layer): add unified MoE with DeepEP
hushenwei2000 Sep 25, 2025
2c290ac
feat(MoE Layer): add unified MoE with DeepEP
hushenwei2000 Sep 26, 2025
503cfe2
feat(MoE Layer): add unified MoE with DeepEP
hushenwei2000 Sep 28, 2025
6477ade
fix(config): add dsv3 tokenizer file
hushenwei2000 Sep 22, 2025
df51bbc
Merge branch 'develop' into add_moe_ep
hushenwei2000 Sep 28, 2025
6b20779
Merge branch 'develop' into add_moe_ep
hushenwei2000 Sep 28, 2025
1dcee3e
Merge branch 'develop' into add_moe_ep
hushenwei2000 Sep 28, 2025
80e0999
Format code
hushenwei2000 Sep 28, 2025
c64d4b8
Format code
hushenwei2000 Sep 28, 2025
e94098b
Add customized loss system
hushenwei2000 Sep 29, 2025
a7b3e09
feat _probs_drop_policy
hushenwei2000 Sep 30, 2025
093748d
fix _forward_traditional_moe method
hushenwei2000 Sep 30, 2025
a71a620
temporary commit(test): used for test
hushenwei2000 Oct 9, 2025
230f074
Align single GPU Qwen3 Precision
hushenwei2000 Oct 21, 2025
20b59d0
update train_gpu.sh
hushenwei2000 Oct 22, 2025
7c2ecc6
refine code; support DeepEP
hushenwei2000 Oct 22, 2025
491ebfa
support sharding ep
hushenwei2000 Oct 29, 2025
d36745e
fix(config): add dsv3 tokenizer file
hushenwei2000 Sep 22, 2025
8dec0f6
Merge branch 'develop' into add_moe_ep
hushenwei2000 Oct 29, 2025
edc777e
modify loading logic
hushenwei2000 Oct 29, 2025
39a4edf
remove examples
hushenwei2000 Oct 29, 2025
779700f
fix code
hushenwei2000 Oct 29, 2025
e710c9f
remove moe_config.json; fix code
hushenwei2000 Oct 30, 2025
6977362
lint code
hushenwei2000 Oct 30, 2025
e208707
Remove unused import
hushenwei2000 Nov 2, 2025
b1f00d1
remove moe_expert; refine code
hushenwei2000 Nov 2, 2025
0476ae0
remove moe_config
hushenwei2000 Nov 2, 2025
ea2182f
add ep_communication_type configuration
hushenwei2000 Nov 2, 2025
7e9e276
lint code
hushenwei2000 Nov 3, 2025
45 changes: 45 additions & 0 deletions paddleformers/nn/moe_deepep/__init__.py
@@ -0,0 +1,45 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from contextlib import suppress
from typing import TYPE_CHECKING

from ...utils.lazy_import import _LazyModule

import_structure = {
"modular_moe_layer": ["ModularMoELayer"],
"moe_communication": ["MoECommunicationInterface", "AllToAllMoECommunication", "DeepEPMoECommunication"],
"moe_expert": ["MoEExpertInterface", "StandardMoEExpert", "Qwen2MLP"],
"moe_gate": ["PretrainedMoEGate"],
"moe_factory": ["QuickAccessMoEFactory"],
}

if TYPE_CHECKING:
    from .modular_moe_layer import ModularMoELayer
    from .moe_communication import (
        AllToAllMoECommunication,
        DeepEPMoECommunication,
        MoECommunicationInterface,
    )
    from .moe_expert import MoEExpertInterface, Qwen2MLP, StandardMoEExpert
    from .moe_factory import *
    from .moe_gate import PretrainedMoEGate
else:
    sys.modules[__name__] = _LazyModule(
        __name__,
        globals()["__file__"],
        import_structure,
        module_spec=__spec__,
    )
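
A minimal usage sketch (not part of the diff; the consumer code below is an assumption): with the _LazyModule registration above, the submodules listed in import_structure are not imported when the package loads, only when one of their symbols is first accessed.

# Hypothetical consumer code, assuming the lazy registry above:
# only the submodules that own the requested names are imported here;
# the remaining entries in import_structure stay unloaded.
from paddleformers.nn.moe_deepep import ModularMoELayer, PretrainedMoEGate

moe_layer_cls = ModularMoELayer  # resolved lazily on first attribute access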