Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature, Hardware] Enable DeepseekV3 on AMD GPUs #2601

Merged
merged 27 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
45dfe9e
Add hip config
BruceXcluding Dec 26, 2024
d315402
Merge branch 'sgl-project:main' into main
BruceXcluding Dec 27, 2024
57a5006
Fix AMD moe_align and triton stage config
Dec 27, 2024
3fa113b
fix fused_moe.py conflict
BruceXcluding Dec 27, 2024
f1c48e2
Merge branch 'main' into main
HaiShaw Dec 27, 2024
6fb6d7c
fix typo
BruceXcluding Dec 27, 2024
83a682a
Merge remote-tracking branch 'upstream/main'
BruceXcluding Dec 27, 2024
732c6b5
remove not_hip in fused_moe
BruceXcluding Dec 27, 2024
a645383
Merge branch 'sgl-project:main' into main
BruceXcluding Dec 28, 2024
8a62e6e
Add normalize_e4m3fnuz into block quant
Dec 28, 2024
2b4afba
Merged upstream and added AMD block_shape MoE config
Dec 28, 2024
f0122b7
Fix shmem/LDS size constraint on AMD MI3xx
HaiShaw Dec 29, 2024
4379b5c
Lint
HaiShaw Dec 29, 2024
fe54618
Merge branch 'main' into main
HaiShaw Dec 29, 2024
0a3b5c1
fix MOE_PADDING=1 mismatch
Dec 29, 2024
ba1597c
Merge branch 'sgl-project:main' into main
BruceXcluding Dec 29, 2024
1c48b3d
fix e4m3fnuz scaling max
Dec 30, 2024
a021825
Merge branch 'sgl-project:main' into main
BruceXcluding Dec 30, 2024
7aad77e
refactor setup.py with rocm
Dec 30, 2024
3dddac3
merge haishaw FP8 Numerical fix
Dec 30, 2024
ca11e11
Merge branch 'sgl-project:main' into main
BruceXcluding Dec 30, 2024
abc497d
separate sgl-kernel with amd backend
BruceXcluding Dec 31, 2024
4bb3332
Merge 'main' into 'main'
Jan 2, 2025
b10c089
Clang format
BruceXcluding Jan 2, 2025
bf2ad5d
Merge branch 'main' into main
zhyncs Jan 2, 2025
3b63a5f
Merge branch 'main' into main
zhyncs Jan 2, 2025
7b8d375
Merge branch 'main' into main
HaiShaw Jan 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,10 @@ def _decode_grouped_att_m_fwd(
Lk = k_buffer.shape[-1]
Lv = v_buffer.shape[-1]

# [TODO] work around shmem limit on MI3xx
if is_hip_ and Lk >= 576:
HaiShaw marked this conversation as resolved.
Show resolved Hide resolved
BLOCK = 16

if Lk == 576:
BLOCK_DMODEL = 512
BLOCK_DPE = 64
Expand Down
10 changes: 5 additions & 5 deletions python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,9 +477,9 @@ def invoke_fused_moe_kernel(

padded_size = 0
if use_fp8_w8a8:
padded_size = padding_size
assert B_scale is not None
if block_shape is None:
padded_size = padding_size
A, A_scale = ops.scaled_fp8_quant(A, A_scale)
else:
assert len(block_shape) == 2
Expand Down Expand Up @@ -614,7 +614,7 @@ def get_default_config(
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 32,
"num_warps": 8,
"num_stages": 4,
"num_stages": 2 if is_hip_flag else 4,
}
if M <= E:
config = {
Expand All @@ -623,7 +623,7 @@ def get_default_config(
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 4,
"num_stages": 2 if is_hip_flag else 4,
}
else:
# Block-wise quant: BLOCK_SIZE_K must be divisible by block_shape[1]
Expand All @@ -633,7 +633,7 @@ def get_default_config(
"BLOCK_SIZE_K": block_shape[1],
"GROUP_SIZE_M": 32,
"num_warps": 4,
"num_stages": 3,
"num_stages": 2 if is_hip_flag else 3,
}
else:
config = {
Expand Down Expand Up @@ -878,7 +878,7 @@ def fused_experts_impl(
block_shape: Optional[List[int]] = None,
):
padded_size = padding_size
if not use_fp8_w8a8:
if not use_fp8_w8a8 or block_shape is not None:
padded_size = 0

# Check constraints.
Expand Down
Loading