diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
index 4ac8b7f02..99c5cb95a 100755
--- a/examples/llm_ptq/example_utils.py
+++ b/examples/llm_ptq/example_utils.py
@@ -180,7 +180,7 @@ def build_quant_cfg(
         quant_cfg["quant_cfg"]["*image*"] = {"enable": False}
         quant_cfg["quant_cfg"]["*vision*"] = {"enable": False}
 
-    if model_type == "qwen3next" and qformat == "nvfp4":
+    if model_type in ["qwen3moe", "qwen3next"] and qformat == "nvfp4":
         # Disable the attention projection layers to retain accuracy
         quant_cfg["quant_cfg"]["model*.*attn*in_proj*"] = {"enable": False}
         quant_cfg["quant_cfg"]["model*.*attn*q_proj*"] = {"enable": False}
diff --git a/modelopt/torch/export/model_utils.py b/modelopt/torch/export/model_utils.py
index 4af41749e..5a24429ad 100755
--- a/modelopt/torch/export/model_utils.py
+++ b/modelopt/torch/export/model_utils.py
@@ -29,6 +29,7 @@
     "MPT": "mpt",
     "Bloom": "bloom",
     "ChatGLM": "chatglm",
+    "Qwen3Moe": "qwen3moe",
     "Qwen3Next": "qwen3next",
     "QWen": "qwen",
     "RecurrentGemma": "recurrentgemma",
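
Illustrative sketch (not part of this diff): how the wildcard exclusions above take effect when building and applying a quantization config. This assumes modelopt's mtq.quantize and NVFP4_DEFAULT_CFG entry points; the model and the calibration loop (forward_loop) are hypothetical placeholders. The config keys are glob-style patterns matched against module names, so setting {"enable": False} for a pattern keeps every matching attention projection layer in its original precision.

import copy

import modelopt.torch.quantization as mtq

# Start from the stock NVFP4 config, as build_quant_cfg does for qformat == "nvfp4".
quant_cfg = copy.deepcopy(mtq.NVFP4_DEFAULT_CFG)

# Same patterns as the diff: for qwen3moe / qwen3next under NVFP4, skip
# quantizing the attention projection layers to retain accuracy.
for pattern in ("model*.*attn*in_proj*", "model*.*attn*q_proj*"):
    quant_cfg["quant_cfg"][pattern] = {"enable": False}

# model = ...         # a loaded Qwen3 MoE model (placeholder)
# forward_loop = ...  # calibration loop feeding sample data (placeholder)
# model = mtq.quantize(model, quant_cfg, forward_loop)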