diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py index ec4bb2b0519..e3a859ed685 100755 --- a/examples/llm_ptq/example_utils.py +++ b/examples/llm_ptq/example_utils.py @@ -312,13 +312,13 @@ def get_inlined_mtp_prefixes(config: Any) -> list[str]: def _keys_to_prefixes(keys: Iterable[str]) -> set[str]: """Invert separate-file MTP keys into the prefixes the exporter needs for exclude_modules. ``"mtp.fc.weight"`` → ``{"mtp"}``; ``"mtp.layers.0.q_proj.weight"`` → - ``{"mtp", "mtp.layers.0"}``. Caller must filter out inlined keys; otherwise - ``"model.layers.78.eh_proj.weight"`` would emit ``"model"`` as a prefix. + ``{"mtp", "mtp.layers.0"}``. ``"model"`` top-level is dropped to avoid the + ``"model*"`` wildcard covering the whole backbone. """ prefixes: set[str] = set() for key in keys: parts = key.split(".") - if parts: + if parts and parts[0] != "model": prefixes.add(parts[0]) for i, part in enumerate(parts): if part == "layers" and i + 1 < len(parts) and parts[i + 1].isdigit(): diff --git a/tests/examples/llm_ptq/test_example_utils.py b/tests/examples/llm_ptq/test_example_utils.py index 7eccb4c46ea..0bbc31dcde0 100644 --- a/tests/examples/llm_ptq/test_example_utils.py +++ b/tests/examples/llm_ptq/test_example_utils.py @@ -134,6 +134,16 @@ def test_load_mtp_weights_separate_indexed_shard(tmp_path): assert set(orphans) == set(mtp_tensors) +def test_keys_to_prefixes_drops_model_top_level(): + # nvbug 6108133: inlined keys like "model.layers.92.X" must NOT emit "model" + # as a top-level prefix (would become "model*" excluding the whole backbone). + out = example_utils._keys_to_prefixes( + ["model.layers.92.eh_proj.weight", "mtp.fc.weight", "mtp.layers.0.q_proj.weight"] + ) + assert "model" not in out + assert out == {"mtp", "mtp.layers.0", "model.layers.92"} + + def test_load_mtp_weights_no_mtp_returns_empty(tmp_path): # Also pins the ``num_nextn_predict_layers=None`` regression: some configs # set the field explicitly to None, which must not crash ``int(None)``.