From d28acd3322f6dfafda7cf7c3d8f7c428100f3b14 Mon Sep 17 00:00:00 2001 From: Chenhan Yu Date: Thu, 12 Mar 2026 12:45:37 -0700 Subject: [PATCH 01/12] add: ModelOpt Launcher for Slurm job submission Add launcher/ module with launch.py that submits quantization, training, and evaluation jobs to Slurm clusters via nemo-run. Produces identical code/ layout as nmm-sandbox's slurm.py so the same YAML configs work in both. Includes Megatron-LM and Model-Optimizer as submodules. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Chenhan Yu --- .gitmodules | 6 + launcher/__init__.py | 16 ++ launcher/launch.py | 461 +++++++++++++++++++++++++++++++ launcher/modules/Megatron-LM | 1 + launcher/modules/Model-Optimizer | 1 + launcher/pyproject.toml | 12 + 6 files changed, 497 insertions(+) create mode 100644 .gitmodules create mode 100644 launcher/__init__.py create mode 100644 launcher/launch.py create mode 160000 launcher/modules/Megatron-LM create mode 160000 launcher/modules/Model-Optimizer create mode 100644 launcher/pyproject.toml diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..23a5af209 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "launcher/modules/Megatron-LM"] + path = launcher/modules/Megatron-LM + url = https://github.com/AAnoosheh/Megatron-LM.git +[submodule "launcher/modules/Model-Optimizer"] + path = launcher/modules/Model-Optimizer + url = https://github.com/NVIDIA/Model-Optimizer.git diff --git a/launcher/__init__.py b/launcher/__init__.py new file mode 100644 index 000000000..11b92d8b7 --- /dev/null +++ b/launcher/__init__.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters.""" diff --git a/launcher/launch.py b/launcher/launch.py new file mode 100644 index 000000000..19b462f90 --- /dev/null +++ b/launcher/launch.py @@ -0,0 +1,461 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters. 
+ +Usage: + uv run launch.py task=@configs/quantize/Qwen3-8B.yaml --yes + uv run launch.py pipeline=@configs/pipeline/eagle3.yaml --yes + uv run launch.py task=@configs/quantize/Qwen3-8B.yaml hf_local=/mnt/hf-local --yes + +Environment variables: + SLURM_HOST Slurm login node hostname (required for remote jobs) + SLURM_ACCOUNT Slurm account/partition billing (default: from YAML) + SLURM_JOB_DIR Remote directory for job artifacts + SLURM_HF_LOCAL Path to HuggingFace model cache on the cluster + HF_TOKEN HuggingFace API token + NEMORUN_HOME NeMo Run home directory (default: current working directory) +""" + +import dataclasses +import getpass +import json +import os +import re +import warnings +from dataclasses import dataclass + +import nemo_run as run +import yaml + +# --------------------------------------------------------------------------- +# Slurm configuration +# --------------------------------------------------------------------------- + + +@dataclass +class SlurmConfig: + """Cluster-agnostic Slurm configuration. + + Users define cluster details in their YAML configs or override via CLI. + No internal cluster defaults are embedded here. 
+ """ + + host: str | None = None + port: int = 22 + account: str | None = None + partition: str = "batch" + container: str | None = None + modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt" + container_mounts: list[str] | None = None + srun_args: list[str] | None = None + array: str | None = None + nodes: int = 1 + ntasks_per_node: int = 1 + gpus_per_node: int = 1 + local: bool = False + + +@run.cli.factory +@run.autoconvert +def slurm_factory( + host: str = os.environ.get("SLURM_HOST", ""), + account: str = os.environ.get("SLURM_ACCOUNT", ""), + partition: str = "batch", + nodes: int = 1, + ntasks_per_node: int = 1, + gpus_per_node: int = 1, + container: str = "nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5", + modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt", + container_mounts: list[str] | None = None, + srun_args: list[str] | None = None, + array: str | None = None, +) -> SlurmConfig: + """Generic Slurm factory — configure via environment variables or CLI overrides.""" + if container_mounts is None: + hf_local = os.environ.get("SLURM_HF_LOCAL", "/hf-local") + container_mounts = ["{}:/hf-local".format(hf_local)] + if srun_args is None: + srun_args = ["--no-container-mount-home"] + return SlurmConfig( + host=host, + account=account, + partition=partition, + nodes=nodes, + ntasks_per_node=ntasks_per_node, + gpus_per_node=gpus_per_node, + container=container, + modelopt_install_path=modelopt_install_path, + container_mounts=container_mounts, + srun_args=srun_args, + array=array, + ) + + +# --------------------------------------------------------------------------- +# Default environment variables injected into every job +# --------------------------------------------------------------------------- + +DEFAULT_SLURM_ENV = { + "HF_HOME": "/hf-cache", + "HF_TOKEN": os.getenv("HF_TOKEN", ""), + "MLM_SKIP_INSTALL": "1", + "LAUNCH_SCRIPT": "python", +} + +DEFAULT_LOCAL_ENV = { + "HF_HOME": "/hf-cache", + 
"HF_TOKEN": os.getenv("HF_TOKEN", ""), + "MLM_SKIP_INSTALL": "1", +} + + +# --------------------------------------------------------------------------- +# Task and pipeline dataclasses +# --------------------------------------------------------------------------- + + +@dataclass +class SandboxTask: + """A single task with a script, slurm config, args, and environment.""" + + script: str = None + slurm_config: SlurmConfig = None + args: list[str] = None + environment: list[dict] = None + yaml_file: str = None + + +@dataclass +class SandboxTask0(SandboxTask): + """Task slot 0 in a pipeline.""" + + +@dataclass +class SandboxTask1(SandboxTask): + """Task slot 1 in a pipeline.""" + + +@dataclass +class SandboxTask2(SandboxTask): + """Task slot 2 in a pipeline.""" + + +@dataclass +class SandboxTask3(SandboxTask): + """Task slot 3 in a pipeline.""" + + +@dataclass +class SandboxTask4(SandboxTask): + """Task slot 4 in a pipeline.""" + + +def create_task_from_yaml(yaml_file: str) -> SandboxTask: + """Create a SandboxTask from a YAML config file.""" + with open(yaml_file) as file: + config_from_yaml = yaml.safe_load(file) + + script = config_from_yaml["script"] + function_name = config_from_yaml["slurm_config"].pop("_factory_") + slurm_config = globals()[function_name](**config_from_yaml["slurm_config"]) + args = config_from_yaml.get("args", None) + environment = config_from_yaml.get("environment", None) + + return SandboxTask(script=script, slurm_config=slurm_config, args=args, environment=environment) + + +@dataclass +class GlobalVariables: + """Shared variables for <> interpolation in pipeline YAMLs.""" + + hf_model: str = None + hf_data: str = None + + +@dataclass +class SandboxPipeline: + """A multi-task pipeline with shared global variables and task dependencies.""" + + global_vars: GlobalVariables = None + + task_0: SandboxTask0 = None + task_1: SandboxTask1 = None + task_2: SandboxTask2 = None + task_3: SandboxTask3 = None + task_4: SandboxTask4 = None + tasks: 
list[SandboxTask] = None + + test_level: int = 0 + allow_to_fail: bool = False + skip: bool = False + note: str = "" + task_configs: list[str] = None + experiment = None + + def __post_init__(self): + if self.tasks is None: + self.tasks = [] + for i in range(5): + task = getattr(self, "task_{}".format(i), None) + if task is not None: + self.tasks += [task] + if self.task_configs is not None: + self.tasks += [ + create_task_from_yaml(yaml_file=yaml_file) for yaml_file in self.task_configs + ] + + if self.global_vars is not None: + global_vars_dict = { + k: v for k, v in dataclasses.asdict(self.global_vars).items() if v is not None + } + + def _resolve(s): + if not isinstance(s, str): + return s + return re.sub( + r"<>", + lambda m: global_vars_dict.get(m.group(1), m.group(0)), + s, + ) + + for task in self.tasks: + if task.environment: + if isinstance(task.environment, list): + task.environment = [ + {k: _resolve(v) for k, v in item.items()} for item in task.environment + ] + else: + task.environment = {k: _resolve(v) for k, v in task.environment.items()} + if task.args: + task.args = [_resolve(a) for a in task.args] + + +# --------------------------------------------------------------------------- +# Code packager — sync only the necessary source trees to the cluster +# --------------------------------------------------------------------------- + +# Resolve paths relative to Model-Optimizer root (parent of launcher/) +LAUNCHER_DIR = os.path.dirname(os.path.abspath(__file__)) +MODELOPT_ROOT = os.path.dirname(LAUNCHER_DIR) + +# All paths relative to LAUNCHER_DIR so code/ mirrors the launcher directory. +# This produces the same layout as nmm-sandbox's slurm.py: +# code/modules/Megatron-LM/megatron/... +# code/modules/Model-Optimizer/modelopt/... +# code/services/... 
+packager = run.PatternPackager( + include_pattern=[ + "modules/Megatron-LM/megatron/*", + "modules/Megatron-LM/examples/*", + "modules/Megatron-LM/*.py", + "modules/Model-Optimizer/modelopt/*", + "modules/Model-Optimizer/examples/*", + "services/*", + "tests/*", + ], + relative_path=[LAUNCHER_DIR] * 7, +) + + +# --------------------------------------------------------------------------- +# Executor builders +# --------------------------------------------------------------------------- + + +def get_slurm_executor(user, identity, slurm_config, experiment_id, job_dir, task_name): + """Build a SlurmExecutor for remote job submission.""" + container_mounts = slurm_config.container_mounts or [] + + scratch_dst = "/scratchspace" + scratch_src = job_dir + "/cicd/" + experiment_id + modelopt_dst = slurm_config.modelopt_install_path + modelopt_src = ( + job_dir + + "/cicd/" + + experiment_id + + "/{}/code/modules/Model-Optimizer/modelopt".format(task_name) + ) + container_mounts = [ + *container_mounts, + scratch_src + ":" + scratch_dst, + modelopt_src + ":" + modelopt_dst, + ] + + tunnel = run.SSHTunnel( + host=slurm_config.host, + user=getpass.getuser() if user is None else user, + port=slurm_config.port, + job_dir=job_dir, + identity=identity, + ) + + executor = run.SlurmExecutor( + account=slurm_config.account, + partition=slurm_config.partition, + ntasks_per_node=slurm_config.ntasks_per_node, + gpus_per_node=slurm_config.gpus_per_node, + nodes=slurm_config.nodes, + tunnel=tunnel, + container_image=slurm_config.container, + container_mounts=container_mounts, + array=slurm_config.array, + time="04:00:00", + mem="0", + retries=0, + packager=packager, + srun_args=slurm_config.srun_args, + ) + return executor + + +def get_docker_executor(hf_local, slurm_config, experiment_id, job_dir, task_name): + """Build a DockerExecutor for local GPU jobs.""" + if slurm_config.local: + container_mounts = list(slurm_config.container_mounts or []) + else: + container_mounts = [] + 
container_mounts += [hf_local + ":/hf-local", job_dir + "/cicd:/cicd"] + + scratch_dst = "/scratchspace" + scratch_src = job_dir + "/cicd/" + experiment_id + "/" + task_name + modelopt_dst = slurm_config.modelopt_install_path + modelopt_src = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt") + container_mounts += [scratch_src + ":" + scratch_dst, modelopt_src + ":" + modelopt_dst] + + executor = run.DockerExecutor( + num_gpus=-1, + runtime="nvidia", + ipc_mode="host", + container_image=slurm_config.container, + volumes=container_mounts, + additional_kwargs={"user": "{}:{}".format(os.getuid(), os.getgid())}, + packager=packager, + ) + return executor + + +# --------------------------------------------------------------------------- +# Main entrypoint +# --------------------------------------------------------------------------- + + +@run.cli.entrypoint +def launch( + ctx: run.cli.RunContext, + job_name: str = "01_job", + job_dir: str = os.environ.get("SLURM_JOB_DIR", os.path.expanduser("~/experiments")), + task: SandboxTask | None = None, + pipeline: SandboxPipeline | None = None, + hf_local: str | None = None, + user: str = getpass.getuser(), + identity: str | None = None, +) -> None: + """Launch ModelOpt jobs on Slurm or locally with Docker. + + Args: + job_name: Name of the job. + job_dir: Remote directory for job artifacts. + task: Single task config (from YAML). + pipeline: Multi-task pipeline config (from YAML). + hf_local: Path to local HF cache (enables local Docker execution). + user: SSH user for Slurm tunnel. + identity: SSH identity file for Slurm tunnel. + """ + if "NEMORUN_HOME" not in os.environ: + warnings.warn("NEMORUN_HOME is not set. 
Defaulting to current working directory.") + run.config.set_nemorun_home(os.environ.get("NEMORUN_HOME", os.getcwd())) + + if hf_local is not None: + job_dir = os.getcwd() + "/experiments" + + job_table = {} + + if task is not None: + job_table[job_name] = SandboxPipeline(tasks=[task]) + elif pipeline is not None: + job_table[job_name] = pipeline + else: + print("No task or pipeline provided. Use task=@ or pipeline=@.") + return + + for job_name, job in job_table.items(): # noqa: PLR1704 + if job.skip: + continue + + dependency = None + exp = run.Experiment("modelopt", log_level="INFO") + job.experiment = exp + + with exp: + for task_id, task in enumerate(job.tasks): # noqa: PLR1704 + task_name = job_name + "_" + str(task_id) + task_args = [] if task.args is None else task.args + + task_env = {} + if task.environment is not None: + if isinstance(task.environment, list): + for item in task.environment: + task_env.update(item.items()) + else: + task_env = task.environment + for k, v in task_env.items(): + task_env[k] = "" if v is None else str(v) + if hf_local is not None: + executor = get_docker_executor( + hf_local, task.slurm_config, exp._id, job_dir, task_name + ) + task_env.update(DEFAULT_LOCAL_ENV) + else: + executor = get_slurm_executor( + user, identity, task.slurm_config, exp._id, job_dir, task_name + ) + task_env.update(DEFAULT_SLURM_ENV) + + task_instance = run.Script(task.script, args=task_args, env=task_env) + print( + "job {} task {} slurm_config: {}".format(job_name, task_id, task.slurm_config) + ) + + if dependency is None: + dependency = exp.add( + task_instance, tail_logs=True, name=task_name, executor=executor + ) + else: + dependency = exp.add( + task_instance, + tail_logs=True, + name=task_name, + executor=executor, + dependencies=[dependency], + ) + + exp.run(detach=ctx.detach) + + # Write metadata for downstream tools + metadata = { + "experiment_id": exp._id, + "job_name": job_name, + "allow_to_fail": job.allow_to_fail, + "note": job.note, + } 
+ metadata_path = os.path.join("experiments", "modelopt", exp._id, "metadata.json") + os.makedirs(os.path.dirname(metadata_path), exist_ok=True) + with open(metadata_path, "w") as f: + json.dump(metadata, f) + + +if __name__ == "__main__": + run.cli.main(launch) diff --git a/launcher/modules/Megatron-LM b/launcher/modules/Megatron-LM new file mode 160000 index 000000000..1e064f361 --- /dev/null +++ b/launcher/modules/Megatron-LM @@ -0,0 +1 @@ +Subproject commit 1e064f361256f34bf179c0cb808fd6287538f85a diff --git a/launcher/modules/Model-Optimizer b/launcher/modules/Model-Optimizer new file mode 160000 index 000000000..69c0d4794 --- /dev/null +++ b/launcher/modules/Model-Optimizer @@ -0,0 +1 @@ +Subproject commit 69c0d47946086d032e665ecf59a9ff28dc32f5b8 diff --git a/launcher/pyproject.toml b/launcher/pyproject.toml new file mode 100644 index 000000000..3a11c2a47 --- /dev/null +++ b/launcher/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "modelopt-launcher" +version = "0.1.0" +description = "ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters" +requires-python = ">=3.10" +dependencies = [ + "nemo_run", + "pyyaml", +] + +[tool.uv] +dev-dependencies = [] From f3d302008c453f0ddd74f0fe1638e46858b0950b Mon Sep 17 00:00:00 2001 From: Chenhan Yu Date: Fri, 13 Mar 2026 10:23:07 -0700 Subject: [PATCH 02/12] add: shared core.py, slurm_config, services, and Qwen3-8B example Extract shared logic (dataclasses, executor builders, run loop, version reporting) into core.py. Both launch.py and nmm-sandbox's slurm.py import from core.py to avoid divergence. Add slurm_config.py with generic env-var-driven factory, service scripts, Qwen3-8B PTQ example, and README with usage, flags, and bug reporting instructions. Verified: same YAML produces identical MMLU 0.736 on OCI-HSG and 0.719 locally via both slurm.py and launch.py. 
Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Chenhan Yu --- launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml | 13 + launcher/README.md | 204 ++++++++ launcher/core.py | 477 ++++++++++++++++++ launcher/launch.py | 411 ++------------- launcher/pyproject.toml | 6 +- .../services/megatron-lm/quantize/quantize.sh | 47 ++ launcher/services/service_utils.sh | 62 +++ launcher/slurm_config.py | 77 +++ 8 files changed, 916 insertions(+), 381 deletions(-) create mode 100644 launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml create mode 100644 launcher/README.md create mode 100644 launcher/core.py create mode 100755 launcher/services/megatron-lm/quantize/quantize.sh create mode 100755 launcher/services/service_utils.sh create mode 100644 launcher/slurm_config.py diff --git a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml new file mode 100644 index 000000000..e2011c2ae --- /dev/null +++ b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml @@ -0,0 +1,13 @@ +script: services/megatron-lm/quantize/quantize.sh +args: + - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail + - --calib-size 32 +environment: + - MLM_MODEL_CFG: Qwen/Qwen3-8B + - QUANT_CFG: NVFP4_DEFAULT_CFG + - TP: 1 +slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 diff --git a/launcher/README.md b/launcher/README.md new file mode 100644 index 000000000..7f87782d8 --- /dev/null +++ b/launcher/README.md @@ -0,0 +1,204 @@ +# ModelOpt Launcher + +Submit ModelOpt quantization, training, and evaluation jobs to Slurm clusters or run them locally with Docker. 
+ +## Quick Start + +```bash +# Install dependencies +curl -LsSf https://astral.sh/uv/install.sh | sh +git submodule update --init --recursive + +# Run locally (requires local GPUs and Docker) +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes + +# Run on a Slurm cluster +export SLURM_HOST=login-node.example.com +export SLURM_ACCOUNT=my_account +export SLURM_HF_LOCAL=/shared/hf-local +export SLURM_JOB_DIR=/shared/experiments +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +``` + +## Environment Variables + +| Variable | Description | Required | +|---|---|---| +| `SLURM_HOST` | Slurm login node hostname | Yes (remote jobs) | +| `SLURM_ACCOUNT` | Slurm account for billing | Yes (remote jobs) | +| `SLURM_JOB_DIR` | Remote directory for job artifacts | Yes (remote jobs) | +| `SLURM_HF_LOCAL` | Path to HuggingFace model cache on the cluster | Yes (remote jobs) | +| `HF_TOKEN` | HuggingFace API token | No | +| `NEMORUN_HOME` | NeMo Run home directory (default: cwd) | No | + +## Directory Structure + +```text +launcher/ +├── launch.py # Main entrypoint +├── slurm_config.py # SlurmConfig dataclass and factory +├── pyproject.toml # Dependencies (nemo-run, pyyaml) +├── services/ # Shell scripts executed on the cluster +│ ├── service_utils.sh # Error handling, MPI rank utilities +│ └── megatron-lm/quantize/ +│ └── quantize.sh # PTQ quantization + MMLU evaluation +├── Qwen/Qwen3-8B/ # Example: Qwen3-8B quantization config +│ └── megatron_lm_ptq.yaml +└── modules/ # Git submodules + ├── Megatron-LM/ # NVIDIA Megatron-LM training framework + └── Model-Optimizer/ # NVIDIA ModelOpt library +``` + +## Task YAML Format + +A task YAML defines the script to run, its arguments, environment variables, and Slurm configuration: + +```yaml +script: services/megatron-lm/quantize/quantize.sh +args: + - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail + - --calib-size 32 +environment: + - MLM_MODEL_CFG: Qwen/Qwen3-8B + - 
QUANT_CFG: NVFP4_DEFAULT_CFG + - TP: 1 +slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 +``` + +### Overriding Parameters + +Any parameter can be overridden from the command line: + +```bash +# Change the number of nodes +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ + task.slurm_config.nodes=2 --yes + +# Change the container image +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ + task.slurm_config.container=nvcr.io/nvidia/tensorrt-llm/release:1.3.0 --yes +``` + +### Useful Flags + +| Flag | Description | +|---|---| +| `--yes` / `-y` | Skip confirmation prompt | +| `-v` | Verbose output | +| `--dryrun` | Resolve and print the full config without running | +| `--to-yaml output.yaml` | Dump the resolved config to a YAML file without running | +| `detach=true` | Submit the job and return immediately (don't wait for completion) | + +```bash +# Preview the resolved config (all factory defaults expanded) +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --dryrun --yes -v + +# Dump resolved config to file for inspection or reproducibility +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml + +# Reproduce from a dumped config (remove the first _partial_ line) +tail -n +2 resolved.yaml > clean.yaml +uv run launch.py --yaml clean.yaml --yes + +# Submit and detach +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml detach=true --yes +``` + +## Pipeline YAML Format + +A pipeline chains multiple tasks with shared variables and sequential dependencies: + +```yaml +global_vars: + hf_model: /hf-local/Qwen/Qwen3-8B + +task_0: + script: services/megatron-lm/quantize/quantize.sh + environment: + - HF_MODEL_CKPT: <> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + +task_1: + script: services/megatron-lm/export/export.sh + environment: + - HF_MODEL_CKPT: <> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 +``` + +Launch with: + +```bash +uv run 
launch.py pipeline=@my_pipeline.yaml --yes +``` + +Tasks run sequentially — `task_1` starts only after `task_0` completes. The `<>` syntax allows sharing values across tasks. + +## Adding a New Model + +1. Create a directory: `//` +2. Add a YAML config (e.g., `megatron_lm_ptq.yaml`) following the task format above +3. Set `MLM_MODEL_CFG` to the HuggingFace model ID +4. Choose `QUANT_CFG` (e.g., `NVFP4_DEFAULT_CFG`, `INT8_DEFAULT_CFG`) +5. Set `nodes`, `ntasks_per_node`, `gpus_per_node` based on model size + +## How It Works + +1. `launch.py` parses the YAML and creates a `SandboxTask` with a `SlurmConfig` +2. Code is packaged via `PatternPackager` — only `modules/Megatron-LM/`, `modules/Model-Optimizer/`, and `services/` are synced +3. For remote jobs: code is rsynced to the cluster, an sbatch script is generated and submitted via SSH +4. For local jobs: a Docker container is launched with the same container image and mounts +5. The `code/` directory on the cluster mirrors the launcher structure: + +```text +code/ +├── modules/ +│ ├── Megatron-LM/megatron/... +│ └── Model-Optimizer/modelopt/... +└── services/... +``` + +## Reporting Bugs + +When filing a bug report, please include: + +1. **Version summary** — printed at the start of every run: + + ```text + ============================================================ + Version Report + ============================================================ + Launcher d28acd33 (main) + Megatron-LM 1e064f361 (main) + Model-Optimizer 69c0d479 (main) + ============================================================ + ``` + +2. **Reproducible config** — dump with `--to-yaml`: + + ```bash + uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml bug_report.yaml + ``` + +3. **Error output** — the relevant error message or traceback from the job log. 
+ +File issues at: + +## Compatibility with nmm-sandbox + +This launcher produces the same `code/` layout as [nmm-sandbox](https://gitlab-master.nvidia.com/omniml/integration/nmm-sandbox)'s `slurm.py`. The same YAML configs work with both: + +```bash +# From nmm-sandbox (internal) +uv run slurm.py task=@modules/Model-Optimizer/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes + +# From Model-Optimizer/launcher (public) +uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +``` diff --git a/launcher/core.py b/launcher/core.py new file mode 100644 index 000000000..f75035f6e --- /dev/null +++ b/launcher/core.py @@ -0,0 +1,477 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared core logic for ModelOpt launcher and nmm-sandbox slurm.py. + +This module contains all dataclasses, executor builders, and the job run loop +shared between the public launcher (launch.py) and the internal CI orchestrator +(slurm.py). Each caller provides its own config (packager, defaults, experiment +title) and thin entrypoint. 
+""" + +import dataclasses +import getpass +import json +import os +import re +from dataclasses import dataclass + +import nemo_run as run +import yaml + +# --------------------------------------------------------------------------- +# Default environment variables injected into every job +# --------------------------------------------------------------------------- + +DEFAULT_EXPERIMENT_TITLE = "cicd" + + +def get_default_env(experiment_title=None): + """Return (slurm_env, local_env) dicts for the given experiment title.""" + title = experiment_title or DEFAULT_EXPERIMENT_TITLE + slurm_env = { + "TRITON_CACHE_DIR": f"/{title}/triton-cache", + "HF_HOME": f"/{title}/hf-cache", + "HF_TOKEN": os.getenv("HF_TOKEN", ""), + "MLM_SKIP_INSTALL": "1", + "LAUNCH_SCRIPT": "python", + } + local_env = { + "TRITON_CACHE_DIR": f"/{title}/triton-cache", + "HF_HOME": f"/{title}/hf-cache", + "HF_TOKEN": os.getenv("HF_TOKEN", ""), + "MLM_SKIP_INSTALL": "1", + } + return slurm_env, local_env + + +# SlurmConfig type — set by the caller via set_slurm_config_type() before use. +# This allows both slurm.py and launch.py to use their own SlurmConfig class. 
+_SLURM_CONFIG_TYPE = None + + +def set_slurm_config_type(cls): + """Register the SlurmConfig dataclass type used by SandboxTask.""" + global _SLURM_CONFIG_TYPE + _SLURM_CONFIG_TYPE = cls + # Patch SandboxTask's type annotation so nemo-run's CLI parser can resolve factories + SandboxTask.__dataclass_fields__["slurm_config"].type = cls + SandboxTask.__annotations__["slurm_config"] = cls + + +# --------------------------------------------------------------------------- +# Task and pipeline dataclasses +# --------------------------------------------------------------------------- + + +@dataclass +class SandboxTask: + """A single task with a script, slurm config, args, and environment.""" + + script: str = None + slurm_config: object = None # Patched at runtime by set_slurm_config_type() + args: list[str] = None + environment: list[dict[str, str]] = None + yaml_file: str = None + skip: bool = False + + +@dataclass +class SandboxTask0(SandboxTask): + """Task slot 0 in a pipeline.""" + + +@dataclass +class SandboxTask1(SandboxTask): + """Task slot 1 in a pipeline.""" + + +@dataclass +class SandboxTask2(SandboxTask): + """Task slot 2 in a pipeline.""" + + +@dataclass +class SandboxTask3(SandboxTask): + """Task slot 3 in a pipeline.""" + + +@dataclass +class SandboxTask4(SandboxTask): + """Task slot 4 in a pipeline.""" + + +def create_task_from_yaml(yaml_file, factory_lookup): + """Create a SandboxTask from a YAML config file. + + Args: + yaml_file: Path to the YAML config. + factory_lookup: Dict mapping factory names to callable factory functions. 
+ """ + with open(yaml_file) as file: + config_from_yaml = yaml.safe_load(file) + + script = config_from_yaml["script"] + function_name = config_from_yaml["slurm_config"].pop("_factory_") + slurm_config = factory_lookup[function_name](**config_from_yaml["slurm_config"]) + args = config_from_yaml.get("args", None) + environment = config_from_yaml.get("environment", None) + + return SandboxTask(script=script, slurm_config=slurm_config, args=args, environment=environment) + + +@dataclass +class GlobalVariables: + """Shared variables for <> interpolation in pipeline YAMLs.""" + + hf_model: str = None + hf_data: str = None + + +@dataclass +class SandboxPipeline: + """A multi-task pipeline with shared global variables and task dependencies.""" + + global_vars: GlobalVariables = None + + task_0: SandboxTask0 = None + task_1: SandboxTask1 = None + task_2: SandboxTask2 = None + task_3: SandboxTask3 = None + task_4: SandboxTask4 = None + tasks: list[SandboxTask] = None + + test_level: int = 0 + allow_to_fail: bool = False + skip: bool = False + note: str = "" + task_configs: list[str] = None + experiment = None + + # Set by caller — used by create_task_from_yaml + _factory_lookup: dict = None + + def __post_init__(self): + if self.tasks is None: + self.tasks = [] + for i in range(5): + task = getattr(self, f"task_{i}", None) + if task is not None: + self.tasks += [task] + if self.task_configs is not None and self._factory_lookup is not None: + self.tasks += [ + create_task_from_yaml(yaml_file=yf, factory_lookup=self._factory_lookup) + for yf in self.task_configs + ] + + if self.global_vars is not None: + global_vars_dict = { + k: v for k, v in dataclasses.asdict(self.global_vars).items() if v is not None + } + + def _resolve(s): + if not isinstance(s, str): + return s + return re.sub( + r"<>", + lambda m: global_vars_dict.get(m.group(1), m.group(0)), + s, + ) + + for task in self.tasks: + if task.environment: + if isinstance(task.environment, list): + task.environment = [ + 
{k: _resolve(v) for k, v in item.items()} for item in task.environment + ] + else: + task.environment = {k: _resolve(v) for k, v in task.environment.items()} + if task.args: + task.args = [_resolve(a) for a in task.args] + + +# --------------------------------------------------------------------------- +# Executor builders +# --------------------------------------------------------------------------- + + +def build_slurm_executor( + user, + identity, + slurm_config, + experiment_id, + job_dir, + task_name, + packager, + experiment_title="cicd", +): + """Build a SlurmExecutor for remote job submission.""" + container_mounts = list(slurm_config.container_mounts or []) + + scratch_dst = "/scratchspace" + scratch_src = f"{job_dir}/{experiment_title}/{experiment_id}" + modelopt_dst = slurm_config.modelopt_install_path + modelopt_src = ( + f"{job_dir}/{experiment_title}/{experiment_id}" + f"/{task_name}/code/modules/Model-Optimizer/modelopt" + ) + container_mounts += [ + f"{scratch_src}:{scratch_dst}", + f"{modelopt_src}:{modelopt_dst}", + f"{job_dir}/{experiment_title}:/{experiment_title}", + ] + + tunnel = run.SSHTunnel( + host=slurm_config.host, + user=getpass.getuser() if user is None else user, + port=slurm_config.port, + job_dir=job_dir, + identity=identity, + ) + + executor = run.SlurmExecutor( + account=slurm_config.account, + partition=slurm_config.partition, + ntasks_per_node=slurm_config.ntasks_per_node, + gpus_per_node=slurm_config.gpus_per_node, + nodes=slurm_config.nodes, + tunnel=tunnel, + container_image=slurm_config.container, + container_mounts=container_mounts, + array=slurm_config.array, + time="04:00:00", + mem="0", + retries=0, + packager=packager, + srun_args=slurm_config.srun_args, + ) + return executor + + +def build_docker_executor( + hf_local, + slurm_config, + experiment_id, + job_dir, + task_name, + packager, + modelopt_src_path=None, + experiment_title="cicd", +): + """Build a DockerExecutor for local GPU jobs.""" + if slurm_config.local: + 
container_mounts = list(slurm_config.container_mounts or []) + else: + container_mounts = [] + container_mounts += [f"{hf_local}:/hf-local"] + + scratch_dst = "/scratchspace" + scratch_src = os.path.join(job_dir, experiment_title, experiment_id, task_name) + os.makedirs(scratch_src, exist_ok=True) + modelopt_dst = slurm_config.modelopt_install_path + if modelopt_src_path is None: + modelopt_src_path = os.path.join(os.getcwd(), "modules/Model-Optimizer/modelopt") + exp_title_src = os.path.join(job_dir, experiment_title) + os.makedirs(exp_title_src, exist_ok=True) + container_mounts += [ + f"{scratch_src}:{scratch_dst}", + f"{modelopt_src_path}:{modelopt_dst}", + f"{exp_title_src}:/{experiment_title}", + ] + + executor = run.DockerExecutor( + num_gpus=-1, + runtime="nvidia", + ipc_mode="host", + container_image=slurm_config.container, + volumes=container_mounts, + additional_kwargs={"user": f"{os.getuid()}:{os.getgid()}"}, + packager=packager, + ) + return executor + + +# --------------------------------------------------------------------------- +# Version reporting +# --------------------------------------------------------------------------- + + +def _git_info(path): + """Get git commit hash and branch for a directory.""" + import subprocess # nosec B404 + + try: + commit = subprocess.run( # nosec B603 B607 + ["git", "rev-parse", "--short", "HEAD"], + cwd=path, + capture_output=True, + text=True, + timeout=5, + ).stdout.strip() + branch = subprocess.run( # nosec B603 B607 + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + cwd=path, + capture_output=True, + text=True, + timeout=5, + ).stdout.strip() + return commit, branch + except Exception: + return "unknown", "unknown" + + +def report_versions(base_dir): + """Print git commit and branch for the launcher and all submodules.""" + print("=" * 60) + print("Version Report") + print("=" * 60) + + # Launcher / repo root + commit, branch = _git_info(base_dir) + print(f" {'Launcher':<30} {commit:<12} ({branch})") + + # 
Submodules + modules_dir = os.path.join(base_dir, "modules") + if os.path.isdir(modules_dir): + for name in sorted(os.listdir(modules_dir)): + sub_path = os.path.join(modules_dir, name) + if os.path.exists(os.path.join(sub_path, ".git")): + commit, branch = _git_info(sub_path) + print(f" {name:<30} {commit:<12} ({branch})") + + print("=" * 60) + + +# --------------------------------------------------------------------------- +# Shared job run loop +# --------------------------------------------------------------------------- + + +def run_jobs( + job_table, + hf_local, + user, + identity, + job_dir, + packager, + default_slurm_env, + default_local_env, + experiment_title="cicd", + detach=False, + test_level=0, + modelopt_src_path=None, + base_dir=None, +): + """Run all jobs in job_table. + + Args: + job_table: Dict mapping job_name -> SandboxPipeline. + hf_local: Path to local HF cache (None for remote Slurm). + user: SSH user. + identity: SSH identity file. + job_dir: Base directory for job artifacts. + packager: PatternPackager instance. + default_slurm_env: Default env vars for Slurm jobs. + default_local_env: Default env vars for local Docker jobs. + experiment_title: Experiment title (e.g., "cicd" or "modelopt"). + detach: Whether to detach from the experiment. + test_level: Only run jobs with test_level <= this value. + modelopt_src_path: Path to modelopt source for Docker mounts. + base_dir: Base directory for version reporting (default: cwd). 
+ """ + report_versions(base_dir or os.getcwd()) + + for job_name, job in job_table.items(): + if job.test_level > test_level: + job.skip = True + if job.skip: + continue + + dependency = None + exp = run.Experiment(experiment_title, log_level="INFO") + job.experiment = exp + + with exp: + for task_id, task in enumerate(job.tasks): + if task.skip: + print(f"job {job_name} task {task_id}: skipped") + continue + task_name = f"{job_name}_{task_id}" + task_args = [] if task.args is None else task.args + + task_env = {} + if task.environment is not None: + if isinstance(task.environment, list): + for item in task.environment: + task_env.update(item.items()) + else: + task_env = task.environment + for k, v in task_env.items(): + task_env[k] = "" if v is None else str(v) + + if hf_local is not None: + executor = build_docker_executor( + hf_local, + task.slurm_config, + exp._id, + job_dir, + task_name, + packager, + modelopt_src_path, + experiment_title, + ) + task_env.update(default_local_env) + else: + executor = build_slurm_executor( + user, + identity, + task.slurm_config, + exp._id, + job_dir, + task_name, + packager, + experiment_title, + ) + task_env.update(default_slurm_env) + + task_instance = run.Script(task.script, args=task_args, env=task_env) + print(f"job {job_name} task {task_id} slurm_config: {task.slurm_config}") + + if dependency is None: + dependency = exp.add( + task_instance, tail_logs=True, name=task_name, executor=executor + ) + else: + dependency = exp.add( + task_instance, + tail_logs=True, + name=task_name, + executor=executor, + dependencies=[dependency], + ) + + exp.run(detach=detach) + + # Write metadata for downstream tools + metadata = { + "experiment_id": exp._id, + "job_name": job_name, + "allow_to_fail": job.allow_to_fail, + "note": job.note, + } + metadata_path = os.path.join("experiments", experiment_title, exp._id, "metadata.json") + os.makedirs(os.path.dirname(metadata_path), exist_ok=True) + with open(metadata_path, "w") as f: + 
json.dump(metadata, f) diff --git a/launcher/launch.py b/launcher/launch.py index 19b462f90..f6f1d928f 100644 --- a/launcher/launch.py +++ b/launcher/launch.py @@ -16,9 +16,8 @@ """ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters. Usage: - uv run launch.py task=@configs/quantize/Qwen3-8B.yaml --yes - uv run launch.py pipeline=@configs/pipeline/eagle3.yaml --yes - uv run launch.py task=@configs/quantize/Qwen3-8B.yaml hf_local=/mnt/hf-local --yes + uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes + uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes Environment variables: SLURM_HOST Slurm login node hostname (required for remote jobs) @@ -29,233 +28,26 @@ NEMORUN_HOME NeMo Run home directory (default: current working directory) """ -import dataclasses import getpass -import json import os -import re import warnings -from dataclasses import dataclass import nemo_run as run -import yaml - -# --------------------------------------------------------------------------- -# Slurm configuration -# --------------------------------------------------------------------------- - - -@dataclass -class SlurmConfig: - """Cluster-agnostic Slurm configuration. - - Users define cluster details in their YAML configs or override via CLI. - No internal cluster defaults are embedded here. 
- """ - - host: str | None = None - port: int = 22 - account: str | None = None - partition: str = "batch" - container: str | None = None - modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt" - container_mounts: list[str] | None = None - srun_args: list[str] | None = None - array: str | None = None - nodes: int = 1 - ntasks_per_node: int = 1 - gpus_per_node: int = 1 - local: bool = False - - -@run.cli.factory -@run.autoconvert -def slurm_factory( - host: str = os.environ.get("SLURM_HOST", ""), - account: str = os.environ.get("SLURM_ACCOUNT", ""), - partition: str = "batch", - nodes: int = 1, - ntasks_per_node: int = 1, - gpus_per_node: int = 1, - container: str = "nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5", - modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt", - container_mounts: list[str] | None = None, - srun_args: list[str] | None = None, - array: str | None = None, -) -> SlurmConfig: - """Generic Slurm factory — configure via environment variables or CLI overrides.""" - if container_mounts is None: - hf_local = os.environ.get("SLURM_HF_LOCAL", "/hf-local") - container_mounts = ["{}:/hf-local".format(hf_local)] - if srun_args is None: - srun_args = ["--no-container-mount-home"] - return SlurmConfig( - host=host, - account=account, - partition=partition, - nodes=nodes, - ntasks_per_node=ntasks_per_node, - gpus_per_node=gpus_per_node, - container=container, - modelopt_install_path=modelopt_install_path, - container_mounts=container_mounts, - srun_args=srun_args, - array=array, - ) - - -# --------------------------------------------------------------------------- -# Default environment variables injected into every job -# --------------------------------------------------------------------------- - -DEFAULT_SLURM_ENV = { - "HF_HOME": "/hf-cache", - "HF_TOKEN": os.getenv("HF_TOKEN", ""), - "MLM_SKIP_INSTALL": "1", - "LAUNCH_SCRIPT": "python", -} - -DEFAULT_LOCAL_ENV = { - "HF_HOME": "/hf-cache", - 
"HF_TOKEN": os.getenv("HF_TOKEN", ""), - "MLM_SKIP_INSTALL": "1", -} - - -# --------------------------------------------------------------------------- -# Task and pipeline dataclasses -# --------------------------------------------------------------------------- - - -@dataclass -class SandboxTask: - """A single task with a script, slurm config, args, and environment.""" - - script: str = None - slurm_config: SlurmConfig = None - args: list[str] = None - environment: list[dict] = None - yaml_file: str = None - - -@dataclass -class SandboxTask0(SandboxTask): - """Task slot 0 in a pipeline.""" - - -@dataclass -class SandboxTask1(SandboxTask): - """Task slot 1 in a pipeline.""" - - -@dataclass -class SandboxTask2(SandboxTask): - """Task slot 2 in a pipeline.""" - - -@dataclass -class SandboxTask3(SandboxTask): - """Task slot 3 in a pipeline.""" - - -@dataclass -class SandboxTask4(SandboxTask): - """Task slot 4 in a pipeline.""" - - -def create_task_from_yaml(yaml_file: str) -> SandboxTask: - """Create a SandboxTask from a YAML config file.""" - with open(yaml_file) as file: - config_from_yaml = yaml.safe_load(file) - - script = config_from_yaml["script"] - function_name = config_from_yaml["slurm_config"].pop("_factory_") - slurm_config = globals()[function_name](**config_from_yaml["slurm_config"]) - args = config_from_yaml.get("args", None) - environment = config_from_yaml.get("environment", None) - - return SandboxTask(script=script, slurm_config=slurm_config, args=args, environment=environment) - - -@dataclass -class GlobalVariables: - """Shared variables for <> interpolation in pipeline YAMLs.""" - - hf_model: str = None - hf_data: str = None - - -@dataclass -class SandboxPipeline: - """A multi-task pipeline with shared global variables and task dependencies.""" - - global_vars: GlobalVariables = None - - task_0: SandboxTask0 = None - task_1: SandboxTask1 = None - task_2: SandboxTask2 = None - task_3: SandboxTask3 = None - task_4: SandboxTask4 = None - tasks: 
list[SandboxTask] = None - - test_level: int = 0 - allow_to_fail: bool = False - skip: bool = False - note: str = "" - task_configs: list[str] = None - experiment = None - - def __post_init__(self): - if self.tasks is None: - self.tasks = [] - for i in range(5): - task = getattr(self, "task_{}".format(i), None) - if task is not None: - self.tasks += [task] - if self.task_configs is not None: - self.tasks += [ - create_task_from_yaml(yaml_file=yaml_file) for yaml_file in self.task_configs - ] - - if self.global_vars is not None: - global_vars_dict = { - k: v for k, v in dataclasses.asdict(self.global_vars).items() if v is not None - } - - def _resolve(s): - if not isinstance(s, str): - return s - return re.sub( - r"<>", - lambda m: global_vars_dict.get(m.group(1), m.group(0)), - s, - ) - - for task in self.tasks: - if task.environment: - if isinstance(task.environment, list): - task.environment = [ - {k: _resolve(v) for k, v in item.items()} for item in task.environment - ] - else: - task.environment = {k: _resolve(v) for k, v in task.environment.items()} - if task.args: - task.args = [_resolve(a) for a in task.args] +from core import SandboxPipeline, SandboxTask, get_default_env, run_jobs, set_slurm_config_type +from slurm_config import SlurmConfig, slurm_factory # noqa: F401 +set_slurm_config_type(SlurmConfig) # --------------------------------------------------------------------------- -# Code packager — sync only the necessary source trees to the cluster +# Launcher-specific configuration # --------------------------------------------------------------------------- -# Resolve paths relative to Model-Optimizer root (parent of launcher/) LAUNCHER_DIR = os.path.dirname(os.path.abspath(__file__)) MODELOPT_ROOT = os.path.dirname(LAUNCHER_DIR) -# All paths relative to LAUNCHER_DIR so code/ mirrors the launcher directory. -# This produces the same layout as nmm-sandbox's slurm.py: -# code/modules/Megatron-LM/megatron/... -# code/modules/Model-Optimizer/modelopt/... 
-# code/services/... +EXPERIMENT_TITLE = "cicd" +DEFAULT_SLURM_ENV, DEFAULT_LOCAL_ENV = get_default_env(EXPERIMENT_TITLE) + packager = run.PatternPackager( include_pattern=[ "modules/Megatron-LM/megatron/*", @@ -264,125 +56,38 @@ def _resolve(s): "modules/Model-Optimizer/modelopt/*", "modules/Model-Optimizer/examples/*", "services/*", - "tests/*", ], - relative_path=[LAUNCHER_DIR] * 7, + relative_path=[LAUNCHER_DIR] * 6, ) - -# --------------------------------------------------------------------------- -# Executor builders -# --------------------------------------------------------------------------- - - -def get_slurm_executor(user, identity, slurm_config, experiment_id, job_dir, task_name): - """Build a SlurmExecutor for remote job submission.""" - container_mounts = slurm_config.container_mounts or [] - - scratch_dst = "/scratchspace" - scratch_src = job_dir + "/cicd/" + experiment_id - modelopt_dst = slurm_config.modelopt_install_path - modelopt_src = ( - job_dir - + "/cicd/" - + experiment_id - + "/{}/code/modules/Model-Optimizer/modelopt".format(task_name) - ) - container_mounts = [ - *container_mounts, - scratch_src + ":" + scratch_dst, - modelopt_src + ":" + modelopt_dst, - ] - - tunnel = run.SSHTunnel( - host=slurm_config.host, - user=getpass.getuser() if user is None else user, - port=slurm_config.port, - job_dir=job_dir, - identity=identity, - ) - - executor = run.SlurmExecutor( - account=slurm_config.account, - partition=slurm_config.partition, - ntasks_per_node=slurm_config.ntasks_per_node, - gpus_per_node=slurm_config.gpus_per_node, - nodes=slurm_config.nodes, - tunnel=tunnel, - container_image=slurm_config.container, - container_mounts=container_mounts, - array=slurm_config.array, - time="04:00:00", - mem="0", - retries=0, - packager=packager, - srun_args=slurm_config.srun_args, - ) - return executor - - -def get_docker_executor(hf_local, slurm_config, experiment_id, job_dir, task_name): - """Build a DockerExecutor for local GPU jobs.""" - if 
slurm_config.local: - container_mounts = list(slurm_config.container_mounts or []) - else: - container_mounts = [] - container_mounts += [hf_local + ":/hf-local", job_dir + "/cicd:/cicd"] - - scratch_dst = "/scratchspace" - scratch_src = job_dir + "/cicd/" + experiment_id + "/" + task_name - modelopt_dst = slurm_config.modelopt_install_path - modelopt_src = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt") - container_mounts += [scratch_src + ":" + scratch_dst, modelopt_src + ":" + modelopt_dst] - - executor = run.DockerExecutor( - num_gpus=-1, - runtime="nvidia", - ipc_mode="host", - container_image=slurm_config.container, - volumes=container_mounts, - additional_kwargs={"user": "{}:{}".format(os.getuid(), os.getgid())}, - packager=packager, - ) - return executor +MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt") # --------------------------------------------------------------------------- -# Main entrypoint +# Entrypoint # --------------------------------------------------------------------------- @run.cli.entrypoint def launch( - ctx: run.cli.RunContext, job_name: str = "01_job", job_dir: str = os.environ.get("SLURM_JOB_DIR", os.path.expanduser("~/experiments")), - task: SandboxTask | None = None, - pipeline: SandboxPipeline | None = None, - hf_local: str | None = None, + task: SandboxTask = None, + pipeline: SandboxPipeline = None, + hf_local: str = None, # noqa: RUF013 user: str = getpass.getuser(), - identity: str | None = None, + identity: str = None, # noqa: RUF013 + detach: bool = False, ) -> None: - """Launch ModelOpt jobs on Slurm or locally with Docker. - - Args: - job_name: Name of the job. - job_dir: Remote directory for job artifacts. - task: Single task config (from YAML). - pipeline: Multi-task pipeline config (from YAML). - hf_local: Path to local HF cache (enables local Docker execution). - user: SSH user for Slurm tunnel. - identity: SSH identity file for Slurm tunnel. 
- """ + """Launch ModelOpt jobs on Slurm or locally with Docker.""" if "NEMORUN_HOME" not in os.environ: warnings.warn("NEMORUN_HOME is not set. Defaulting to current working directory.") run.config.set_nemorun_home(os.environ.get("NEMORUN_HOME", os.getcwd())) if hf_local is not None: - job_dir = os.getcwd() + "/experiments" + job_dir = os.path.join(os.getcwd(), "local_experiments") job_table = {} - if task is not None: job_table[job_name] = SandboxPipeline(tasks=[task]) elif pipeline is not None: @@ -391,70 +96,20 @@ def launch( print("No task or pipeline provided. Use task=@ or pipeline=@.") return - for job_name, job in job_table.items(): # noqa: PLR1704 - if job.skip: - continue - - dependency = None - exp = run.Experiment("modelopt", log_level="INFO") - job.experiment = exp - - with exp: - for task_id, task in enumerate(job.tasks): # noqa: PLR1704 - task_name = job_name + "_" + str(task_id) - task_args = [] if task.args is None else task.args - - task_env = {} - if task.environment is not None: - if isinstance(task.environment, list): - for item in task.environment: - task_env.update(item.items()) - else: - task_env = task.environment - for k, v in task_env.items(): - task_env[k] = "" if v is None else str(v) - if hf_local is not None: - executor = get_docker_executor( - hf_local, task.slurm_config, exp._id, job_dir, task_name - ) - task_env.update(DEFAULT_LOCAL_ENV) - else: - executor = get_slurm_executor( - user, identity, task.slurm_config, exp._id, job_dir, task_name - ) - task_env.update(DEFAULT_SLURM_ENV) - - task_instance = run.Script(task.script, args=task_args, env=task_env) - print( - "job {} task {} slurm_config: {}".format(job_name, task_id, task.slurm_config) - ) - - if dependency is None: - dependency = exp.add( - task_instance, tail_logs=True, name=task_name, executor=executor - ) - else: - dependency = exp.add( - task_instance, - tail_logs=True, - name=task_name, - executor=executor, - dependencies=[dependency], - ) - - 
exp.run(detach=ctx.detach) - - # Write metadata for downstream tools - metadata = { - "experiment_id": exp._id, - "job_name": job_name, - "allow_to_fail": job.allow_to_fail, - "note": job.note, - } - metadata_path = os.path.join("experiments", "modelopt", exp._id, "metadata.json") - os.makedirs(os.path.dirname(metadata_path), exist_ok=True) - with open(metadata_path, "w") as f: - json.dump(metadata, f) + run_jobs( + job_table=job_table, + hf_local=hf_local, + user=user, + identity=identity, + job_dir=job_dir, + packager=packager, + default_slurm_env=DEFAULT_SLURM_ENV, + default_local_env=DEFAULT_LOCAL_ENV, + experiment_title=EXPERIMENT_TITLE, + detach=detach, + modelopt_src_path=MODELOPT_SRC_PATH, + base_dir=LAUNCHER_DIR, + ) if __name__ == "__main__": diff --git a/launcher/pyproject.toml b/launcher/pyproject.toml index 3a11c2a47..0e576e5af 100644 --- a/launcher/pyproject.toml +++ b/launcher/pyproject.toml @@ -4,9 +4,9 @@ version = "0.1.0" description = "ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters" requires-python = ">=3.10" dependencies = [ - "nemo_run", + "nemo-run@git+https://github.com/NVIDIA-NeMo/Run@2ccf1c9e68acd157da451721b24635bcc83be87e", "pyyaml", ] -[tool.uv] -dev-dependencies = [] +[dependency-groups] +dev = [] diff --git a/launcher/services/megatron-lm/quantize/quantize.sh b/launcher/services/megatron-lm/quantize/quantize.sh new file mode 100755 index 000000000..d4b3d5248 --- /dev/null +++ b/launcher/services/megatron-lm/quantize/quantize.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +source ${SCRIPT_DIR}/../../service_utils.sh + +util_install_extra_dep + +trap 'error_handler $0 $LINENO' ERR # ERROR HANDLER +################################################################################################### + +if [[ -z ${HF_MODEL_CKPT} ]]; then + export HF_MODEL_CKPT="/hf-local/${MLM_MODEL_CFG}" +fi +export MLM_MODEL_SAVE="/scratchspace/megatron-lm/${MLM_MODEL_CFG}" +export EXPORT_DIR="/scratchspace/export/${MLM_MODEL_CFG}_${QUANT_CFG}" +export MLM_SKIP_INSTALL=1 + +QUANTIZE_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/quantize.sh" +MMLU_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/mmlu.sh" +CONVERT_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/convert.sh" +EXPORT_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/export.sh" + +export MLM_EXTRA_ARGS=${@} +${QUANTIZE_EXE} ${MLM_MODEL_CFG} ${QUANT_CFG} + +export MLM_EXTRA_ARGS="--mmlu-dataset /hf-local/cais/mmlu --fraction 0.01 --lower-bound 0.38 --disable-tqdm" +MLM_MODEL_CKPT=${MLM_MODEL_SAVE} ${MMLU_EXE} ${MLM_MODEL_CFG} + +################################################################################################### + +# This function handles the exit status (fails the CI). 
+exit_handler $0 diff --git a/launcher/services/service_utils.sh b/launcher/services/service_utils.sh new file mode 100755 index 000000000..f9d15b279 --- /dev/null +++ b/launcher/services/service_utils.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +native_mpi_rank=$OMPI_COMM_WORLD_RANK +native_mpi_local_rank=$OMPI_COMM_WORLD_LOCAL_RANK +# Works with Slurm launching with `--mpi=pmix` +mpi_rank=${PMIX_RANK:-$native_mpi_rank} +mpi_local_rank=${PMIX_LOCAL_RANK:-$native_mpi_local_rank} + +FAIL=0 +FAIL_EXIT=0 + +function error_handler { + local last_status_code=$? + echo "[ERROR] $1:$2 failed with status $last_status_code." 
>&2 + + if [[ "$mpi_rank" -eq 0 ]]; then + echo "$1:$2" >&2 + fi + FAIL=1 + FAIL_EXIT=1 +} + +function exit_handler { + if [[ $FAIL_EXIT == 1 ]]; then + exit 1 + fi +} + +function report_result { + if [[ "$mpi_rank" -eq 0 ]]; then + echo "$1" + fi +} + +function util_install_extra_dep { + if [[ "$mpi_local_rank" -eq 0 ]]; then + pip install diskcache + fi +} + +LOCAL_NUM_GPUS=$(nvidia-smi --query-gpu=count --format=csv,noheader,nounits | head -n 1) +printf "RANK ${mpi_rank} GPU count: ${LOCAL_NUM_GPUS}\n" + +# Increase the modelopt version number manually +if [[ "$mpi_local_rank" -eq 0 ]]; then + echo "__version__ = '1.0.0'" >> ./modules/Model-Optimizer/modelopt/__init__.py +fi diff --git a/launcher/slurm_config.py b/launcher/slurm_config.py new file mode 100644 index 000000000..53e39aa42 --- /dev/null +++ b/launcher/slurm_config.py @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Slurm configuration and factory for the ModelOpt Launcher.""" + +import os +from dataclasses import dataclass + +import nemo_run as run + + +@dataclass +class SlurmConfig: + """Cluster-agnostic Slurm configuration. + + Users define cluster details in their YAML configs or override via CLI. + No internal cluster defaults are embedded here. 
+ """ + + host: str = None + port: int = 22 + account: str = None + partition: str = "batch" + container: str = None + modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt" + container_mounts: list[str] = None + srun_args: list[str] = None + array: str = None + nodes: int = 1 + ntasks_per_node: int = 1 + gpus_per_node: int = 1 + local: bool = False + + +@run.cli.factory +@run.autoconvert +def slurm_factory( + host: str = os.environ.get("SLURM_HOST", ""), + account: str = os.environ.get("SLURM_ACCOUNT", ""), + partition: str = "batch", + nodes: int = 1, + ntasks_per_node: int = 1, + gpus_per_node: int = 1, + container: str = "nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5", + modelopt_install_path: str = "/usr/local/lib/python3.12/dist-packages/modelopt", + container_mounts: list[str] = [ + "{}:/hf-local".format(os.environ.get("SLURM_HF_LOCAL", "/hf-local")), + ], + srun_args: list[str] = ["--no-container-mount-home"], + array: str = None, # noqa: RUF013 +) -> SlurmConfig: + """Generic Slurm factory — configure via environment variables or CLI overrides.""" + return SlurmConfig( + host=host, + account=account, + partition=partition, + nodes=nodes, + ntasks_per_node=ntasks_per_node, + gpus_per_node=gpus_per_node, + container=container, + modelopt_install_path=modelopt_install_path, + container_mounts=container_mounts, + srun_args=srun_args, + array=array, + ) From f7f9878eb4aa1da80e0db12247975bc498e8f862 Mon Sep 17 00:00:00 2001 From: Chenhan Yu Date: Fri, 13 Mar 2026 17:25:26 -0700 Subject: [PATCH 03/12] fix: add factory registry for task_configs YAML resolution Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Chenhan Yu --- launcher/core.py | 18 +++++++++++++----- launcher/launch.py | 12 ++++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/launcher/core.py b/launcher/core.py index f75035f6e..18e22dfe8 100644 --- a/launcher/core.py +++ b/launcher/core.py @@ -60,6 +60,7 @@ def 
get_default_env(experiment_title=None): # SlurmConfig type — set by the caller via set_slurm_config_type() before use. # This allows both slurm.py and launch.py to use their own SlurmConfig class. _SLURM_CONFIG_TYPE = None +_FACTORY_REGISTRY = {} def set_slurm_config_type(cls): @@ -71,6 +72,11 @@ def set_slurm_config_type(cls): SandboxTask.__annotations__["slurm_config"] = cls +def register_factory(name, fn): + """Register a factory function by name for task_configs YAML resolution.""" + _FACTORY_REGISTRY[name] = fn + + # --------------------------------------------------------------------------- # Task and pipeline dataclasses # --------------------------------------------------------------------------- @@ -170,11 +176,13 @@ def __post_init__(self): task = getattr(self, f"task_{i}", None) if task is not None: self.tasks += [task] - if self.task_configs is not None and self._factory_lookup is not None: - self.tasks += [ - create_task_from_yaml(yaml_file=yf, factory_lookup=self._factory_lookup) - for yf in self.task_configs - ] + if self.task_configs is not None: + lookup = self._factory_lookup or _FACTORY_REGISTRY + if lookup: + self.tasks += [ + create_task_from_yaml(yaml_file=yf, factory_lookup=lookup) + for yf in self.task_configs + ] if self.global_vars is not None: global_vars_dict = { diff --git a/launcher/launch.py b/launcher/launch.py index f6f1d928f..9d9c9c993 100644 --- a/launcher/launch.py +++ b/launcher/launch.py @@ -33,10 +33,18 @@ import warnings import nemo_run as run -from core import SandboxPipeline, SandboxTask, get_default_env, run_jobs, set_slurm_config_type -from slurm_config import SlurmConfig, slurm_factory # noqa: F401 +from core import ( + SandboxPipeline, + SandboxTask, + get_default_env, + register_factory, + run_jobs, + set_slurm_config_type, +) +from slurm_config import SlurmConfig, slurm_factory set_slurm_config_type(SlurmConfig) +register_factory("slurm_factory", slurm_factory) # 
--------------------------------------------------------------------------- # Launcher-specific configuration From ad1f0d8f98a879cc7c9bd64925a5590b073d2339 Mon Sep 17 00:00:00 2001 From: Chenhan Yu Date: Sat, 14 Mar 2026 17:55:15 -0700 Subject: [PATCH 04/12] chg: remove task param from launch.py, update YAML format and README launch.py now only accepts pipeline=@ or --yaml. Update README with --yaml vs pipeline=@ docs, useful flags, and bug reporting. Update Qwen3-8B config to new --yaml format with job_name + pipeline. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Chenhan Yu --- launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml | 34 ++-- launcher/README.md | 170 ++++++++++++-------- launcher/launch.py | 20 +-- 3 files changed, 128 insertions(+), 96 deletions(-) diff --git a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml index e2011c2ae..83ed7f4f0 100644 --- a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml +++ b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml @@ -1,13 +1,21 @@ -script: services/megatron-lm/quantize/quantize.sh -args: - - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail - - --calib-size 32 -environment: - - MLM_MODEL_CFG: Qwen/Qwen3-8B - - QUANT_CFG: NVFP4_DEFAULT_CFG - - TP: 1 -slurm_config: - _factory_: "slurm_factory" - nodes: 1 - ntasks_per_node: 4 - gpus_per_node: 4 +job_name: Qwen3-8B_NVFP4_DEFAULT_CFG +pipeline: + skip: false + allow_to_fail: false + note: + + task_0: + script: services/megatron-lm/quantize/quantize.sh + args: + - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail + - --calib-size 32 + environment: + - MLM_MODEL_CFG: Qwen/Qwen3-8B + - QUANT_CFG: NVFP4_DEFAULT_CFG + - TP: 4 + slurm_config: + _factory_: "slurm_factory" # oci_hsg_slurm_factory + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 + diff --git a/launcher/README.md b/launcher/README.md index 7f87782d8..725363341 100644 --- a/launcher/README.md +++ b/launcher/README.md @@ -10,14 +10,14 @@ curl -LsSf 
https://astral.sh/uv/install.sh | sh git submodule update --init --recursive # Run locally (requires local GPUs and Docker) -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes # Run on a Slurm cluster export SLURM_HOST=login-node.example.com export SLURM_ACCOUNT=my_account export SLURM_HF_LOCAL=/shared/hf-local export SLURM_JOB_DIR=/shared/experiments -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes ``` ## Environment Variables @@ -36,37 +36,108 @@ uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes ```text launcher/ ├── launch.py # Main entrypoint +├── core.py # Shared logic (also used by nmm-sandbox's slurm.py) ├── slurm_config.py # SlurmConfig dataclass and factory ├── pyproject.toml # Dependencies (nemo-run, pyyaml) ├── services/ # Shell scripts executed on the cluster │ ├── service_utils.sh # Error handling, MPI rank utilities │ └── megatron-lm/quantize/ -│ └── quantize.sh # PTQ quantization + MMLU evaluation -├── Qwen/Qwen3-8B/ # Example: Qwen3-8B quantization config +│ ├── quantize.sh # PTQ quantization + MMLU evaluation +│ └── Qwen3-8B.yaml # Task config for Qwen3-8B +├── Qwen/Qwen3-8B/ # Example pipeline config │ └── megatron_lm_ptq.yaml └── modules/ # Git submodules ├── Megatron-LM/ # NVIDIA Megatron-LM training framework └── Model-Optimizer/ # NVIDIA ModelOpt library ``` -## Task YAML Format +## YAML Config Format -A task YAML defines the script to run, its arguments, environment variables, and Slurm configuration: +A config YAML defines the job name, pipeline metadata, and one or more tasks: ```yaml -script: services/megatron-lm/quantize/quantize.sh -args: - - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail - - --calib-size 32 -environment: - - MLM_MODEL_CFG: Qwen/Qwen3-8B - - QUANT_CFG: NVFP4_DEFAULT_CFG - - TP: 1 
-slurm_config: - _factory_: "slurm_factory" - nodes: 1 - ntasks_per_node: 4 - gpus_per_node: 4 +job_name: Qwen3-8B_NVFP4_DEFAULT_CFG +pipeline: + skip: false + allow_to_fail: false + note: + + task_0: + script: services/megatron-lm/quantize/quantize.sh + args: + - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail + - --calib-size 32 + environment: + - MLM_MODEL_CFG: Qwen/Qwen3-8B + - QUANT_CFG: NVFP4_DEFAULT_CFG + - TP: 1 + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 +``` + +### Multi-task Pipeline + +Tasks run sequentially — `task_1` starts only after `task_0` completes: + +```yaml +job_name: Qwen3-8B_quantize_export +pipeline: + global_vars: + hf_model: /hf-local/Qwen/Qwen3-8B + + task_0: + script: services/megatron-lm/quantize/quantize.sh + environment: + - HF_MODEL_CKPT: <> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + + task_1: + script: services/megatron-lm/export/export.sh + environment: + - HF_MODEL_CKPT: <> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 +``` + +The `<>` syntax shares values across tasks. + +### `--yaml` vs `pipeline=@` + +There are two ways to load a config: + +**`--yaml config.yaml`** (recommended) — the YAML maps top-level keys to function arguments. +The file contains both `job_name` and `pipeline`: + +```yaml +# config.yaml — used with: uv run launch.py --yaml config.yaml --yes +job_name: Qwen3-8B_NVFP4 +pipeline: + task_0: + script: services/megatron-lm/quantize/quantize.sh + slurm_config: + _factory_: "slurm_factory" +``` + +**`pipeline=@config.yaml`** — the YAML is a bare `SandboxPipeline` (no `job_name` or `pipeline` wrapper). 
+This is useful for reusing pipeline configs across different job names: + +```yaml +# bare_pipeline.yaml — used with: uv run launch.py pipeline=@bare_pipeline.yaml --yes +task_0: + script: services/megatron-lm/quantize/quantize.sh + slurm_config: + _factory_: "slurm_factory" +``` + +```bash +# With pipeline=@, set job_name separately +uv run launch.py pipeline=@bare_pipeline.yaml job_name=my_job --yes ``` ### Overriding Parameters @@ -75,12 +146,12 @@ Any parameter can be overridden from the command line: ```bash # Change the number of nodes -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ - task.slurm_config.nodes=2 --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ + pipeline.task_0.slurm_config.nodes=2 --yes # Change the container image -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ - task.slurm_config.container=nvcr.io/nvidia/tensorrt-llm/release:1.3.0 --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ + pipeline.task_0.slurm_config.container=nvcr.io/nvidia/tensorrt-llm/release:1.3.0 --yes ``` ### Useful Flags @@ -95,63 +166,26 @@ uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ ```bash # Preview the resolved config (all factory defaults expanded) -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --dryrun --yes -v +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --dryrun --yes -v # Dump resolved config to file for inspection or reproducibility -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml - -# Reproduce from a dumped config (remove the first _partial_ line) -tail -n +2 resolved.yaml > clean.yaml -uv run launch.py --yaml clean.yaml --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml # Submit and detach -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml detach=true --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml detach=true --yes ``` -## Pipeline YAML Format - -A pipeline 
chains multiple tasks with shared variables and sequential dependencies: - -```yaml -global_vars: - hf_model: /hf-local/Qwen/Qwen3-8B - -task_0: - script: services/megatron-lm/quantize/quantize.sh - environment: - - HF_MODEL_CKPT: <> - slurm_config: - _factory_: "slurm_factory" - nodes: 1 - -task_1: - script: services/megatron-lm/export/export.sh - environment: - - HF_MODEL_CKPT: <> - slurm_config: - _factory_: "slurm_factory" - nodes: 1 -``` - -Launch with: - -```bash -uv run launch.py pipeline=@my_pipeline.yaml --yes -``` - -Tasks run sequentially — `task_1` starts only after `task_0` completes. The `<>` syntax allows sharing values across tasks. - ## Adding a New Model 1. Create a directory: `//` -2. Add a YAML config (e.g., `megatron_lm_ptq.yaml`) following the task format above +2. Add a YAML config (e.g., `megatron_lm_ptq.yaml`) following the format above 3. Set `MLM_MODEL_CFG` to the HuggingFace model ID 4. Choose `QUANT_CFG` (e.g., `NVFP4_DEFAULT_CFG`, `INT8_DEFAULT_CFG`) 5. Set `nodes`, `ntasks_per_node`, `gpus_per_node` based on model size ## How It Works -1. `launch.py` parses the YAML and creates a `SandboxTask` with a `SlurmConfig` +1. `launch.py` parses the YAML and creates a `SandboxPipeline` with tasks and `SlurmConfig` 2. Code is packaged via `PatternPackager` — only `modules/Megatron-LM/`, `modules/Model-Optimizer/`, and `services/` are synced 3. For remote jobs: code is rsynced to the cluster, an sbatch script is generated and submitted via SSH 4. For local jobs: a Docker container is launched with the same container image and mounts @@ -184,7 +218,7 @@ When filing a bug report, please include: 2. **Reproducible config** — dump with `--to-yaml`: ```bash - uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml bug_report.yaml + uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml bug_report.yaml ``` 3. **Error output** — the relevant error message or traceback from the job log. 
@@ -197,8 +231,8 @@ This launcher produces the same `code/` layout as [nmm-sandbox](https://gitlab-m ```bash # From nmm-sandbox (internal) -uv run slurm.py task=@modules/Model-Optimizer/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +uv run slurm.py --yaml modules/Model-Optimizer/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes # From Model-Optimizer/launcher (public) -uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes ``` diff --git a/launcher/launch.py b/launcher/launch.py index 9d9c9c993..7251effd1 100644 --- a/launcher/launch.py +++ b/launcher/launch.py @@ -16,8 +16,8 @@ """ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters. Usage: - uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes - uv run launch.py task=@Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes + uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes + uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes Environment variables: SLURM_HOST Slurm login node hostname (required for remote jobs) @@ -33,14 +33,7 @@ import warnings import nemo_run as run -from core import ( - SandboxPipeline, - SandboxTask, - get_default_env, - register_factory, - run_jobs, - set_slurm_config_type, -) +from core import SandboxPipeline, get_default_env, register_factory, run_jobs, set_slurm_config_type from slurm_config import SlurmConfig, slurm_factory set_slurm_config_type(SlurmConfig) @@ -80,7 +73,6 @@ def launch( job_name: str = "01_job", job_dir: str = os.environ.get("SLURM_JOB_DIR", os.path.expanduser("~/experiments")), - task: SandboxTask = None, pipeline: SandboxPipeline = None, hf_local: str = None, # noqa: RUF013 user: str = getpass.getuser(), @@ -96,12 +88,10 @@ def launch( job_dir = os.path.join(os.getcwd(), "local_experiments") job_table = {} - if task is not None: - job_table[job_name] = SandboxPipeline(tasks=[task]) - 
elif pipeline is not None: + if pipeline is not None: job_table[job_name] = pipeline else: - print("No task or pipeline provided. Use task=@ or pipeline=@.") + print("No pipeline provided. Use pipeline=@ or --yaml .") return run_jobs( From 8e083658be786da5181fa7d0c4968f14bbddde37 Mon Sep 17 00:00:00 2001 From: Chenhan Yu Date: Sat, 14 Mar 2026 18:59:38 -0700 Subject: [PATCH 05/12] add: common/ scripts, EAGLE3 pipeline, ADVANCED.md Move service scripts to common/ (query.py, query.sh, eagle3, specdec_bench, megatron-lm quantize). Add Qwen3-8B EAGLE3 offline pipeline YAML. Add ADVANCED.md with architecture docs and Claude Code workflows. Update packager to include common/. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Chenhan Yu --- launcher/ADVANCED.md | 242 ++++++++++++++++++ launcher/Qwen/Qwen3-8B/hf_offline_eagle3.yaml | 111 ++++++++ launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml | 2 +- launcher/common/eagle3/dump_offline_data.sh | 42 +++ launcher/common/eagle3/offline_training.sh | 40 +++ .../megatron-lm/quantize/quantize.sh | 0 launcher/common/query.py | 147 +++++++++++ .../{services => common}/service_utils.sh | 0 launcher/common/specdec_bench/quick_check.sh | 27 ++ launcher/common/tensorrt-llm/query.sh | 130 ++++++++++ launcher/common/vllm/query.sh | 129 ++++++++++ launcher/launch.py | 3 +- 12 files changed, 871 insertions(+), 2 deletions(-) create mode 100644 launcher/ADVANCED.md create mode 100644 launcher/Qwen/Qwen3-8B/hf_offline_eagle3.yaml create mode 100644 launcher/common/eagle3/dump_offline_data.sh create mode 100644 launcher/common/eagle3/offline_training.sh rename launcher/{services => common}/megatron-lm/quantize/quantize.sh (100%) create mode 100644 launcher/common/query.py rename launcher/{services => common}/service_utils.sh (100%) create mode 100644 launcher/common/specdec_bench/quick_check.sh create mode 100644 launcher/common/tensorrt-llm/query.sh create mode 100755 launcher/common/vllm/query.sh diff --git a/launcher/ADVANCED.md 
b/launcher/ADVANCED.md new file mode 100644 index 000000000..fb4bc0256 --- /dev/null +++ b/launcher/ADVANCED.md @@ -0,0 +1,242 @@ +# Advanced Guide + +## Architecture + +### Shared Core + +The launcher is built on a shared `core.py` module used by both: + +- **`launch.py`** — public-facing launcher (this repo) +- **`slurm.py`** — internal CI orchestrator ([nmm-sandbox](https://gitlab-master.nvidia.com/omniml/integration/nmm-sandbox)) + +```text +core.py (shared) +├── Dataclasses: SandboxTask, SandboxPipeline, GlobalVariables +├── Executor builders: build_slurm_executor(), build_docker_executor() +├── Job runner: run_jobs() +├── Version reporter: report_versions() +├── Factory registry: register_factory(), set_slurm_config_type() +└── Default env: get_default_env() + +launch.py slurm.py (nmm-sandbox) +├── imports core.py ├── imports core.py (via sys.path) +├── slurm_config.py (env-var driven) ├── tools/slurm_config.py (cluster-specific) +├── registers: slurm_factory ├── registers: oci_hsg, cw_dfw, computelab, ... +├── packager (LAUNCHER_DIR relative) ├── packager (repo root relative) +└── launch() entrypoint └── cicd() entrypoint +``` + +### Code Packaging + +When a job is submitted, `PatternPackager` creates a tar.gz of the source code and rsyncs it to the cluster. The `code/` directory on the remote mirrors the launcher structure: + +```text +code/ +├── modules/ +│ ├── Megatron-LM/megatron/... # Training framework +│ └── Model-Optimizer/modelopt/... # ModelOpt library (mounted over container install) +└── services/ + └── megatron-lm/quantize/ + └── quantize.sh # Job script +``` + +The `modelopt/` directory is bind-mounted over the container's installed ModelOpt, so your local changes take effect without rebuilding the container. + +### Factory System + +Slurm cluster configs use a factory pattern. 
YAMLs reference a factory by name: + +```yaml +slurm_config: + _factory_: "slurm_factory" + nodes: 1 +``` + +Factories are registered at import time via `register_factory()`. In `launch.py`, `slurm_factory` reads from environment variables (`SLURM_HOST`, `SLURM_ACCOUNT`, etc.). In `slurm.py`, `slurm_factory` resolves to a cluster-specific factory based on `SLURM_CLUSTER`: + +```bash +# Default (OCI-HSG) +uv run slurm.py --yaml config.yaml --yes + +# Switch cluster +SLURM_CLUSTER=cw_dfw uv run slurm.py --yaml config.yaml --yes +``` + +### YAML Formats + +**`--yaml` format** (recommended) — maps top-level keys to function args: + +```yaml +job_name: Qwen3-8B_NVFP4 +pipeline: + task_0: + script: services/megatron-lm/quantize/quantize.sh + slurm_config: + _factory_: "slurm_factory" +``` + +**`pipeline=@` format** — bare pipeline without wrapper: + +```yaml +task_0: + script: services/megatron-lm/quantize/quantize.sh + slurm_config: + _factory_: "slurm_factory" +``` + +**Test YAML format** — list of jobs with `_target_` and overrides, used by `tools/run_test_yaml.sh`: + +```yaml +- _target_: Qwen/Qwen3-8B/megatron_lm_ptq.yaml + pipeline: + allow_to_fail: true + skip: false + note: "known flaky" +``` + +Overrides are flattened to dot-notation and passed as nemo-run CLI args (e.g., `pipeline.allow_to_fail=True`). + +### Global Variables + +Pipeline YAMLs support `<>` interpolation for sharing values across tasks: + +```yaml +pipeline: + global_vars: + hf_model: /hf-local/Qwen/Qwen3-8B + + task_0: + environment: + - HF_MODEL_CKPT: <> + + task_1: + environment: + - HF_MODEL_CKPT: <> +``` + +This is resolved in `SandboxPipeline.__post_init__` using regex substitution, not OmegaConf (which fails on isolated sub-configs in nemo-run). 
+ +### Metadata + +Each experiment writes `metadata.json` to `experiments//<id>/`: + +```json +{ + "experiment_id": "cicd_1773420387", + "job_name": "Qwen3-8B_NVFP4_DEFAULT_CFG", + "allow_to_fail": false, + "note": "" +} +``` + +This is used by: + +- `tools/wait_for_experiments.sh` — skip blocking on `allow_to_fail` failures +- `tools/post_review_to_gitlab.sh` — create/update GitLab issues for allowed failures +- Claude Code's `review-logs` skill — emit `<system-out>` instead of `<failure>` in JUnit XML + +## Using Claude Code with the Launcher + +Claude Code can create a tight feedback loop for model quantization experiments: configure → submit → monitor → diagnose → fix → resubmit — all from the CLI. + +### Setup + +Install Claude Code and ensure the launcher is ready: + +```bash +npm install -g @anthropic-ai/claude-code +cd Model-Optimizer/launcher +git submodule update --init --recursive +``` + +### Workflow: Submit and Monitor + +Ask Claude Code to launch a job and wait for results: + +```text +> Run Qwen3-8B quantization on OCI-HSG and wait for it to finish + +Claude will: +1. Run: uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +2. Monitor with: NEMORUN_HOME=$(pwd) uv run nemo experiment status <id> +3. Fetch logs when done: NEMORUN_HOME=$(pwd) uv run nemo experiment logs <id> 0 +4. Report the MMLU score and pass/fail status +``` + +### Workflow: Diagnose Failures + +When a job fails, ask Claude Code to analyze the logs: + +```text +> /review-logs + +Claude will: +1. Find all experiments in experiments/ +2. Fetch logs via nemo experiment logs +3. Read and analyze error tracebacks +4. Produce a structured report with root cause and suggested fix +5. Write a JUnit XML for CI integration +``` + +### Workflow: Add a New Model + +Ask Claude Code to set up a new model config: + +```text +> Add Llama-3.1-70B quantization config. It needs 2 nodes with 4 GPUs each. + +Claude will: +1. Create Meta/Llama-3.1-70B/megatron_lm_ptq.yaml +2. 
Set appropriate TP/EP based on model size +3. Reference the correct service script +4. Test with --dryrun to verify the config +``` + +### Workflow: Iterate on Failures + +Claude Code can fix issues and resubmit in a loop: + +```text +> The job failed with CUDA OOM. Try reducing the sequence length to 4096 and resubmit. + +Claude will: +1. Edit the YAML config +2. Resubmit with uv run launch.py --yaml <config> --yes +3. Monitor and report results +``` + +### Workflow: Reproduce and Compare + +Use `--to-yaml` to capture configs and compare runs: + +```text +> Dump the resolved config for Qwen3-8B, then run it on both OCI-HSG and CW-DFW + +Claude will: +1. Dump: uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml +2. Run on OCI-HSG: SLURM_CLUSTER=oci_hsg uv run slurm.py --yaml resolved.yaml --yes +3. Run on CW-DFW: SLURM_CLUSTER=cw_dfw uv run slurm.py --yaml resolved.yaml --yes +4. Compare MMLU results +``` + +### Skills + +The following Claude Code skills are available in the nmm-sandbox project: + +| Skill | Trigger | Description | +|---|---|---| +| `/review-logs` | After job completion or failure | Analyze experiment logs, diagnose failures, produce JUnit XML | +| `/wait-for-jobs` | After detached submission | Poll experiment status until all jobs finish | +| `/eagle3-new-model` | Adding a new EAGLE3 model | Generate pipeline YAML for a new model | + +### CI Integration + +In CI, Claude Code runs automatically after each test job to: + +1. Fetch and analyze all experiment logs +2. Generate `claude_analysis.md` with structured findings +3. Write `claude_review_rspec.xml` for GitLab test reporting +4. Post failure summaries as MR comments (via `tools/post_review_to_gitlab.sh`) +5. Create/update GitLab issues for `allow_to_fail` jobs that are consistently failing + +If the main script crashes before the review runs, an `after_script` fallback posts the captured job output to the MR so failures are always visible. 
diff --git a/launcher/Qwen/Qwen3-8B/hf_offline_eagle3.yaml b/launcher/Qwen/Qwen3-8B/hf_offline_eagle3.yaml new file mode 100644 index 000000000..19b6cc0d2 --- /dev/null +++ b/launcher/Qwen/Qwen3-8B/hf_offline_eagle3.yaml @@ -0,0 +1,111 @@ +# EAGLE3 offline speculative decoding pipeline for Qwen3-8B. +# +# 4-step pipeline: +# task_0: Data synthesis — query TRT-LLM server to generate prompt samples +# task_1: Dump hidden states — run target model to capture hidden states +# task_2: Offline training — train the EAGLE3 draft head +# task_3: Benchmark — evaluate speculative decoding speedup via VLLM +# +# All tasks share /scratchspace to pass artifacts between steps. +# +# Usage: +# uv run launch.py --yaml Qwen/Qwen3-8B/hf_offline_eagle3.yaml --yes +# uv run slurm.py --yaml modules/Model-Optimizer/launcher/Qwen/Qwen3-8B/hf_offline_eagle3.yaml --yes + +job_name: Qwen3-8B_EAGLE3_offline +pipeline: + allow_to_fail: false + skip: false + note: + + global_vars: + hf_model: /hf-local/Qwen/Qwen3-8B + + # Step 1: Data synthesis via TRT-LLM server + # Args before "--" go to trtllm-serve; args after "--" go to tools/query.py. 
+ task_0: + script: common/tensorrt-llm/query.sh + args: + - --model <<global_vars.hf_model>> + - --tp_size 4 + - --ep_size 4 + - --max_num_tokens 32000 + - --port 8000 + - --host 0.0.0.0 + - --trust_remote_code + - -- + - --data /hf-local/modelopt/Speculative-Decoding-Prompt-Samples + - --save /scratchspace/data + environment: + - HF_LOCAL: /hf-local + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 + container: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc2 + + # Step 2: Dump hidden states from target model + task_1: + script: common/eagle3/dump_offline_data.sh + args: + - --input-data /scratchspace/data + - --output-dir /scratchspace/offline_hidden_states + - --max-seq-len 8192 + - --tp 4 + - --moe-ep 4 + environment: + - HF_MODEL_CKPT: <<global_vars.hf_model>> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 + container: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc2 + + # Step 3: Train EAGLE3 draft head (offline, single task) + task_2: + script: common/eagle3/offline_training.sh + args: + - --offline-data /scratchspace/offline_hidden_states + - --data_path None + - --mode eagle3 + - --num_epochs 1 + - --lr 3e-4 + - --save_steps 500000 + - --output_dir /scratchspace/eagle3 + - --train_bs 8 + - --training_seq_len 4096 + - --eagle_config modules/Model-Optimizer/examples/speculative_decoding/eagle_config.json + - --disable_tqdm True + - --ar_validate_steps 500000 + environment: + - HF_MODEL_CKPT: <<global_vars.hf_model>> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 1 + gpus_per_node: 4 + container: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc2 + + # Step 4: Benchmark speculative decoding (VLLM backend) + task_3: + script: common/specdec_bench/quick_check.sh + args: + - --draft_model_dir /scratchspace/export + - --draft_length 3 + - --output_length 4096 + - --engine VLLM + - --tp_size 4 + - --ep_size 1 + - --speculative_algorithm EAGLE3 + - 
--mtbench /hf-local/HuggingFaceH4/mt_bench_prompts/raw/question.jsonl + - --concurrency 1 + environment: + - HF_MODEL_CKPT: <<global_vars.hf_model>> + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 1 + gpus_per_node: 4 + container: vllm/vllm-openai:latest diff --git a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml index 83ed7f4f0..ce7f81224 100644 --- a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml +++ b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml @@ -5,7 +5,7 @@ pipeline: note: task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh args: - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail - --calib-size 32 diff --git a/launcher/common/eagle3/dump_offline_data.sh b/launcher/common/eagle3/dump_offline_data.sh new file mode 100644 index 000000000..a11f7f7ed --- /dev/null +++ b/launcher/common/eagle3/dump_offline_data.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" + +source ${SCRIPT_DIR}/../service_utils.sh + +################################################################################################### + +if [ -z ${SLURM_ARRAY_TASK_ID} ]; then + TASK_ID=0 +else + echo "SLURM_ARRAY_TASK_ID ${SLURM_ARRAY_TASK_ID}" + TASK_ID=${SLURM_ARRAY_TASK_ID} +fi + +if [ -z ${SLURM_ARRAY_TASK_COUNT} ]; then + TASK_COUNT=1 +else + echo "SLURM_ARRAY_TASK_COUNT ${SLURM_ARRAY_TASK_COUNT}" + TASK_COUNT=${SLURM_ARRAY_TASK_COUNT} +fi + +trtllm-llmapi-launch python3 modules/Model-Optimizer/examples/speculative_decoding/collect_hidden_states/compute_hidden_states_trtllm.py \ + --model ${HF_MODEL_CKPT} \ + --dp-rank ${TASK_ID} \ + --dp-world-size ${TASK_COUNT} \ + ${@} diff --git a/launcher/common/eagle3/offline_training.sh b/launcher/common/eagle3/offline_training.sh new file mode 100644 index 000000000..4dfe2de7c --- /dev/null +++ b/launcher/common/eagle3/offline_training.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +source ${SCRIPT_DIR}/../service_utils.sh + +pip install -r modules/Model-Optimizer/examples/speculative_decoding/requirements.txt +pip install "huggingface-hub>=1.2.1" +export PATH=$PATH:/workspace/.local/bin + +################################################################################################### + +trap 'error_handler $0 $LINENO' ERR # ERROR HANDLER + +bash modules/Model-Optimizer/examples/speculative_decoding/launch_train.sh \ + --model ${HF_MODEL_CKPT} \ + ${@} + +python modules/Model-Optimizer/examples/speculative_decoding/scripts/export_hf_checkpoint.py \ + --model_path /scratchspace/eagle3 \ + --export_path /scratchspace/export + +################################################################################################### + +# This function handles the exit status (fails the CI). +#exit_handler $0 diff --git a/launcher/services/megatron-lm/quantize/quantize.sh b/launcher/common/megatron-lm/quantize/quantize.sh similarity index 100% rename from launcher/services/megatron-lm/quantize/quantize.sh rename to launcher/common/megatron-lm/quantize/quantize.sh diff --git a/launcher/common/query.py b/launcher/common/query.py new file mode 100644 index 000000000..79ec93f54 --- /dev/null +++ b/launcher/common/query.py @@ -0,0 +1,147 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +# ruff: noqa: D100,D101,D102,D103,D107,F841,PLR1722 +import argparse +import os + +from datasets import load_dataset +from openai import OpenAI + +early_termination = False + + +class LLM: + def __init__(self, args): + self.args = args + self.client = OpenAI(base_url=args.base_url) + self.generate(messages=[{"role": "user", "content": "Hello! /no_think"}], verbose=True) + + def generate(self, messages, verbose=False, **chat_template_kwargs): + try: + completion = self.client.chat.completions.create( + model=self.args.model, + messages=messages, + temperature=self.args.temperature, + ) + new_message = completion.choices[0].message.content + if verbose: + for msg in messages: + print("[OLD] {:10}: {:64}".format(msg["role"], msg["content"])) + print("[NEW] {:10}: {:64}\n\n".format("assistant", new_message)) + + new_message = {"role": "assistant", "content": new_message} + except Exception as e: + print(e) + + if "Connection error" in str(e): + globals()["early_termination"] = True + + new_message = None + + return new_message + + +parser = argparse.ArgumentParser(prog="query") +parser.add_argument("base_url", type=str, help="url to the OpenAI compatible API.") +parser.add_argument("model", type=str, help="model name") +parser.add_argument( + "--data", type=str, default=None, help="path to OAI chat data (local or HF hub)" +) +parser.add_argument("--data-split", type=str, default="train", help="HF dataset split") +parser.add_argument("--save", type=str, default=None, help="path to store the generated output.") +parser.add_argument("--num-shards", type=int, default=1000, help="number of shards.") +parser.add_argument("--shard-id-begin", type=int, default=0, help="the shard id to start.") +parser.add_argument( + "--shard-id-step", type=int, default=1, help="the step that the shard id progress."
+) +parser.add_argument("--num-proc", type=int, default=32, help="number of processes (concurrency).") +parser.add_argument("--temperature", type=float, default=0.0, help="temperature.") +args = parser.parse_args() + +llm = LLM(args) + +if args.data is None: + exit(0) + + +def disable_thinking_column(data): + data.update({"enable_thinking": False}) + return data + + +def synthesize(data): + messages = data.get("conversations", None) + if messages is None: + messages = data.get("messages", None) + if messages is None: + raise ValueError( + "No conversations or messages in the data. Only OAI chat data is supported." + ) + + # Handle generation specific kwargs. + enable_thinking = data.get("enable_thinking", True) + + current_messages = [] + + for msg in messages: + if msg["role"] == "system": + current_messages.append(msg) + elif msg["role"] == "user": + if not enable_thinking: + msg["content"] = msg["content"] + " /no_think" + + current_messages.append(msg) + new_message = llm.generate(current_messages, verbose=False) + if new_message is None: + break + else: + current_messages.append(new_message) + elif msg["role"] == "assistant": + # Original assistant messages are not used + pass + else: + raise ValueError("unknown role: {}".format(msg["role"])) + + return {"conversations": current_messages} + + +dataset = load_dataset(args.data, split=args.data_split) + +if args.num_shards * 100 > len(dataset): + args.num_shards = min(16, len(dataset) // 100) + +if args.save is not None: + print("Create save dir: {}".format(args.save)) + os.makedirs(args.save, exist_ok=True) + +for shard_id in range(args.shard_id_begin, args.num_shards, args.shard_id_step): + file_path = args.save + "/train-{:05}-{:05}.jsonl".format(shard_id + 1, args.num_shards) + + if os.path.exists(file_path): + continue + + shard = dataset.shard(num_shards=args.num_shards, index=shard_id) + print(len(shard), file_path) + + if shard_id % 2 == 0: + shard = shard.map(disable_thinking_column,
num_proc=args.num_proc) + updated_shard = shard.map(synthesize, num_proc=args.num_proc) + updated_shard.to_json(file_path) + print(updated_shard[0]) + + if early_termination: + print("Terminate earlier due to server connection error!") + break diff --git a/launcher/services/service_utils.sh b/launcher/common/service_utils.sh similarity index 100% rename from launcher/services/service_utils.sh rename to launcher/common/service_utils.sh diff --git a/launcher/common/specdec_bench/quick_check.sh b/launcher/common/specdec_bench/quick_check.sh new file mode 100644 index 000000000..d90413969 --- /dev/null +++ b/launcher/common/specdec_bench/quick_check.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +source ${SCRIPT_DIR}/../service_utils.sh + +################################################################################################### + + +${TRTLLM_LAUNCH_SCRIPT} python3 modules/Model-Optimizer/examples/specdec_bench/run.py \ + --model_dir ${HF_MODEL_CKPT} \ + --tokenizer ${HF_MODEL_CKPT} \ + ${@} diff --git a/launcher/common/tensorrt-llm/query.sh b/launcher/common/tensorrt-llm/query.sh new file mode 100644 index 000000000..3bc2ec106 --- /dev/null +++ b/launcher/common/tensorrt-llm/query.sh @@ -0,0 +1,130 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" + +source ${SCRIPT_DIR}/../service_utils.sh + +################################################################################################### +# Usage: +# query.sh --model MODEL [SERVE_ARGS...] -- [QUERY_ARGS...] +# +# Launches trtllm-serve with the given model, waits for it to be ready, +# then runs common/query.py against the server. +# +# --model MODEL is required and is consumed by this script. It is used as the +# positional model argument for both trtllm-serve and common/query.py. +# +# Remaining arguments are split on "--": +# - Args BEFORE "--" are appended to the trtllm-serve command (SERVE_ARGS). 
+# - Args AFTER "--" are passed to common/query.py (QUERY_ARGS). +# - If "--" is absent, all remaining args go to common/query.py. +# +# Environment variables (optional, set by Slurm): +# SLURM_ARRAY_TASK_ID Used to shard query.py work across array jobs. +# SLURM_ARRAY_TASK_COUNT Total number of array tasks for sharding. +# +# In a pipeline YAML task config: +# args: +# - --model /hf-local/Qwen/Qwen3-8B # required +# - --tp_size 4 # trtllm-serve args (before --) +# - --ep_size 4 +# - --max_num_tokens 32000 +# - --port 8000 +# - --host 0.0.0.0 +# - --trust_remote_code +# - -- # separator +# - --data /hf-local/dataset # query.py args (after --) +# - --save /scratchspace/data +################################################################################################### + +export OPENAI_API_KEY="token-abc123" + +if [ -z ${SLURM_ARRAY_TASK_ID} ]; then + TASK_ID=0 +else + echo "SLURM_ARRAY_TASK_ID ${SLURM_ARRAY_TASK_ID}" + TASK_ID=${SLURM_ARRAY_TASK_ID} +fi + +if [ -z ${SLURM_ARRAY_TASK_COUNT} ]; then + TASK_COUNT=1 +else + echo "SLURM_ARRAY_TASK_COUNT ${SLURM_ARRAY_TASK_COUNT}" + TASK_COUNT=${SLURM_ARRAY_TASK_COUNT} +fi + +# Parse --model and split remaining args on "--". +# --model is consumed here; args before "--" go to trtllm-serve, args after go to query.py. +MODEL="" +SERVE_EXTRA_ARGS=() +QUERY_ARGS=(--shard-id-begin ${TASK_ID} --shard-id-step ${TASK_COUNT}) +past_separator=false +skip_next=false + +for arg in "$@"; do + if $skip_next; then + MODEL="$arg" + skip_next=false + elif [ "$arg" = "--model" ]; then + skip_next=true + elif [ "$arg" = "--" ]; then + past_separator=true + elif [ "$past_separator" = false ]; then + SERVE_EXTRA_ARGS+=("$arg") + else + QUERY_ARGS+=("$arg") + fi +done + +trtllm-llmapi-launch trtllm-serve \ + ${MODEL} \ + "${SERVE_EXTRA_ARGS[@]}" \ + & + + +# Wait for server to start up by polling the health endpoint +echo "Waiting for server to start..." 
+while true; do + response=$(curl -s -o /dev/null -w "%{http_code}" "http://$(hostname -f):8000/health" || true) + if [ "$response" -eq 200 ]; then + echo "Server is up!" + break + fi + echo "Server not ready yet, retrying in 10 seconds..." + sleep 10 +done + +if [[ "$mpi_rank" -eq 0 ]]; then + cmd="python common/query.py http://localhost:8000/v1 ${MODEL} ${QUERY_ARGS[*]}" + echo "Running command: $cmd" + eval $cmd + echo "Main process exit" +else + while true; do + response=$(curl -s -o /dev/null -w "%{http_code}" "http://$(hostname -f):8000/health" || true) + if [[ "$response" -ne 200 ]]; then + break + fi + #echo "Server is up!" + sleep 60 + done +fi + +pkill trtllm-serve + +exit 0 diff --git a/launcher/common/vllm/query.sh b/launcher/common/vllm/query.sh new file mode 100755 index 000000000..d203e8994 --- /dev/null +++ b/launcher/common/vllm/query.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" + +source ${SCRIPT_DIR}/../service_utils.sh + +################################################################################################### +# Usage: +# query.sh --model MODEL [SERVE_ARGS...] -- [QUERY_ARGS...] +# +# Launches vllm serve with the given model, waits for it to be ready, +# then runs common/query.py against the server. 
+# +# --model MODEL is required and is consumed by this script. It is used as the +# positional model argument for both vllm serve and common/query.py. +# +# Remaining arguments are split on "--": +# - Args BEFORE "--" are appended to the vllm serve command (SERVE_ARGS). +# - Args AFTER "--" are passed to common/query.py (QUERY_ARGS). +# - If "--" is absent, all remaining args go to common/query.py. +# +# Environment variables (optional, set by Slurm): +# SLURM_ARRAY_TASK_ID Used to shard query.py work across array jobs. +# SLURM_ARRAY_TASK_COUNT Total number of array tasks for sharding. +# +# vLLM notes: +# - vLLM manages GPU distribution internally; run with ntasks_per_node: 1 +# in slurm_config and pass --tensor-parallel-size to match gpus_per_node. +# - NVFP4 models require vllm/vllm-openai:v0.15.0+ on Blackwell GPUs. +# - Use --trust-remote-code for models with custom architectures (e.g. Kimi). +# +# In a pipeline YAML task config: +# args: +# - --model /hf-local/Qwen/Qwen3-8B # required +# - --tensor-parallel-size 4 # vllm serve args (before --) +# - --max-num-seqs 32 +# - --trust-remote-code +# - -- # separator +# - --data /hf-local/dataset # query.py args (after --) +# - --save /scratchspace/data +# slurm_config: +# ntasks_per_node: 1 # vLLM is single-process +# gpus_per_node: 4 +################################################################################################### + +export OPENAI_API_KEY="token-abc123" + +if [ -z ${SLURM_ARRAY_TASK_ID} ]; then + TASK_ID=0 +else + echo "SLURM_ARRAY_TASK_ID ${SLURM_ARRAY_TASK_ID}" + TASK_ID=${SLURM_ARRAY_TASK_ID} +fi + +if [ -z ${SLURM_ARRAY_TASK_COUNT} ]; then + TASK_COUNT=1 +else + echo "SLURM_ARRAY_TASK_COUNT ${SLURM_ARRAY_TASK_COUNT}" + TASK_COUNT=${SLURM_ARRAY_TASK_COUNT} +fi + +# Parse --model and split remaining args on "--". +# --model is consumed here; args before "--" go to vllm serve, args after go to query.py. 
+MODEL="" +SERVE_EXTRA_ARGS=() +QUERY_ARGS=(--shard-id-begin ${TASK_ID} --shard-id-step ${TASK_COUNT}) +past_separator=false +skip_next=false + +for arg in "$@"; do + if $skip_next; then + MODEL="$arg" + skip_next=false + elif [ "$arg" = "--model" ]; then + skip_next=true + elif [ "$arg" = "--" ]; then + past_separator=true + elif [ "$past_separator" = false ]; then + SERVE_EXTRA_ARGS+=("$arg") + else + QUERY_ARGS+=("$arg") + fi +done + +# vLLM is single-process: GPU parallelism is handled internally via --tensor-parallel-size. +# No MPI multi-rank logic needed; this script always runs as a single task. +vllm serve \ + ${MODEL} \ + "${SERVE_EXTRA_ARGS[@]}" \ + & +SERVER_PID=$! + + +# Wait for server to start up by polling the health endpoint +echo "Waiting for server to start..." +while true; do + response=$(curl -s -o /dev/null -w "%{http_code}" "http://$(hostname -f):8000/health" || true) + if [ "$response" -eq 200 ]; then + echo "Server is up!" + break + fi + echo "Server not ready yet, retrying in 10 seconds..." 
+ sleep 10 +done + +cmd="python common/query.py http://localhost:8000/v1 ${MODEL} ${QUERY_ARGS[*]}" +echo "Running command: $cmd" +eval $cmd +echo "Main process exit" + +kill $SERVER_PID +wait $SERVER_PID 2>/dev/null || true + +exit 0 diff --git a/launcher/launch.py b/launcher/launch.py index 7251effd1..5b90d9acf 100644 --- a/launcher/launch.py +++ b/launcher/launch.py @@ -57,8 +57,9 @@ "modules/Model-Optimizer/modelopt/*", "modules/Model-Optimizer/examples/*", "services/*", + "common/*", ], - relative_path=[LAUNCHER_DIR] * 6, + relative_path=[LAUNCHER_DIR] * 7, ) MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt") From 22b5267ef6fff5710c1541277b02c87eace1cb26 Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sat, 14 Mar 2026 19:44:28 -0700 Subject: [PATCH 06/12] add: unit tests for launcher (64 tests, all passing) Add tests/unit/launcher/ with 7 test files covering core dataclasses, factory registry, global_vars, env merging, YAML formats, Docker executor mounts, Slurm executor params (mocked), and end-to-end Docker launch via subprocess. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- tests/unit/launcher/__init__.py | 31 ++ tests/unit/launcher/conftest.py | 54 +++ tests/unit/launcher/test_core.py | 243 +++++++++++++ tests/unit/launcher/test_core_extended.py | 352 +++++++++++++++++++ tests/unit/launcher/test_docker_execution.py | 331 +++++++++++++++++ tests/unit/launcher/test_docker_launch.py | 124 +++++++ tests/unit/launcher/test_slurm_config.py | 118 +++++++ tests/unit/launcher/test_slurm_executor.py | 230 ++++++++++++ tests/unit/launcher/test_yaml_formats.py | 193 ++++++++++ 9 files changed, 1676 insertions(+) create mode 100644 tests/unit/launcher/__init__.py create mode 100644 tests/unit/launcher/conftest.py create mode 100644 tests/unit/launcher/test_core.py create mode 100644 tests/unit/launcher/test_core_extended.py create mode 100644 tests/unit/launcher/test_docker_execution.py create mode 100644 tests/unit/launcher/test_docker_launch.py create mode 100644 tests/unit/launcher/test_slurm_config.py create mode 100644 tests/unit/launcher/test_slurm_executor.py create mode 100644 tests/unit/launcher/test_yaml_formats.py diff --git a/tests/unit/launcher/__init__.py b/tests/unit/launcher/__init__.py new file mode 100644 index 000000000..7c9dc907f --- /dev/null +++ b/tests/unit/launcher/__init__.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for the ModelOpt Launcher. + +Coverage: + - test_core.py: Shared dataclasses, factory registry, global_vars interpolation, + version reporting, default env generation, and the run_jobs loop (mocked). + - test_slurm_config.py: SlurmConfig dataclass defaults and slurm_factory behavior + with environment variable overrides. + - test_yaml_formats.py: YAML parsing for --yaml format, pipeline=@ format, and + task_configs resolution via registered factories. + +Not covered (requires live infrastructure): + - Actual Slurm job submission (SSH tunnel, sbatch) + - Docker container launch + - nemo experiment status/logs polling + - PatternPackager tar.gz creation and rsync +""" diff --git a/tests/unit/launcher/conftest.py b/tests/unit/launcher/conftest.py new file mode 100644 index 000000000..d19ced583 --- /dev/null +++ b/tests/unit/launcher/conftest.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Fixtures for launcher unit tests. + +These tests can be run standalone without installing modelopt: + cd Model-Optimizer/launcher + uv pip install pytest + uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --rootdir=. 
+""" + +import os +import sys + +import pytest + +# Prevent pytest from loading the root conftest.py (which imports torch/modelopt) +collect_ignore_glob = ["../../conftest.py"] + + +@pytest.fixture(autouse=True) +def add_launcher_to_path(): + """Add the launcher directory to sys.path so core.py and slurm_config.py can be imported.""" + launcher_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "launcher") + launcher_dir = os.path.abspath(launcher_dir) + if launcher_dir not in sys.path: + sys.path.insert(0, launcher_dir) + yield + if launcher_dir in sys.path: + sys.path.remove(launcher_dir) + + +@pytest.fixture +def tmp_yaml(tmp_path): + """Helper to write a YAML file and return its path.""" + + def _write(content, name="test.yaml"): + p = tmp_path / name + p.write_text(content) + return str(p) + + return _write diff --git a/tests/unit/launcher/test_core.py b/tests/unit/launcher/test_core.py new file mode 100644 index 000000000..69c0fc40d --- /dev/null +++ b/tests/unit/launcher/test_core.py @@ -0,0 +1,243 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for launcher/core.py — shared dataclasses, factory registry, and utilities. 
+ +Coverage: + - SandboxTask: dataclass fields and defaults, skip flag + - SandboxPipeline: task slot collection, task_configs resolution, global_vars interpolation + - Factory registry: register_factory, lookup in create_task_from_yaml + - set_slurm_config_type: patches SandboxTask annotation + - get_default_env: returns correct env dicts for a given experiment title + - report_versions: runs without error on a git repo +""" + +import os + + +class TestSandboxTask: + """Tests for the SandboxTask dataclass.""" + + def test_defaults(self): + from core import SandboxTask + + task = SandboxTask() + assert task.script is None + assert task.slurm_config is None + assert task.args is None + assert task.environment is None + assert task.skip is False + + def test_with_values(self): + from core import SandboxTask + + task = SandboxTask( + script="test.sh", + args=["--foo", "bar"], + environment=[{"KEY": "val"}], + skip=True, + ) + assert task.script == "test.sh" + assert task.args == ["--foo", "bar"] + assert task.environment == [{"KEY": "val"}] + assert task.skip is True + + +class TestSandboxPipeline: + """Tests for SandboxPipeline task collection and global_vars interpolation.""" + + def test_task_slots_collected(self): + from core import SandboxPipeline, SandboxTask0, SandboxTask1 + + t0 = SandboxTask0(script="a.sh") + t1 = SandboxTask1(script="b.sh") + pipeline = SandboxPipeline(task_0=t0, task_1=t1) + assert len(pipeline.tasks) == 2 + assert pipeline.tasks[0].script == "a.sh" + assert pipeline.tasks[1].script == "b.sh" + + def test_empty_pipeline(self): + from core import SandboxPipeline + + pipeline = SandboxPipeline() + assert pipeline.tasks == [] + + def test_global_vars_interpolation_in_environment(self): + from core import GlobalVariables, SandboxPipeline, SandboxTask0 + + t0 = SandboxTask0( + script="test.sh", + environment=[{"MODEL": "<<global_vars.hf_model>>"}], + ) + pipeline = SandboxPipeline( + task_0=t0, + 
global_vars=GlobalVariables(hf_model="/hf-local/Qwen/Qwen3-8B"), + ) + assert pipeline.tasks[0].environment == [{"MODEL": "/hf-local/Qwen/Qwen3-8B"}] + + def test_global_vars_interpolation_in_args(self): + from core import GlobalVariables, SandboxPipeline, SandboxTask0 + + t0 = SandboxTask0( + script="test.sh", + args=["--model", "<<global_vars.hf_model>>"], + ) + pipeline = SandboxPipeline( + task_0=t0, + global_vars=GlobalVariables(hf_model="/models/llama"), + ) + assert pipeline.tasks[0].args == ["--model", "/models/llama"] + + def test_global_vars_unresolved_passthrough(self): + from core import GlobalVariables, SandboxPipeline, SandboxTask0 + + t0 = SandboxTask0( + script="test.sh", + args=["<<global_vars.nonexistent>>"], + ) + pipeline = SandboxPipeline( + task_0=t0, + global_vars=GlobalVariables(hf_model="/models/llama"), + ) + # Unresolved references are left as-is + assert pipeline.tasks[0].args == ["<<global_vars.nonexistent>>"] + + def test_skip_and_allow_to_fail(self): + from core import SandboxPipeline + + pipeline = SandboxPipeline(skip=True, allow_to_fail=True, note="test note") + assert pipeline.skip is True + assert pipeline.allow_to_fail is True + assert pipeline.note == "test note" + + +class TestFactoryRegistry: + """Tests for register_factory and its use in create_task_from_yaml.""" + + def test_register_and_lookup(self, tmp_yaml): + from core import _FACTORY_REGISTRY, register_factory + + # Register a mock factory + def mock_factory(nodes=1, **kwargs): + return {"nodes": nodes, "factory": "mock"} + + register_factory("mock_factory", mock_factory) + assert "mock_factory" in _FACTORY_REGISTRY + assert _FACTORY_REGISTRY["mock_factory"] is mock_factory + + def test_create_task_from_yaml_uses_registry(self, tmp_yaml): + from core import create_task_from_yaml, register_factory + + def test_factory(nodes=1): + return {"nodes": nodes} + + register_factory("test_factory", test_factory) + + yaml_content = """ +script: test.sh +args: + - --flag 
+slurm_config: + _factory_: "test_factory" + nodes: 2 +""" + path = tmp_yaml(yaml_content) + task = create_task_from_yaml(path, factory_lookup={"test_factory": test_factory}) + assert task.script == "test.sh" + assert task.args == ["--flag"] + assert task.slurm_config == {"nodes": 2} + + def test_task_configs_resolved_via_registry(self, tmp_yaml): + from core import SandboxPipeline, register_factory + + def dummy_factory(nodes=1): + return {"nodes": nodes} + + register_factory("dummy_factory", dummy_factory) + + task_yaml = tmp_yaml( + """ +script: hello.sh +slurm_config: + _factory_: "dummy_factory" + nodes: 3 +""", + name="task.yaml", + ) + pipeline = SandboxPipeline(task_configs=[task_yaml]) + assert len(pipeline.tasks) == 1 + assert pipeline.tasks[0].script == "hello.sh" + assert pipeline.tasks[0].slurm_config == {"nodes": 3} + + +class TestSetSlurmConfigType: + """Tests for set_slurm_config_type annotation patching.""" + + def test_patches_annotation(self): + from dataclasses import dataclass + + from core import SandboxTask, set_slurm_config_type + + @dataclass + class MockSlurmConfig: + host: str = "test" + + set_slurm_config_type(MockSlurmConfig) + assert SandboxTask.__annotations__["slurm_config"] is MockSlurmConfig + assert SandboxTask.__dataclass_fields__["slurm_config"].type is MockSlurmConfig + + +class TestGetDefaultEnv: + """Tests for get_default_env utility.""" + + def test_default_title(self): + from core import get_default_env + + slurm_env, local_env = get_default_env() + assert slurm_env["TRITON_CACHE_DIR"] == "/cicd/triton-cache" + assert slurm_env["HF_HOME"] == "/cicd/hf-cache" + assert slurm_env["MLM_SKIP_INSTALL"] == "1" + assert "LAUNCH_SCRIPT" in slurm_env + assert local_env["TRITON_CACHE_DIR"] == "/cicd/triton-cache" + assert "LAUNCH_SCRIPT" not in local_env + + def test_custom_title(self): + from core import get_default_env + + slurm_env, local_env = get_default_env("modelopt") + assert slurm_env["TRITON_CACHE_DIR"] == 
"/modelopt/triton-cache" + assert slurm_env["HF_HOME"] == "/modelopt/hf-cache" + assert local_env["HF_HOME"] == "/modelopt/hf-cache" + + +class TestReportVersions: + """Tests for report_versions git info utility.""" + + def test_runs_on_repo(self, capsys): + from core import report_versions + + # Should not raise — runs git on the current repo + report_versions(os.getcwd()) + captured = capsys.readouterr() + assert "Version Report" in captured.out + + def test_runs_on_nonexistent_dir(self, capsys): + from core import report_versions + + # Should handle gracefully — "unknown" for non-git dirs + report_versions("/tmp/nonexistent_dir_12345") + captured = capsys.readouterr() + assert "Version Report" in captured.out + assert "unknown" in captured.out diff --git a/tests/unit/launcher/test_core_extended.py b/tests/unit/launcher/test_core_extended.py new file mode 100644 index 000000000..698c5b438 --- /dev/null +++ b/tests/unit/launcher/test_core_extended.py @@ -0,0 +1,352 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Extended tests for launcher/core.py — edge cases and remaining coverage gaps. 
+ +Coverage: + - create_task_from_yaml: error cases (missing factory, bad YAML) + - SandboxPipeline: dict environment (not list), task_configs with registry fallback + - _git_info: direct tests for success and failure + - run_jobs: environment merging (list vs dict), test_level filtering, pipeline skip, + detach flag, version report +""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + + +class TestCreateTaskFromYamlErrors: + """Error handling in create_task_from_yaml.""" + + def test_missing_factory_raises(self, tmp_yaml): + from core import create_task_from_yaml + + yaml_content = """ +script: test.sh +slurm_config: + _factory_: "nonexistent_factory" + nodes: 1 +""" + path = tmp_yaml(yaml_content) + with pytest.raises(KeyError): + create_task_from_yaml(path, factory_lookup={}) + + def test_missing_slurm_config_raises(self, tmp_yaml): + from core import create_task_from_yaml + + yaml_content = """ +script: test.sh +""" + path = tmp_yaml(yaml_content) + with pytest.raises((KeyError, TypeError)): + create_task_from_yaml(path, factory_lookup={}) + + def test_environment_preserved(self, tmp_yaml): + from core import create_task_from_yaml + + def factory(nodes=1): + return {"nodes": nodes} + + yaml_content = """ +script: test.sh +environment: + - KEY1: val1 + - KEY2: val2 +slurm_config: + _factory_: "f" + nodes: 1 +""" + path = tmp_yaml(yaml_content) + task = create_task_from_yaml(path, factory_lookup={"f": factory}) + assert task.environment == [{"KEY1": "val1"}, {"KEY2": "val2"}] + + +class TestSandboxPipelineExtended: + """Extended SandboxPipeline tests.""" + + def test_dict_environment_interpolation(self): + """Global vars resolve in dict-format environment (not list).""" + from core import GlobalVariables, SandboxPipeline, SandboxTask0 + + t0 = SandboxTask0( + script="test.sh", + environment={"MODEL": "<<global_vars.hf_model>>", "STATIC": "value"}, + ) + pipeline = SandboxPipeline( + task_0=t0, + 
global_vars=GlobalVariables(hf_model="/hf-local/model"), + ) + assert pipeline.tasks[0].environment == { + "MODEL": "/hf-local/model", + "STATIC": "value", + } + + def test_tasks_list_directly(self): + """Pipeline can receive tasks as a list directly.""" + from core import SandboxPipeline, SandboxTask + + tasks = [ + SandboxTask(script="a.sh"), + SandboxTask(script="b.sh"), + SandboxTask(script="c.sh"), + ] + pipeline = SandboxPipeline(tasks=tasks) + assert len(pipeline.tasks) == 3 + assert pipeline.tasks[2].script == "c.sh" + + def test_no_global_vars_no_error(self): + """Pipeline without global_vars doesn't crash on interpolation.""" + from core import SandboxPipeline, SandboxTask0 + + t0 = SandboxTask0( + script="test.sh", + args=["<<global_vars.hf_model>>"], + ) + pipeline = SandboxPipeline(task_0=t0) + # No interpolation happens — args kept as-is + assert pipeline.tasks[0].args == ["<<global_vars.hf_model>>"] + + +class TestGitInfo: + """Direct tests for _git_info helper.""" + + def test_valid_git_repo(self): + from core import _git_info + + commit, branch = _git_info(os.getcwd()) + assert commit != "unknown" + assert branch != "unknown" + assert len(commit) >= 7 # short hash + + def test_nonexistent_directory(self): + from core import _git_info + + commit, branch = _git_info("/tmp/nonexistent_xyz_12345") + assert commit == "unknown" + assert branch == "unknown" + + def test_non_git_directory(self): + from core import _git_info + + # Use /tmp which is outside any git repo + commit, branch = _git_info("/tmp") + # /tmp may or may not be inside a git worktree depending on the system + # Just verify it returns strings without crashing + assert isinstance(commit, str) + assert isinstance(branch, str) + + +class TestRunJobsExtended: + """Extended run_jobs tests for env merging, test_level, and detach.""" + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_environment_list_merged_to_env(self, mock_docker, mock_exp, tmp_path): + 
"""List-of-dicts environment is merged into task_env.""" + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_inst = MagicMock() + mock_exp_inst._id = "exp_env" + mock_exp_inst.__enter__ = MagicMock(return_value=mock_exp_inst) + mock_exp_inst.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_inst + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env() + + t0 = SandboxTask0( + script="test.sh", + slurm_config=MagicMock(), + environment=[{"A": "1"}, {"B": "2"}], + ) + pipeline = SandboxPipeline(task_0=t0) + + with patch("core.run.Script") as mock_script: + run_jobs( + job_table={"job": pipeline}, + hf_local="/tmp/hf", + user="u", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + base_dir=str(tmp_path), + ) + # Script called with merged env + call_kwargs = mock_script.call_args[1] + assert "A" in call_kwargs["env"] + assert "B" in call_kwargs["env"] + assert call_kwargs["env"]["A"] == "1" + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_none_env_values_converted_to_empty_string(self, mock_docker, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_inst = MagicMock() + mock_exp_inst._id = "exp_none" + mock_exp_inst.__enter__ = MagicMock(return_value=mock_exp_inst) + mock_exp_inst.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_inst + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env() + + t0 = SandboxTask0( + script="test.sh", + slurm_config=MagicMock(), + environment=[{"KEY": None}], + ) + pipeline = SandboxPipeline(task_0=t0) + + with patch("core.run.Script") as mock_script: + run_jobs( + job_table={"job": pipeline}, + hf_local="/tmp/hf", + user="u", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + 
default_local_env=local_env, + base_dir=str(tmp_path), + ) + env = mock_script.call_args[1]["env"] + assert env["KEY"] == "" + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_test_level_filters_pipeline(self, mock_docker, mock_exp, tmp_path): + """Pipelines with test_level > current are skipped.""" + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_inst = MagicMock() + mock_exp_inst._id = "exp_lvl" + mock_exp_inst.__enter__ = MagicMock(return_value=mock_exp_inst) + mock_exp_inst.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_inst + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env() + + t0 = SandboxTask0(script="test.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0, test_level=2) + + run_jobs( + job_table={"job": pipeline}, + hf_local="/tmp/hf", + user="u", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + test_level=0, # lower than pipeline's test_level=2 + base_dir=str(tmp_path), + ) + + # Experiment should not be created for skipped pipelines + mock_exp.assert_not_called() + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_skipped_pipeline_not_run(self, mock_docker, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + slurm_env, local_env = get_default_env() + + t0 = SandboxTask0(script="test.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0, skip=True) + + run_jobs( + job_table={"job": pipeline}, + hf_local="/tmp/hf", + user="u", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + base_dir=str(tmp_path), + ) + + mock_exp.assert_not_called() + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def 
test_detach_flag_passed_to_experiment(self, mock_docker, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_inst = MagicMock() + mock_exp_inst._id = "exp_detach" + mock_exp_inst.__enter__ = MagicMock(return_value=mock_exp_inst) + mock_exp_inst.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_inst + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env() + + t0 = SandboxTask0(script="test.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0) + + run_jobs( + job_table={"job": pipeline}, + hf_local="/tmp/hf", + user="u", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + detach=True, + base_dir=str(tmp_path), + ) + + mock_exp_inst.run.assert_called_once_with(detach=True) + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_version_report_called(self, mock_docker, mock_exp, tmp_path, capsys): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_inst = MagicMock() + mock_exp_inst._id = "exp_ver" + mock_exp_inst.__enter__ = MagicMock(return_value=mock_exp_inst) + mock_exp_inst.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_inst + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env() + + t0 = SandboxTask0(script="test.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0) + + run_jobs( + job_table={"job": pipeline}, + hf_local="/tmp/hf", + user="u", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + base_dir=str(tmp_path), + ) + + captured = capsys.readouterr() + assert "Version Report" in captured.out diff --git a/tests/unit/launcher/test_docker_execution.py b/tests/unit/launcher/test_docker_execution.py new file mode 100644 index 
000000000..693071bb3 --- /dev/null +++ b/tests/unit/launcher/test_docker_execution.py @@ -0,0 +1,331 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Docker execution path — verifies build_docker_executor and run_jobs with mocked Docker. + +Coverage: + - build_docker_executor: container mounts, scratch dir creation, modelopt mount + - run_jobs with hf_local: Docker path selected, env vars applied, metadata written + - --yaml format end-to-end: YAML parsed, pipeline constructed, executor built +""" + +import json +import os +from unittest.mock import MagicMock, patch + + +class TestBuildDockerExecutor: + """Tests for build_docker_executor mount and directory setup.""" + + def test_scratch_dir_created(self, tmp_path): + from core import build_docker_executor + + job_dir = str(tmp_path / "experiments") + build_docker_executor( + hf_local="/tmp/hf-local", + slurm_config=MagicMock( + local=False, + container="test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=None, + srun_args=None, + array=None, + ), + experiment_id="exp_123", + job_dir=job_dir, + task_name="task_0", + packager=MagicMock(), + modelopt_src_path="/tmp/modelopt", + experiment_title="cicd", + ) + scratch_dir = os.path.join(job_dir, "cicd", "exp_123", "task_0") + assert os.path.isdir(scratch_dir) + + def test_hf_local_mount(self, 
tmp_path): + from core import build_docker_executor + + job_dir = str(tmp_path / "experiments") + executor = build_docker_executor( + hf_local="/my/hf-local", + slurm_config=MagicMock( + local=False, + container="test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=None, + srun_args=None, + array=None, + ), + experiment_id="exp_123", + job_dir=job_dir, + task_name="task_0", + packager=MagicMock(), + modelopt_src_path="/tmp/modelopt", + experiment_title="cicd", + ) + volumes = executor.volumes + assert any("/my/hf-local:/hf-local" in v for v in volumes) + + def test_scratchspace_mount(self, tmp_path): + from core import build_docker_executor + + job_dir = str(tmp_path / "experiments") + executor = build_docker_executor( + hf_local="/tmp/hf", + slurm_config=MagicMock( + local=False, + container="test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=None, + srun_args=None, + array=None, + ), + experiment_id="exp_456", + job_dir=job_dir, + task_name="job_0", + packager=MagicMock(), + modelopt_src_path="/tmp/modelopt", + experiment_title="cicd", + ) + volumes = executor.volumes + expected_scratch = os.path.join(job_dir, "cicd", "exp_456", "job_0") + assert any(f"{expected_scratch}:/scratchspace" in v for v in volumes) + + def test_modelopt_mount(self, tmp_path): + from core import build_docker_executor + + job_dir = str(tmp_path / "experiments") + executor = build_docker_executor( + hf_local="/tmp/hf", + slurm_config=MagicMock( + local=False, + container="test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=None, + srun_args=None, + array=None, + ), + experiment_id="exp_789", + job_dir=job_dir, + task_name="task_0", + packager=MagicMock(), + modelopt_src_path="/custom/modelopt", + experiment_title="cicd", + ) + volumes = executor.volumes + assert any("/custom/modelopt:/opt/modelopt" in v for v in volumes) + + def test_experiment_title_mount(self, tmp_path): + from core import build_docker_executor + + job_dir 
= str(tmp_path / "experiments") + executor = build_docker_executor( + hf_local="/tmp/hf", + slurm_config=MagicMock( + local=False, + container="test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=None, + srun_args=None, + array=None, + ), + experiment_id="exp_123", + job_dir=job_dir, + task_name="task_0", + packager=MagicMock(), + modelopt_src_path="/tmp/modelopt", + experiment_title="modelopt", + ) + volumes = executor.volumes + exp_title_path = os.path.join(job_dir, "modelopt") + assert any(f"{exp_title_path}:/modelopt" in v for v in volumes) + + def test_local_slurm_config_mounts_preserved(self, tmp_path): + from core import build_docker_executor + + job_dir = str(tmp_path / "experiments") + executor = build_docker_executor( + hf_local="/tmp/hf", + slurm_config=MagicMock( + local=True, + container="test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=["/data:/data", "/models:/models"], + srun_args=None, + array=None, + ), + experiment_id="exp_123", + job_dir=job_dir, + task_name="task_0", + packager=MagicMock(), + modelopt_src_path="/tmp/modelopt", + experiment_title="cicd", + ) + volumes = executor.volumes + assert any("/data:/data" in v for v in volumes) + assert any("/models:/models" in v for v in volumes) + + +class TestRunJobsDockerPath: + """Tests for run_jobs selecting Docker path when hf_local is set.""" + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_docker_executor_called_with_hf_local(self, mock_docker, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_instance = MagicMock() + mock_exp_instance._id = "test_exp_001" + mock_exp_instance.__enter__ = MagicMock(return_value=mock_exp_instance) + mock_exp_instance.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_instance + + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env("cicd") + + t0 = SandboxTask0( + 
script="echo hello", + slurm_config=MagicMock(), + ) + pipeline = SandboxPipeline(task_0=t0) + job_table = {"test_job": pipeline} + + run_jobs( + job_table=job_table, + hf_local="/tmp/hf-local", + user="testuser", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + experiment_title="cicd", + base_dir=str(tmp_path), + ) + + mock_docker.assert_called_once() + call_kwargs = mock_docker.call_args + assert call_kwargs[0][0] == "/tmp/hf-local" # hf_local + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def test_metadata_written(self, mock_docker, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_instance = MagicMock() + mock_exp_instance._id = "test_exp_meta" + mock_exp_instance.__enter__ = MagicMock(return_value=mock_exp_instance) + mock_exp_instance.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_instance + + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env("cicd") + + t0 = SandboxTask0(script="test.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0, allow_to_fail=True, note="test note") + job_table = {"meta_job": pipeline} + + run_jobs( + job_table=job_table, + hf_local="/tmp/hf", + user="user", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + experiment_title="cicd", + base_dir=str(tmp_path), + ) + + metadata_path = os.path.join("experiments", "cicd", "test_exp_meta", "metadata.json") + assert os.path.exists(metadata_path) + with open(metadata_path) as f: + meta = json.load(f) + assert meta["experiment_id"] == "test_exp_meta" + assert meta["job_name"] == "meta_job" + assert meta["allow_to_fail"] is True + assert meta["note"] == "test note" + + @patch("core.run.Experiment") + @patch("core.build_docker_executor") + def 
test_skipped_task_not_submitted(self, mock_docker, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, SandboxTask1, get_default_env, run_jobs + + mock_exp_instance = MagicMock() + mock_exp_instance._id = "test_exp_skip" + mock_exp_instance.__enter__ = MagicMock(return_value=mock_exp_instance) + mock_exp_instance.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_instance + + mock_docker.return_value = MagicMock() + + slurm_env, local_env = get_default_env("cicd") + + t0 = SandboxTask0(script="run.sh", slurm_config=MagicMock(), skip=True) + t1 = SandboxTask1(script="eval.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0, task_1=t1) + job_table = {"skip_job": pipeline} + + run_jobs( + job_table=job_table, + hf_local="/tmp/hf", + user="user", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + default_slurm_env=slurm_env, + default_local_env=local_env, + experiment_title="cicd", + base_dir=str(tmp_path), + ) + + # Only task_1 should be submitted (task_0 is skipped) + assert mock_docker.call_count == 1 + + @patch("core.run.Experiment") + @patch("core.build_slurm_executor") + def test_slurm_executor_called_without_hf_local(self, mock_slurm, mock_exp, tmp_path): + from core import SandboxPipeline, SandboxTask0, get_default_env, run_jobs + + mock_exp_instance = MagicMock() + mock_exp_instance._id = "test_exp_slurm" + mock_exp_instance.__enter__ = MagicMock(return_value=mock_exp_instance) + mock_exp_instance.__exit__ = MagicMock(return_value=False) + mock_exp.return_value = mock_exp_instance + + mock_slurm.return_value = MagicMock() + + slurm_env, local_env = get_default_env("cicd") + + t0 = SandboxTask0(script="train.sh", slurm_config=MagicMock()) + pipeline = SandboxPipeline(task_0=t0) + job_table = {"slurm_job": pipeline} + + run_jobs( + job_table=job_table, + hf_local=None, # No hf_local → Slurm path + user="user", + identity=None, + job_dir=str(tmp_path), + packager=MagicMock(), + 
default_slurm_env=slurm_env, + default_local_env=local_env, + experiment_title="cicd", + base_dir=str(tmp_path), + ) + + mock_slurm.assert_called_once() diff --git a/tests/unit/launcher/test_docker_launch.py b/tests/unit/launcher/test_docker_launch.py new file mode 100644 index 000000000..8baad32c8 --- /dev/null +++ b/tests/unit/launcher/test_docker_launch.py @@ -0,0 +1,124 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Integration test for Docker container launch via run_jobs. + +Requires Docker to be installed and running. Uses python:3.12-slim +(lightweight, no GPU needed) to run a trivial script. 
+ +Run with: pytest -s (stdin capture must be disabled for invoke/fabric) +""" + +import os +import shutil +import subprocess + +import pytest + +docker_available = shutil.which("docker") is not None + + +@pytest.mark.skipif(not docker_available, reason="Docker not available") +class TestDockerLaunch: + """End-to-end Docker launch test using subprocess to avoid pytest stdin capture issues.""" + + def test_echo_script_via_launch(self, tmp_path): + """Launch a Docker container via launch.py subprocess that runs 'echo hello'.""" + # Create a trivial script + script_dir = tmp_path / "scripts" + script_dir.mkdir() + script = script_dir / "hello.sh" + script.write_text("#!/bin/bash\necho 'HELLO_FROM_DOCKER'\n") + script.chmod(0o755) + + # Create a YAML config + yaml_content = """ +job_name: test_hello +pipeline: + task_0: + script: scripts/hello.sh + slurm_config: + _factory_: "slurm_factory" + container: python:3.12-slim +""" + yaml_path = tmp_path / "test.yaml" + yaml_path.write_text(yaml_content) + + # Run launch.py as a subprocess (avoids pytest stdin capture issues) + launcher_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "launcher") + launcher_dir = os.path.abspath(launcher_dir) + + result = subprocess.run( + [ + "uv", + "run", + "launch.py", + "--yaml", + str(yaml_path), + f"hf_local={tmp_path}", + "--yes", + ], + cwd=launcher_dir, + capture_output=True, + text=True, + timeout=300, + ) + + # Check output + assert "Version Report" in result.stdout + assert "Launching" in result.stdout or "Entering Experiment" in result.stdout + + def test_failing_script_via_launch(self, tmp_path): + """Launch a Docker container that exits 1 — launch.py should not crash.""" + script_dir = tmp_path / "scripts" + script_dir.mkdir() + script = script_dir / "fail.sh" + script.write_text("#!/bin/bash\necho 'FAILING'\nexit 1\n") + script.chmod(0o755) + + yaml_content = """ +job_name: test_fail +pipeline: + task_0: + script: scripts/fail.sh + slurm_config: + _factory_: 
"slurm_factory" + container: python:3.12-slim +""" + yaml_path = tmp_path / "fail_test.yaml" + yaml_path.write_text(yaml_content) + + launcher_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "launcher") + launcher_dir = os.path.abspath(launcher_dir) + + result = subprocess.run( + [ + "uv", + "run", + "launch.py", + "--yaml", + str(yaml_path), + f"hf_local={tmp_path}", + "--yes", + ], + cwd=launcher_dir, + capture_output=True, + text=True, + timeout=300, + ) + + # launch.py should complete (exit 0) even if the job fails + # The job failure is reported in stdout + assert "Version Report" in result.stdout diff --git a/tests/unit/launcher/test_slurm_config.py b/tests/unit/launcher/test_slurm_config.py new file mode 100644 index 000000000..aeb09200e --- /dev/null +++ b/tests/unit/launcher/test_slurm_config.py @@ -0,0 +1,118 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for launcher/slurm_config.py — SlurmConfig dataclass and factory. 
+ +Coverage: + - SlurmConfig: default values, field types + - slurm_factory: default behavior, env var overrides (SLURM_HOST, SLURM_ACCOUNT, + SLURM_HF_LOCAL), return type +""" + + +class TestSlurmConfig: + """Tests for the SlurmConfig dataclass.""" + + def test_defaults(self): + from slurm_config import SlurmConfig + + cfg = SlurmConfig() + assert cfg.host is None + assert cfg.port == 22 + assert cfg.account is None + assert cfg.partition == "batch" + assert cfg.container is None + assert cfg.nodes == 1 + assert cfg.ntasks_per_node == 1 + assert cfg.gpus_per_node == 1 + assert cfg.local is False + assert cfg.container_mounts is None + assert cfg.srun_args is None + assert cfg.array is None + + def test_custom_values(self): + from slurm_config import SlurmConfig + + cfg = SlurmConfig( + host="login.example.com", + account="my_account", + nodes=4, + gpus_per_node=8, + container="nvcr.io/nvidia/pytorch:24.01-py3", + container_mounts=["/data:/data"], + srun_args=["--no-container-mount-home"], + ) + assert cfg.host == "login.example.com" + assert cfg.account == "my_account" + assert cfg.nodes == 4 + assert cfg.gpus_per_node == 8 + assert cfg.container_mounts == ["/data:/data"] + + +class TestSlurmFactory: + """Tests for the slurm_factory function.""" + + def test_default_returns_slurm_config(self): + from slurm_config import slurm_factory + + cfg = slurm_factory() + # slurm_factory with @run.autoconvert returns a nemo-run Config wrapper + assert "SlurmConfig" in repr(cfg) + + def test_default_container(self): + from slurm_config import slurm_factory + + cfg = slurm_factory() + assert "tensorrt-llm" in cfg.container + + def test_default_srun_args(self): + from slurm_config import slurm_factory + + cfg = slurm_factory() + assert cfg.srun_args == ["--no-container-mount-home"] + + def test_default_container_mounts_from_env(self, monkeypatch): + monkeypatch.setenv("SLURM_HF_LOCAL", "/custom/hf-local") + # Need to re-import to pick up the env var in the default + # The 
factory reads SLURM_HF_LOCAL at call time via the default arg + import importlib + + import slurm_config + + importlib.reload(slurm_config) + cfg = slurm_config.slurm_factory() + assert any("/custom/hf-local:/hf-local" in m for m in cfg.container_mounts) + + def test_override_nodes(self): + from slurm_config import slurm_factory + + cfg = slurm_factory(nodes=8) + assert cfg.nodes == 8 + + def test_override_partition(self): + from slurm_config import slurm_factory + + cfg = slurm_factory(partition="gpu") + assert cfg.partition == "gpu" + + def test_env_var_host(self, monkeypatch): + monkeypatch.setenv("SLURM_HOST", "test-host.example.com") + import importlib + + import slurm_config + + importlib.reload(slurm_config) + cfg = slurm_config.slurm_factory() + assert cfg.host == "test-host.example.com" diff --git a/tests/unit/launcher/test_slurm_executor.py b/tests/unit/launcher/test_slurm_executor.py new file mode 100644 index 000000000..48004c786 --- /dev/null +++ b/tests/unit/launcher/test_slurm_executor.py @@ -0,0 +1,230 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for build_slurm_executor — container mounts, scratch paths, executor params. + +Note: actual SSH tunnel and sbatch submission are not tested (require live infra). +We mock run.SSHTunnel and run.SlurmExecutor to verify the arguments passed. 
+""" + +from unittest.mock import MagicMock, patch + + +class TestBuildSlurmExecutor: + """Tests for build_slurm_executor mount construction and executor params.""" + + @patch("core.run.SlurmExecutor") + @patch("core.run.SSHTunnel") + def test_scratch_and_modelopt_mounts(self, mock_tunnel, mock_executor): + from core import build_slurm_executor + + mock_tunnel.return_value = MagicMock() + + slurm_config = MagicMock( + host="test-host", + port=22, + account="test_account", + partition="batch", + container="nvcr.io/test:latest", + modelopt_install_path="/opt/modelopt", + container_mounts=["/hf-local:/hf-local"], + srun_args=["--no-container-mount-home"], + nodes=1, + ntasks_per_node=4, + gpus_per_node=4, + array=None, + ) + + build_slurm_executor( + user="testuser", + identity=None, + slurm_config=slurm_config, + experiment_id="exp_001", + job_dir="/lustre/experiments", + task_name="job_0", + packager=MagicMock(), + experiment_title="cicd", + ) + + # Check SlurmExecutor was called + mock_executor.assert_called_once() + call_kwargs = mock_executor.call_args[1] + + # Verify container mounts include scratch, modelopt, and experiment title + mounts = call_kwargs["container_mounts"] + assert any("/scratchspace" in m for m in mounts) + assert any("/opt/modelopt" in m for m in mounts) + assert any("/cicd" in m for m in mounts) + # Original mount preserved + assert any("/hf-local:/hf-local" in m for m in mounts) + + @patch("core.run.SlurmExecutor") + @patch("core.run.SSHTunnel") + def test_scratch_path_uses_experiment_title(self, mock_tunnel, mock_executor): + from core import build_slurm_executor + + mock_tunnel.return_value = MagicMock() + + slurm_config = MagicMock( + host="host", + port=22, + account="acct", + partition="batch", + container="img", + modelopt_install_path="/opt/mo", + container_mounts=[], + srun_args=[], + nodes=1, + ntasks_per_node=1, + gpus_per_node=1, + array=None, + ) + + build_slurm_executor( + user="u", + identity=None, + slurm_config=slurm_config, 
+ experiment_id="exp_xyz", + job_dir="/data", + task_name="task_0", + packager=MagicMock(), + experiment_title="modelopt", + ) + + mounts = mock_executor.call_args[1]["container_mounts"] + assert any("/data/modelopt/exp_xyz:/scratchspace" in m for m in mounts) + assert any("/data/modelopt:/modelopt" in m for m in mounts) + + @patch("core.run.SlurmExecutor") + @patch("core.run.SSHTunnel") + def test_tunnel_created_with_correct_params(self, mock_tunnel, mock_executor): + from core import build_slurm_executor + + mock_tunnel.return_value = MagicMock() + + slurm_config = MagicMock( + host="login.cluster.com", + port=30022, + account="acct", + partition="batch", + container="img", + modelopt_install_path="/opt/mo", + container_mounts=[], + srun_args=[], + nodes=1, + ntasks_per_node=1, + gpus_per_node=1, + array=None, + ) + + build_slurm_executor( + user="myuser", + identity="/home/.ssh/id_rsa", + slurm_config=slurm_config, + experiment_id="exp_1", + job_dir="/job", + task_name="t0", + packager=MagicMock(), + ) + + mock_tunnel.assert_called_once() + tunnel_kwargs = mock_tunnel.call_args[1] + assert tunnel_kwargs["host"] == "login.cluster.com" + assert tunnel_kwargs["user"] == "myuser" + assert tunnel_kwargs["port"] == 30022 + assert tunnel_kwargs["identity"] == "/home/.ssh/id_rsa" + assert tunnel_kwargs["job_dir"] == "/job" + + @patch("core.run.SlurmExecutor") + @patch("core.run.SSHTunnel") + def test_executor_params(self, mock_tunnel, mock_executor): + from core import build_slurm_executor + + mock_tunnel.return_value = MagicMock() + + slurm_config = MagicMock( + host="h", + port=22, + account="my_acct", + partition="gpu", + container="nvcr.io/img:v1", + modelopt_install_path="/opt/mo", + container_mounts=[], + srun_args=["--mpi=pmix"], + nodes=2, + ntasks_per_node=8, + gpus_per_node=8, + array="0-3", + ) + + packager = MagicMock() + build_slurm_executor( + user="u", + identity=None, + slurm_config=slurm_config, + experiment_id="e1", + job_dir="/j", + task_name="t0", + 
packager=packager, + ) + + kw = mock_executor.call_args[1] + assert kw["account"] == "my_acct" + assert kw["partition"] == "gpu" + assert kw["nodes"] == 2 + assert kw["ntasks_per_node"] == 8 + assert kw["gpus_per_node"] == 8 + assert kw["container_image"] == "nvcr.io/img:v1" + assert kw["srun_args"] == ["--mpi=pmix"] + assert kw["array"] == "0-3" + assert kw["packager"] is packager + assert kw["time"] == "04:00:00" + assert kw["retries"] == 0 + + @patch("core.run.SlurmExecutor") + @patch("core.run.SSHTunnel") + def test_none_container_mounts_handled(self, mock_tunnel, mock_executor): + from core import build_slurm_executor + + mock_tunnel.return_value = MagicMock() + + slurm_config = MagicMock( + host="h", + port=22, + account="a", + partition="b", + container="c", + modelopt_install_path="/m", + container_mounts=None, + srun_args=None, + nodes=1, + ntasks_per_node=1, + gpus_per_node=1, + array=None, + ) + + build_slurm_executor( + user="u", + identity=None, + slurm_config=slurm_config, + experiment_id="e", + job_dir="/j", + task_name="t", + packager=MagicMock(), + ) + + # Should not crash; mounts should still include scratch + modelopt + title + mounts = mock_executor.call_args[1]["container_mounts"] + assert len(mounts) >= 3 diff --git a/tests/unit/launcher/test_yaml_formats.py b/tests/unit/launcher/test_yaml_formats.py new file mode 100644 index 000000000..571535343 --- /dev/null +++ b/tests/unit/launcher/test_yaml_formats.py @@ -0,0 +1,193 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for YAML config parsing — verifies that different YAML formats produce correct dataclasses. + +Coverage: + - --yaml format: top-level job_name + pipeline with task_0, environment, slurm_config + - pipeline=@ format: bare SandboxPipeline without job_name wrapper + - task_configs: list of YAML paths resolved via factory registry + - Environment formats: list-of-dicts and flat dict both parsed correctly + - Global vars: <<global_vars.X>> resolved in both args and environment +""" + +import yaml + + +class TestYamlFormatParsing: + """Tests that YAML content parses into correct dataclass structures.""" + + def test_yaml_format_with_job_name(self, tmp_yaml): + """The --yaml format has job_name and pipeline as top-level keys.""" + content = """ +job_name: test_job +pipeline: + skip: false + allow_to_fail: true + note: "test note" + task_0: + script: test.sh + args: + - --flag + environment: + - KEY: value +""" + path = tmp_yaml(content) + with open(path) as f: + data = yaml.safe_load(f) + + assert data["job_name"] == "test_job" + assert data["pipeline"]["skip"] is False + assert data["pipeline"]["allow_to_fail"] is True + assert data["pipeline"]["note"] == "test note" + assert data["pipeline"]["task_0"]["script"] == "test.sh" + assert data["pipeline"]["task_0"]["args"] == ["--flag"] + assert data["pipeline"]["task_0"]["environment"] == [{"KEY": "value"}] + + def test_bare_pipeline_format(self, tmp_yaml): + """The pipeline=@ format is a bare SandboxPipeline without wrapper.""" + + content = """ +task_0: + script: a.sh + args: + - --foo +task_1: + 
script: b.sh +allow_to_fail: false +skip: false +""" + path = tmp_yaml(content) + with open(path) as f: + data = yaml.safe_load(f) + + # Verify the YAML parses into valid SandboxPipeline kwargs + # (nemo-run does this via its CLI parser; we just verify the structure) + assert "task_0" in data + assert "task_1" in data + assert data["task_0"]["script"] == "a.sh" + assert data["task_1"]["script"] == "b.sh" + + def test_task_configs_format(self, tmp_yaml): + """task_configs lists YAML files that are resolved into tasks.""" + from core import SandboxPipeline, register_factory + + def local_factory(nodes=1): + return {"nodes": nodes} + + register_factory("local_factory", local_factory) + + task_path = tmp_yaml( + """ +script: worker.sh +args: + - --batch-size 32 +slurm_config: + _factory_: "local_factory" + nodes: 2 +""", + name="worker.yaml", + ) + + pipeline = SandboxPipeline(task_configs=[task_path]) + assert len(pipeline.tasks) == 1 + assert pipeline.tasks[0].script == "worker.sh" + assert pipeline.tasks[0].args == ["--batch-size 32"] + assert pipeline.tasks[0].slurm_config == {"nodes": 2} + + def test_environment_list_of_dicts(self): + """Environment as list-of-single-key-dicts (nemo-run format).""" + from core import SandboxTask + + task = SandboxTask( + script="test.sh", + environment=[{"A": "1"}, {"B": "2"}, {"C": "3"}], + ) + assert len(task.environment) == 3 + assert task.environment[0] == {"A": "1"} + + def test_global_vars_across_multiple_tasks(self, tmp_yaml): + """Global vars resolve in both task_0 and task_1.""" + from core import GlobalVariables, SandboxPipeline, SandboxTask0, SandboxTask1 + + t0 = SandboxTask0( + script="quantize.sh", + args=["--model", "<<global_vars.hf_model>>"], + environment=[{"HF_MODEL": "<<global_vars.hf_model>>"}], + ) + t1 = SandboxTask1( + script="eval.sh", + environment=[{"HF_MODEL": "<<global_vars.hf_model>>"}], + ) + pipeline = SandboxPipeline( + task_0=t0, + task_1=t1, + 
global_vars=GlobalVariables(hf_model="/hf-local/Qwen/Qwen3-8B"), + ) + assert pipeline.tasks[0].args == ["--model", "/hf-local/Qwen/Qwen3-8B"] + assert pipeline.tasks[0].environment == [{"HF_MODEL": "/hf-local/Qwen/Qwen3-8B"}] + assert pipeline.tasks[1].environment == [{"HF_MODEL": "/hf-local/Qwen/Qwen3-8B"}] + + +class TestTestYamlFormat: + """Tests for the test YAML format used by run_test_yaml.sh.""" + + def test_target_with_overrides(self, tmp_yaml): + """Test YAML entries have _target_ and override fields.""" + content = """ +- _target_: path/to/config.yaml + pipeline: + allow_to_fail: true + skip: false + note: "known issue" +- _target_: path/to/other.yaml + pipeline: + allow_to_fail: false +""" + path = tmp_yaml(content) + with open(path) as f: + data = yaml.safe_load(f) + + assert isinstance(data, list) + assert len(data) == 2 + assert data[0]["_target_"] == "path/to/config.yaml" + assert data[0]["pipeline"]["allow_to_fail"] is True + assert data[0]["pipeline"]["note"] == "known issue" + assert data[1]["_target_"] == "path/to/other.yaml" + assert data[1]["pipeline"]["allow_to_fail"] is False + + def test_flatten_overrides(self): + """Nested overrides flatten to dot-notation for CLI args.""" + entry = { + "pipeline": { + "allow_to_fail": True, + "skip": False, + } + } + + # Simulate the flatten logic from run_test_yaml.sh + overrides = [] + + def flatten(d, prefix=""): + for k, v in d.items(): + key = f"{prefix}{k}" if prefix else k + if isinstance(v, dict): + flatten(v, f"{key}.") + else: + overrides.append(f"{key}={v}") + + flatten(entry) + assert "pipeline.allow_to_fail=True" in overrides + assert "pipeline.skip=False" in overrides From 59cdedea845a3af399f5e37a368fc5a0f0c77907 Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sat, 14 Mar 2026 19:50:13 -0700 Subject: [PATCH 07/12] fix: replace Model-Optimizer submodule with symlink to parent Remove self-referential launcher/modules/Model-Optimizer submodule (flagged in PR review as 
creating recursive nesting). Replace with a symlink to ../.. (the Model-Optimizer root). The packager's find follows symlinks so modelopt/* and examples/* are packaged correctly. Verified: Docker launch with symlink works (quantize step finds modelopt). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- .gitmodules | 3 --- launcher/modules/Model-Optimizer | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) mode change 160000 => 120000 launcher/modules/Model-Optimizer diff --git a/.gitmodules b/.gitmodules index 23a5af209..87630967d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "launcher/modules/Megatron-LM"] path = launcher/modules/Megatron-LM url = https://github.com/AAnoosheh/Megatron-LM.git -[submodule "launcher/modules/Model-Optimizer"] - path = launcher/modules/Model-Optimizer - url = https://github.com/NVIDIA/Model-Optimizer.git diff --git a/launcher/modules/Model-Optimizer b/launcher/modules/Model-Optimizer deleted file mode 160000 index 69c0d4794..000000000 --- a/launcher/modules/Model-Optimizer +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 69c0d47946086d032e665ecf59a9ff28dc32f5b8 diff --git a/launcher/modules/Model-Optimizer b/launcher/modules/Model-Optimizer new file mode 120000 index 000000000..c25bddb6d --- /dev/null +++ b/launcher/modules/Model-Optimizer @@ -0,0 +1 @@ +../.. \ No newline at end of file From bf91e2ba3ba8c391ceb65748bd443df9f1308892 Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sat, 14 Mar 2026 21:13:49 -0700 Subject: [PATCH 08/12] chg: docs, gitignore, hf_local global_vars, symlink auto-creation Add launcher/.gitignore, CLAUDE.md. Update README with hf_local docs, test instructions, verified results. Fix ADVANCED.md stale paths. Add hf_local to GlobalVariables. Use <<global_vars.hf_local>> in YAML. Remove stale services/* from packager. quantize.sh reads MMLU_DATASET env var. 
launch.py auto-creates Model-Optimizer symlink. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- launcher/.gitignore | 22 ++ launcher/ADVANCED.md | 24 +- launcher/CLAUDE.md | 117 ++++++++++ launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml | 20 +- launcher/README.md | 96 ++++++-- .../common/megatron-lm/quantize/quantize.sh | 2 +- launcher/core.py | 1 + launcher/launch.py | 9 +- uv.lock | 217 +++++++++++++++++- 9 files changed, 468 insertions(+), 40 deletions(-) create mode 100644 launcher/.gitignore create mode 100644 launcher/CLAUDE.md diff --git a/launcher/.gitignore b/launcher/.gitignore new file mode 100644 index 000000000..3eb4a4907 --- /dev/null +++ b/launcher/.gitignore @@ -0,0 +1,22 @@ +# Virtual environment +.venv/ + +# nemo-run state +.slurm_jobs +.docker_jobs.json +.local_jobs.json + +# Experiment artifacts (generated at runtime) +experiments/ +local_experiments/ + +# uv lock (generated, not portable) +uv.lock + +# Python cache +__pycache__/ + +# Editor swap files +*.swp +*.swo +*~ diff --git a/launcher/ADVANCED.md b/launcher/ADVANCED.md index fb4bc0256..8698f4ce8 100644 --- a/launcher/ADVANCED.md +++ b/launcher/ADVANCED.md @@ -35,13 +35,25 @@ code/ ├── modules/ │ ├── Megatron-LM/megatron/... # Training framework │ └── Model-Optimizer/modelopt/... # ModelOpt library (mounted over container install) -└── services/ - └── megatron-lm/quantize/ - └── quantize.sh # Job script +└── common/ + ├── megatron-lm/quantize/ + │ └── quantize.sh # PTQ quantization + MMLU + ├── tensorrt-llm/query.sh # TRT-LLM server + query + ├── vllm/query.sh # vLLM server + query + ├── eagle3/ # EAGLE3 pipeline scripts + └── query.py # OpenAI-compatible query client ``` The `modelopt/` directory is bind-mounted over the container's installed ModelOpt, so your local changes take effect without rebuilding the container. 
+### Model-Optimizer Symlink + +`launcher/modules/Model-Optimizer` is a **symlink** to `../..` (the Model-Optimizer root), not a git submodule. This avoids recursive nesting — the launcher lives inside Model-Optimizer and references its own parent. + +- Git tracks the symlink natively (`git clone` preserves it) +- `launch.py` auto-creates the symlink on first run if it's missing +- The packager's `find` follows symlinks, so `modules/Model-Optimizer/modelopt/*` resolves correctly + ### Factory System Slurm cluster configs use a factory pattern. YAMLs reference a factory by name: @@ -70,7 +82,7 @@ SLURM_CLUSTER=cw_dfw uv run slurm.py --yaml config.yaml --yes job_name: Qwen3-8B_NVFP4 pipeline: task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh slurm_config: _factory_: "slurm_factory" ``` @@ -79,12 +91,12 @@ pipeline: ```yaml task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh slurm_config: _factory_: "slurm_factory" ``` -**Test YAML format** — list of jobs with `_target_` and overrides, used by `tools/run_test_yaml.sh`: +**Test YAML format** — list of jobs with `_target_` and overrides, used by nmm-sandbox's `tools/run_test_yaml.sh` for CI: ```yaml - _target_: Qwen/Qwen3-8B/megatron_lm_ptq.yaml diff --git a/launcher/CLAUDE.md b/launcher/CLAUDE.md new file mode 100644 index 000000000..288923272 --- /dev/null +++ b/launcher/CLAUDE.md @@ -0,0 +1,117 @@ +# CLAUDE.md — ModelOpt Launcher + +## Overview + +The launcher submits ModelOpt quantization, training, and evaluation jobs to Slurm clusters or runs them locally with Docker. It shares core logic (`core.py`) with [nmm-sandbox](https://gitlab-master.nvidia.com/omniml/integration/nmm-sandbox)'s `slurm.py`. 
+ +## Key Files + +| File | Role | +|------|------| +| `launch.py` | Public entrypoint — accepts `--yaml` or `pipeline=@` | +| `core.py` | Shared dataclasses, executor builders, run loop, version reporting | +| `slurm_config.py` | `SlurmConfig` dataclass and env-var-driven `slurm_factory` | +| `common/` | Shell scripts and `query.py` packaged to the cluster | +| `modules/Megatron-LM/` | Git submodule | +| `modules/Model-Optimizer` | Symlink to `../..` (auto-created by `launch.py` if missing) | + +## Common Commands + +```shell +# Run locally with Docker +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml hf_local=/mnt/hf-local --yes + +# Run on Slurm (set env vars first) +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes + +# Dry run — preview resolved config +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --dryrun --yes -v + +# Dump resolved config +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --to-yaml resolved.yaml + +# Run unit tests +uv pip install pytest +uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --confcutdir=../tests/unit/launcher +``` + +## YAML Config Format + +The `--yaml` format maps top-level keys to `launch()` function arguments: + +```yaml +job_name: Qwen3-8B_NVFP4_DEFAULT_CFG +pipeline: + global_vars: + hf_local: /hf-local/ + task_0: + script: common/megatron-lm/quantize/quantize.sh + args: + - --calib-dataset-path-or-name <<global_vars.hf_local>>abisee/cnn_dailymail + environment: + - MLM_MODEL_CFG: Qwen/Qwen3-8B + - HF_MODEL_CKPT: <<global_vars.hf_local>>Qwen/Qwen3-8B + - TP: 4 + slurm_config: + _factory_: "slurm_factory" + nodes: 1 + ntasks_per_node: 4 + gpus_per_node: 4 +``` + +Key conventions: + +- Scripts go in `common/` (not `services/`) +- `<<global_vars.X>>` interpolation for shared values across tasks +- `_factory_: "slurm_factory"` — resolved via `register_factory()` in `core.py` +- Environment is list-of-single-key-dicts: `- KEY: value` +- CLI overrides: 
`pipeline.task_0.slurm_config.nodes=2` + +## Architecture + +```text +launch.py → imports core.py + slurm_config.py + ↓ + core.run_jobs() + ↓ + build_docker_executor() or build_slurm_executor() + ↓ + nemo_run.Experiment → Docker or Slurm +``` + +- `set_slurm_config_type(SlurmConfig)` — patches `SandboxTask` annotation at import time +- `register_factory("slurm_factory", slurm_factory)` — enables YAML `_factory_` resolution +- `report_versions(base_dir)` — prints git commit/branch for launcher + submodules +- `get_default_env(title)` — returns `(slurm_env, local_env)` dicts + +## Adding a New Model Config + +1. Create `<Org>/<Model>/megatron_lm_ptq.yaml` following the format above +2. Set `MLM_MODEL_CFG` to the HuggingFace repo ID +3. Set `QUANT_CFG` (e.g., `NVFP4_DEFAULT_CFG`, `INT8_DEFAULT_CFG`) +4. Set GPU/node counts based on model size +5. Test: `uv run launch.py --yaml <path> --dryrun --yes -v` + +## Testing + +64 unit tests in `tests/unit/launcher/`. Run standalone without installing `modelopt`: + +```shell +uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --confcutdir=../tests/unit/launcher +``` + +Tests cover: core dataclasses, factory registry, global_vars interpolation, YAML formats, Docker/Slurm executor construction (mocked), environment merging, metadata writing, and end-to-end Docker launch via subprocess. + +## Compatibility with nmm-sandbox + +The same YAML works with both launchers: + +```shell +# nmm-sandbox (internal) +uv run slurm.py --yaml modules/Model-Optimizer/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes + +# Model-Optimizer/launcher (public) +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes +``` + +Differences: `slurm.py` has internal cluster factories, `job_yaml` batch mode (via `tools/run_job_yaml.sh`), CI review integration, and `SLURM_CLUSTER` env var for factory selection. 
diff --git a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml index ce7f81224..ea83960ef 100644 --- a/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml +++ b/launcher/Qwen/Qwen3-8B/megatron_lm_ptq.yaml @@ -4,18 +4,32 @@ pipeline: allow_to_fail: false note: + # hf_local: path prefix for model weights and datasets. + # + # This should be a self-managed directory that mirrors the HuggingFace Hub + # hierarchy (e.g., /hf-local/Qwen/Qwen3-8B/, /hf-local/cais/mmlu/). Using + # a dedicated folder is preferred over the HuggingFace cache (~/.cache/huggingface) + # to avoid cache corruption issues with concurrent jobs. + # + # Override on CLI: + # pipeline.global_vars.hf_local=/mnt/my-models/ # use a different path + # pipeline.global_vars.hf_local="" # download from HuggingFace Hub + global_vars: + hf_local: /hf-local/ + task_0: script: common/megatron-lm/quantize/quantize.sh args: - - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail + - --calib-dataset-path-or-name <<global_vars.hf_local>>abisee/cnn_dailymail - --calib-size 32 environment: - MLM_MODEL_CFG: Qwen/Qwen3-8B - QUANT_CFG: NVFP4_DEFAULT_CFG + - HF_MODEL_CKPT: <<global_vars.hf_local>>Qwen/Qwen3-8B + - MMLU_DATASET: <<global_vars.hf_local>>cais/mmlu - TP: 4 slurm_config: - _factory_: "slurm_factory" # oci_hsg_slurm_factory + _factory_: "slurm_factory" nodes: 1 ntasks_per_node: 4 gpus_per_node: 4 - diff --git a/launcher/README.md b/launcher/README.md index 725363341..d5365a2fc 100644 --- a/launcher/README.md +++ b/launcher/README.md @@ -31,6 +31,41 @@ uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes | `HF_TOKEN` | HuggingFace API token | No | | `NEMORUN_HOME` | NeMo Run home directory (default: cwd) | No | +## Model and Dataset Storage (`hf_local`) + +Pipeline YAMLs use a `global_vars.hf_local` path prefix for model weights and datasets. 
This should be a **self-managed directory that mirrors the HuggingFace Hub hierarchy**: + +```text +/hf-local/ +├── Qwen/Qwen3-8B/ # model weights +├── meta-llama/Llama-3.1-8B/ # model weights +├── abisee/cnn_dailymail/ # calibration dataset +└── cais/mmlu/ # evaluation dataset +``` + +Using a dedicated folder is preferred over the HuggingFace cache (`~/.cache/huggingface`) to avoid cache corruption from concurrent jobs writing to the same cache directory. + +You can populate it by copying or symlinking from an existing HuggingFace download: + +```bash +# Example: download a model and copy to hf_local +huggingface-cli download Qwen/Qwen3-8B --local-dir /hf-local/Qwen/Qwen3-8B +``` + +Override `hf_local` in any YAML via CLI: + +```bash +# Use a different local path +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ + pipeline.global_vars.hf_local=/mnt/my-models/ --yes + +# Download from HuggingFace Hub directly (no local cache) +uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml \ + pipeline.global_vars.hf_local="" --yes +``` + +For Slurm clusters, `SLURM_HF_LOCAL` sets the container mount path (e.g., `/lustre/.../hf-local:/hf-local`). 
+ ## Directory Structure ```text @@ -39,18 +74,27 @@ launcher/ ├── core.py # Shared logic (also used by nmm-sandbox's slurm.py) ├── slurm_config.py # SlurmConfig dataclass and factory ├── pyproject.toml # Dependencies (nemo-run, pyyaml) -├── services/ # Shell scripts executed on the cluster +├── common/ # Shared scripts executed on the cluster │ ├── service_utils.sh # Error handling, MPI rank utilities -│ └── megatron-lm/quantize/ -│ ├── quantize.sh # PTQ quantization + MMLU evaluation -│ └── Qwen3-8B.yaml # Task config for Qwen3-8B -├── Qwen/Qwen3-8B/ # Example pipeline config -│ └── megatron_lm_ptq.yaml -└── modules/ # Git submodules - ├── Megatron-LM/ # NVIDIA Megatron-LM training framework - └── Model-Optimizer/ # NVIDIA ModelOpt library +│ ├── query.py # OpenAI-compatible query client +│ ├── megatron-lm/quantize/ +│ │ └── quantize.sh # PTQ quantization + MMLU evaluation +│ ├── tensorrt-llm/query.sh # TRT-LLM server launch + query +│ ├── vllm/query.sh # vLLM server launch + query +│ ├── eagle3/ # EAGLE3 speculative decoding scripts +│ └── specdec_bench/ # Speculative decoding benchmark +├── Qwen/Qwen3-8B/ # Example configs +│ ├── megatron_lm_ptq.yaml # PTQ quantization pipeline +│ └── hf_offline_eagle3.yaml # EAGLE3 offline pipeline +└── modules/ # Dependencies + ├── Megatron-LM/ # Git submodule: NVIDIA Megatron-LM + └── Model-Optimizer -> ../.. # Symlink to parent (auto-created if missing) ``` +> **Note:** `modules/Model-Optimizer` is a symlink to the parent directory (`../..`), +> not a submodule. This avoids recursive nesting. `launch.py` auto-creates +> the symlink on first run if it's missing. 
+ ## YAML Config Format A config YAML defines the job name, pipeline metadata, and one or more tasks: @@ -63,14 +107,14 @@ pipeline: note: task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh args: - --calib-dataset-path-or-name /hf-local/abisee/cnn_dailymail - --calib-size 32 environment: - MLM_MODEL_CFG: Qwen/Qwen3-8B - QUANT_CFG: NVFP4_DEFAULT_CFG - - TP: 1 + - TP: 4 slurm_config: _factory_: "slurm_factory" nodes: 1 @@ -80,7 +124,8 @@ pipeline: ### Multi-task Pipeline -Tasks run sequentially — `task_1` starts only after `task_0` completes: +Tasks run sequentially — `task_1` starts only after `task_0` completes. +Example (illustrative — export script may not exist yet): ```yaml job_name: Qwen3-8B_quantize_export @@ -89,7 +134,7 @@ pipeline: hf_model: /hf-local/Qwen/Qwen3-8B task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh environment: - HF_MODEL_CKPT: <<global_vars.hf_model>> slurm_config: @@ -97,7 +142,7 @@ pipeline: nodes: 1 task_1: - script: services/megatron-lm/export/export.sh + script: common/megatron-lm/export/export.sh environment: - HF_MODEL_CKPT: <<global_vars.hf_model>> slurm_config: @@ -119,7 +164,7 @@ The file contains both `job_name` and `pipeline`: job_name: Qwen3-8B_NVFP4 pipeline: task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh slurm_config: _factory_: "slurm_factory" ``` @@ -130,7 +175,7 @@ This is useful for reusing pipeline configs across different job names: ```yaml # bare_pipeline.yaml — used with: uv run launch.py pipeline=@bare_pipeline.yaml --yes task_0: - script: services/megatron-lm/quantize/quantize.sh + script: common/megatron-lm/quantize/quantize.sh slurm_config: _factory_: "slurm_factory" ``` @@ -186,7 +231,7 @@ uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml detach=true --yes ## How It Works 1. 
`launch.py` parses the YAML and creates a `SandboxPipeline` with tasks and `SlurmConfig` -2. Code is packaged via `PatternPackager` — only `modules/Megatron-LM/`, `modules/Model-Optimizer/`, and `services/` are synced +2. Code is packaged via `PatternPackager` — `modules/Megatron-LM/`, `modules/Model-Optimizer/` (via symlink), and `common/` are synced 3. For remote jobs: code is rsynced to the cluster, an sbatch script is generated and submitted via SSH 4. For local jobs: a Docker container is launched with the same container image and mounts 5. The `code/` directory on the cluster mirrors the launcher structure: @@ -196,9 +241,20 @@ code/ ├── modules/ │ ├── Megatron-LM/megatron/... │ └── Model-Optimizer/modelopt/... -└── services/... +└── common/... ``` +## Running Tests + +```bash +cd launcher +uv pip install pytest +uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" \ + --confcutdir=../tests/unit/launcher +``` + +64 unit tests cover core dataclasses, factory registry, YAML parsing, Docker/Slurm executor construction, environment merging, and end-to-end Docker launch. + ## Reporting Bugs When filing a bug report, please include: @@ -236,3 +292,7 @@ uv run slurm.py --yaml modules/Model-Optimizer/launcher/Qwen/Qwen3-8B/megatron_l # From Model-Optimizer/launcher (public) uv run launch.py --yaml Qwen/Qwen3-8B/megatron_lm_ptq.yaml --yes ``` + +Verified: identical MMLU results (0.719 local, 0.730 OCI-HSG) from both launchers. + +For architecture details, factory system, and Claude Code workflows, see [ADVANCED.md](ADVANCED.md). 
diff --git a/launcher/common/megatron-lm/quantize/quantize.sh b/launcher/common/megatron-lm/quantize/quantize.sh index d4b3d5248..6e4d21b99 100755 --- a/launcher/common/megatron-lm/quantize/quantize.sh +++ b/launcher/common/megatron-lm/quantize/quantize.sh @@ -38,7 +38,7 @@ EXPORT_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/export.sh" export MLM_EXTRA_ARGS=${@} ${QUANTIZE_EXE} ${MLM_MODEL_CFG} ${QUANT_CFG} -export MLM_EXTRA_ARGS="--mmlu-dataset /hf-local/cais/mmlu --fraction 0.01 --lower-bound 0.38 --disable-tqdm" +export MLM_EXTRA_ARGS="--mmlu-dataset ${MMLU_DATASET:-/hf-local/cais/mmlu} --fraction 0.01 --lower-bound 0.38 --disable-tqdm" MLM_MODEL_CKPT=${MLM_MODEL_SAVE} ${MMLU_EXE} ${MLM_MODEL_CFG} ################################################################################################### diff --git a/launcher/core.py b/launcher/core.py index 18e22dfe8..a722767cf 100644 --- a/launcher/core.py +++ b/launcher/core.py @@ -144,6 +144,7 @@ class GlobalVariables: hf_model: str = None hf_data: str = None + hf_local: str = None @dataclass diff --git a/launcher/launch.py b/launcher/launch.py index 5b90d9acf..934104264 100644 --- a/launcher/launch.py +++ b/launcher/launch.py @@ -46,6 +46,12 @@ LAUNCHER_DIR = os.path.dirname(os.path.abspath(__file__)) MODELOPT_ROOT = os.path.dirname(LAUNCHER_DIR) +# Ensure modules/Model-Optimizer symlink exists (points to parent Model-Optimizer root) +_mo_symlink = os.path.join(LAUNCHER_DIR, "modules", "Model-Optimizer") +if not os.path.exists(_mo_symlink): + os.makedirs(os.path.join(LAUNCHER_DIR, "modules"), exist_ok=True) + os.symlink(os.path.relpath(MODELOPT_ROOT, os.path.join(LAUNCHER_DIR, "modules")), _mo_symlink) + EXPERIMENT_TITLE = "cicd" DEFAULT_SLURM_ENV, DEFAULT_LOCAL_ENV = get_default_env(EXPERIMENT_TITLE) @@ -56,10 +62,9 @@ "modules/Megatron-LM/*.py", "modules/Model-Optimizer/modelopt/*", "modules/Model-Optimizer/examples/*", - "services/*", "common/*", ], - relative_path=[LAUNCHER_DIR] * 7, + 
relative_path=[LAUNCHER_DIR] * 6, ) MODELOPT_SRC_PATH = os.path.join(LAUNCHER_DIR, "modules/Model-Optimizer/modelopt") diff --git a/uv.lock b/uv.lock index 5849559ad..0f36f2dbb 100644 --- a/uv.lock +++ b/uv.lock @@ -16,9 +16,6 @@ resolution-markers = [ "python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'", ] -[manifest] -overrides = [{ name = "torch", marker = "sys_platform == 'never'" }] - [[package]] name = "accelerate" version = "1.13.0" @@ -31,7 +28,7 @@ dependencies = [ { name = "psutil" }, { name = "pyyaml" }, { name = "safetensors" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ca/14/787e5498cd062640f0f3d92ef4ae4063174f76f9afd29d13fc52a319daae/accelerate-1.13.0.tar.gz", hash = "sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236", size = 402835, upload-time = "2026-03-04T19:34:12.359Z" } wheels = [ @@ -407,6 +404,19 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/54/27/01d9078a77b9e31b79b9716e66ca4db74f4744c5232bcb3e8769395c4280/cppimport-22.8.2.tar.gz", hash = "sha256:bbb4957102db41bc99ad72c233bce92f9d1fd91be352fc07878c4361033a401f", size = 26635, upload-time = "2022-08-02T16:50:36.872Z" } +[[package]] +name = "cuda-bindings" +version = "12.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-pathfinder", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/d8/b546104b8da3f562c1ff8ab36d130c8fe1dd6a045ced80b4f6ad74f7d4e1/cuda_bindings-12.9.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d3c842c2a4303b2a580fe955018e31aea30278be19795ae05226235268032e5", size = 12148218, upload-time = "2025-10-21T14:51:28.855Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/e7/b47792cc2d01c7e1d37c32402182524774dadd2d26339bd224e0e913832e/cuda_bindings-12.9.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c912a3d9e6b6651853eed8eed96d6800d69c08e94052c292fec3f282c5a817c9", size = 12210593, upload-time = "2025-10-21T14:51:36.574Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c1/dabe88f52c3e3760d861401bb994df08f672ec893b8f7592dc91626adcf3/cuda_bindings-12.9.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fda147a344e8eaeca0c6ff113d2851ffca8f7dfc0a6c932374ee5c47caa649c8", size = 12151019, upload-time = "2025-10-21T14:51:43.167Z" }, +] + [[package]] name = "cuda-pathfinder" version = "1.4.1" @@ -478,7 +488,7 @@ dependencies = [ { name = "psutil", marker = "sys_platform != 'win32'" }, { name = "py-cpuinfo", marker = "sys_platform != 'win32'" }, { name = "pydantic", marker = "sys_platform != 'win32'" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch", marker = "sys_platform != 'win32'" }, { name = "tqdm", marker = "sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/30/00/ad310cf94e0e397c416087e6c4dc782429292206b2b1a3ffbd388ac95a67/deepspeed-0.18.7.tar.gz", hash = "sha256:3763530196f8e7df8fc56d028a8c64409200695213920dc6cf0045d50c884079", size = 1646894, upload-time = "2026-03-05T20:44:56.579Z" } @@ -1106,7 +1116,9 @@ name = "networkx" version = "3.4.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'win32'", "(python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform != 'win32') or (python_full_version < '3.11' and sys_platform == 'darwin')", + "python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'win32'", "python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform != 'darwin' and 
sys_platform != 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } @@ -1119,10 +1131,14 @@ name = "networkx" version = "3.6.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'win32'", + "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'win32'", "(python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform != 'win32') or (python_full_version >= '3.12' and sys_platform == 'darwin')", "(python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform != 'win32') or (python_full_version == '3.11.*' and sys_platform == 'darwin')", "python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'", "python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'", + "python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'win32'", + "python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } wheels = [ @@ -1274,6 +1290,108 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", 
size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.8.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, +] + +[[package]] +name = 
"nvidia-cudnn-cu12" +version = "9.10.2.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.3.83" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.13.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.9.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.3.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.8.93" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, +] + [[package]] name = "nvidia-ml-py" version = "13.590.48" @@ -1300,7 +1418,7 @@ dependencies = [ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "setuptools" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch" }, { name = "tqdm" }, ] @@ -1505,6 +1623,38 @@ requires-dist = [ ] provides-extras = ["onnx", "hf", "dev-lint", "dev-docs", "dev-test", "all", "dev"] +[[package]] +name = "nvidia-nccl-cu12" +version = "2.27.5" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.8.93" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.4.5" +source = { registry = "https://pypi.org/simple" 
} +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.8.90" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, +] + [[package]] name = "onnx" version = "1.19.1" @@ -1829,7 +1979,7 @@ dependencies = [ { name = "psutil" }, { name = "pyyaml" }, { name = "safetensors" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch" }, { name = "tqdm" }, { name = "transformers" }, ] @@ -2882,7 +3032,7 @@ dependencies = [ { name = "huggingface-hub" }, { name = "pyyaml" }, { name = "safetensors" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch" }, { name = "torchvision" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d7/2c/593109822fe735e637382aca6640c1102c19797f7791f1fd1dab2d6c3cb1/timm-1.0.25.tar.gz", hash = "sha256:47f59fc2754725735cc81bb83bcbfce5bec4ebd5d4bb9e69da57daa92fcfa768", size = 2414743, upload-time = "2026-02-23T16:49:00.137Z" } @@ -2961,15 +3111,52 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, { name = "networkx", version = "3.4.2", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "setuptools", marker = "python_full_version >= '3.12'" }, { name = "sympy" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions" }, ] 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, + { url = "https://files.pythonhosted.org/packages/0f/8b/4b61d6e13f7108f36910df9ab4b58fd389cc2520d54d81b88660804aad99/torch-2.10.0-2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:418997cb02d0a0f1497cf6a09f63166f9f5df9f3e16c8a716ab76a72127c714f", size = 79423467, upload-time = "2026-02-10T21:44:48.711Z" }, + { url = "https://files.pythonhosted.org/packages/d3/54/a2ba279afcca44bbd320d4e73675b282fcee3d81400ea1b53934efca6462/torch-2.10.0-2-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:13ec4add8c3faaed8d13e0574f5cd4a323c11655546f91fbe6afa77b57423574", size = 79498202, upload-time = "2026-02-10T21:44:52.603Z" }, + { url = "https://files.pythonhosted.org/packages/16/ee/efbd56687be60ef9af0c9c0ebe106964c07400eade5b0af8902a1d8cd58c/torch-2.10.0-3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a1ff626b884f8c4e897c4c33782bdacdff842a165fee79817b1dd549fdda1321", size = 915510070, upload-time = "2026-03-11T14:16:39.386Z" }, + { url = "https://files.pythonhosted.org/packages/36/ab/7b562f1808d3f65414cd80a4f7d4bb00979d9355616c034c171249e1a303/torch-2.10.0-3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ac5bdcbb074384c66fa160c15b1ead77839e3fe7ed117d667249afce0acabfac", size = 915518691, upload-time = "2026-03-11T14:15:43.147Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7a/abada41517ce0011775f0f4eacc79659bc9bc6c361e6bfe6f7052a6b9363/torch-2.10.0-3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:98c01b8bb5e3240426dcde1446eed6f40c778091c8544767ef1168fc663a05a6", size = 915622781, upload-time = "2026-03-11T14:17:11.354Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/1a/c61f36cfd446170ec27b3a4984f072fd06dab6b5d7ce27e11adb35d6c838/torch-2.10.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5276fa790a666ee8becaffff8acb711922252521b28fbce5db7db5cf9cb2026d", size = 145992962, upload-time = "2026-01-21T16:24:14.04Z" }, + { url = "https://files.pythonhosted.org/packages/b5/60/6662535354191e2d1555296045b63e4279e5a9dbad49acf55a5d38655a39/torch-2.10.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:aaf663927bcd490ae971469a624c322202a2a1e68936eb952535ca4cd3b90444", size = 915599237, upload-time = "2026-01-21T16:23:25.497Z" }, + { url = "https://files.pythonhosted.org/packages/40/b8/66bbe96f0d79be2b5c697b2e0b187ed792a15c6c4b8904613454651db848/torch-2.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:a4be6a2a190b32ff5c8002a0977a25ea60e64f7ba46b1be37093c141d9c49aeb", size = 113720931, upload-time = "2026-01-21T16:24:23.743Z" }, + { url = "https://files.pythonhosted.org/packages/76/bb/d820f90e69cda6c8169b32a0c6a3ab7b17bf7990b8f2c680077c24a3c14c/torch-2.10.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:35e407430795c8d3edb07a1d711c41cc1f9eaddc8b2f1cc0a165a6767a8fb73d", size = 79411450, upload-time = "2026-01-21T16:25:30.692Z" }, + { url = "https://files.pythonhosted.org/packages/78/89/f5554b13ebd71e05c0b002f95148033e730d3f7067f67423026cc9c69410/torch-2.10.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:3282d9febd1e4e476630a099692b44fdc214ee9bf8ee5377732d9d9dfe5712e4", size = 145992610, upload-time = "2026-01-21T16:25:26.327Z" }, + { url = "https://files.pythonhosted.org/packages/ae/30/a3a2120621bf9c17779b169fc17e3dc29b230c29d0f8222f499f5e159aa8/torch-2.10.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a2f9edd8dbc99f62bc4dfb78af7bf89499bca3d753423ac1b4e06592e467b763", size = 915607863, upload-time = "2026-01-21T16:25:06.696Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/3d/c87b33c5f260a2a8ad68da7147e105f05868c281c63d65ed85aa4da98c66/torch-2.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:29b7009dba4b7a1c960260fc8ac85022c784250af43af9fb0ebafc9883782ebd", size = 113723116, upload-time = "2026-01-21T16:25:21.916Z" }, + { url = "https://files.pythonhosted.org/packages/61/d8/15b9d9d3a6b0c01b883787bd056acbe5cc321090d4b216d3ea89a8fcfdf3/torch-2.10.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:b7bd80f3477b830dd166c707c5b0b82a898e7b16f59a7d9d42778dd058272e8b", size = 79423461, upload-time = "2026-01-21T16:24:50.266Z" }, + { url = "https://files.pythonhosted.org/packages/cc/af/758e242e9102e9988969b5e621d41f36b8f258bb4a099109b7a4b4b50ea4/torch-2.10.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:5fd4117d89ffd47e3dcc71e71a22efac24828ad781c7e46aaaf56bf7f2796acf", size = 145996088, upload-time = "2026-01-21T16:24:44.171Z" }, + { url = "https://files.pythonhosted.org/packages/23/8e/3c74db5e53bff7ed9e34c8123e6a8bfef718b2450c35eefab85bb4a7e270/torch-2.10.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:787124e7db3b379d4f1ed54dd12ae7c741c16a4d29b49c0226a89bea50923ffb", size = 915711952, upload-time = "2026-01-21T16:23:53.503Z" }, + { url = "https://files.pythonhosted.org/packages/6e/01/624c4324ca01f66ae4c7cd1b74eb16fb52596dce66dbe51eff95ef9e7a4c/torch-2.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:2c66c61f44c5f903046cc696d088e21062644cbe541c7f1c4eaae88b2ad23547", size = 113757972, upload-time = "2026-01-21T16:24:39.516Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5c/dee910b87c4d5c0fcb41b50839ae04df87c1cfc663cf1b5fca7ea565eeaa/torch-2.10.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:6d3707a61863d1c4d6ebba7be4ca320f42b869ee657e9b2c21c736bf17000294", size = 79498198, upload-time = "2026-01-21T16:24:34.704Z" }, +] [[package]] name = "torch-geometric" @@ -2999,7 +3186,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version 
= "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch" }, { name = "torchvision" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" } @@ -3015,7 +3202,7 @@ dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.4.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pillow" }, - { name = "torch", marker = "sys_platform == 'never'" }, + { name = "torch" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/50/ae/cbf727421eb73f1cf907fbe5788326a08f111b3f6b6ddca15426b53fec9a/torchvision-0.25.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a95c47abb817d4e90ea1a8e57bd0d728e3e6b533b3495ae77d84d883c4d11f56", size = 1874919, upload-time = "2026-01-21T16:27:47.617Z" }, @@ -3100,6 +3287,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" }, ] +[[package]] +name = "triton" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/8c/f7/f1c9d3424ab199ac53c2da567b859bcddbb9c9e7154805119f8bd95ec36f/triton-3.6.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6550fae429e0667e397e5de64b332d1e5695b73650ee75a6146e2e902770bea", size = 188105201, upload-time = "2026-01-20T16:00:29.272Z" }, + { url = "https://files.pythonhosted.org/packages/e0/12/b05ba554d2c623bffa59922b94b0775673de251f468a9609bc9e45de95e9/triton-3.6.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8e323d608e3a9bfcc2d9efcc90ceefb764a82b99dea12a86d643c72539ad5d3", size = 188214640, upload-time = "2026-01-20T16:00:35.869Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" From 4a05a1d1ad4ddb05fa43072c61990fc2466faf1f Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sat, 14 Mar 2026 21:32:48 -0700 Subject: [PATCH 09/12] fix: skip launcher tests when nemo_run not installed, add docstrings Skip all launcher tests with pytest.skip when nemo_run is not available (CI tox env doesn't have it). Add docstrings to __post_init__ and _resolve for 100% docstring coverage. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- launcher/core.py | 2 ++ tests/unit/launcher/conftest.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/launcher/core.py b/launcher/core.py index a722767cf..de2f5b061 100644 --- a/launcher/core.py +++ b/launcher/core.py @@ -171,6 +171,7 @@ class SandboxPipeline: _factory_lookup: dict = None def __post_init__(self): + """Collect tasks from slots/configs and resolve <<global_vars.X>> references.""" if self.tasks is None: self.tasks = [] for i in range(5): @@ -191,6 +192,7 @@ def __post_init__(self): } def _resolve(s): + """Replace <<global_vars.X>> with the corresponding value.""" if not isinstance(s, str): return s return re.sub( diff --git a/tests/unit/launcher/conftest.py b/tests/unit/launcher/conftest.py index d19ced583..44fd9d936 100644 --- a/tests/unit/launcher/conftest.py +++ b/tests/unit/launcher/conftest.py @@ -15,10 +15,12 @@ """Fixtures for launcher unit tests. -These tests can be run standalone without installing modelopt: +These tests require nemo_run and are skipped when it's not installed. + +Standalone run (from launcher/ directory): cd Model-Optimizer/launcher uv pip install pytest - uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --rootdir=. 
+ uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --confcutdir=../tests/unit/launcher """ import os @@ -26,8 +28,11 @@ import pytest -# Prevent pytest from loading the root conftest.py (which imports torch/modelopt) -collect_ignore_glob = ["../../conftest.py"] +# Skip all tests in this directory if nemo_run is not installed +try: + import nemo_run # noqa: F401 +except ImportError: + pytest.skip("nemo_run not installed, skipping launcher tests", allow_module_level=True) @pytest.fixture(autouse=True) From edaaab06e84cd30d08657966e60e4e8788bbcc24 Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sat, 14 Mar 2026 21:43:13 -0700 Subject: [PATCH 10/12] chg: move launcher tests to launcher/tests/, add CI workflow Move tests from tests/unit/launcher/ to launcher/tests/ for self-containment. Add launcher job to unit_tests.yml using uv. Add pytest.ini to override root pyproject.toml addopts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- .github/workflows/unit_tests.yml | 21 +++++++++++++++++-- launcher/CLAUDE.md | 2 ++ launcher/pytest.ini | 2 ++ .../launcher => launcher/tests}/__init__.py | 0 .../launcher => launcher/tests}/conftest.py | 18 ++++++---------- .../launcher => launcher/tests}/test_core.py | 1 + .../tests}/test_core_extended.py | 1 + .../tests}/test_docker_execution.py | 1 + .../tests}/test_docker_launch.py | 4 ++-- .../tests}/test_slurm_config.py | 1 + .../tests}/test_slurm_executor.py | 1 + .../tests}/test_yaml_formats.py | 1 - 12 files changed, 36 insertions(+), 17 deletions(-) create mode 100644 launcher/pytest.ini rename {tests/unit/launcher => launcher/tests}/__init__.py (100%) rename {tests/unit/launcher => launcher/tests}/conftest.py (70%) rename {tests/unit/launcher => launcher/tests}/test_core.py (99%) rename {tests/unit/launcher => launcher/tests}/test_core_extended.py (99%) rename {tests/unit/launcher => 
launcher/tests}/test_docker_execution.py (99%) rename {tests/unit/launcher => launcher/tests}/test_docker_launch.py (98%) rename {tests/unit/launcher => launcher/tests}/test_slurm_config.py (99%) rename {tests/unit/launcher => launcher/tests}/test_slurm_executor.py (99%) rename {tests/unit/launcher => launcher/tests}/test_yaml_formats.py (99%) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index f9745ce3c..3156efcab 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -12,6 +12,7 @@ on: - "tests/unit/**" - "pyproject.toml" - "tox.ini" + - "launcher/**" schedule: - cron: "0 0 * * *" # Nightly workflow_dispatch: # On-demand @@ -98,6 +99,21 @@ jobs: - uses: ./.github/actions/ubuntu-setup - name: Run unit tests run: pip install tox && tox -e py312-torch210-tf_${{ matrix.tf }}-unit + launcher: + if: github.event_name == 'pull_request' + needs: [linux] + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v6 + with: + submodules: recursive + - name: Run launcher tests + working-directory: launcher + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + uv pip install pytest + uv run python3 -m pytest -v partial-install: if: github.event_name == 'pull_request' needs: [linux] @@ -114,7 +130,7 @@ jobs: unit-pr-required-check: # Run even if some jobs are skipped if: ${{ github.event_name == 'pull_request' && always() }} - needs: [linux, windows, multi-py, multi-torch, multi-transformers, partial-install] + needs: [linux, windows, multi-py, multi-torch, multi-transformers, partial-install, launcher] runs-on: ubuntu-latest steps: - name: Required unit tests did not succeed @@ -124,5 +140,6 @@ jobs: needs.multi-py.result != 'success' || needs.multi-torch.result != 'success' || needs.multi-transformers.result != 'success' || - needs.partial-install.result != 'success' }} + needs.partial-install.result != 'success' || + needs.launcher.result != 'success' }} run: exit 1 diff 
--git a/launcher/CLAUDE.md b/launcher/CLAUDE.md index 288923272..3cc03a67e 100644 --- a/launcher/CLAUDE.md +++ b/launcher/CLAUDE.md @@ -96,6 +96,8 @@ launch.py → imports core.py + slurm_config.py 64 unit tests in `tests/unit/launcher/`. Run standalone without installing `modelopt`: +From the launcher directory: + ```shell uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --confcutdir=../tests/unit/launcher ``` diff --git a/launcher/pytest.ini b/launcher/pytest.ini new file mode 100644 index 000000000..5ee647716 --- /dev/null +++ b/launcher/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +testpaths = tests diff --git a/tests/unit/launcher/__init__.py b/launcher/tests/__init__.py similarity index 100% rename from tests/unit/launcher/__init__.py rename to launcher/tests/__init__.py diff --git a/tests/unit/launcher/conftest.py b/launcher/tests/conftest.py similarity index 70% rename from tests/unit/launcher/conftest.py rename to launcher/tests/conftest.py index 44fd9d936..bb6ccb045 100644 --- a/tests/unit/launcher/conftest.py +++ b/launcher/tests/conftest.py @@ -15,12 +15,13 @@ """Fixtures for launcher unit tests. -These tests require nemo_run and are skipped when it's not installed. 
- -Standalone run (from launcher/ directory): +Run from the launcher directory: cd Model-Optimizer/launcher uv pip install pytest - uv run python3 -m pytest ../tests/unit/launcher/ -v -o "addopts=" --confcutdir=../tests/unit/launcher + uv run python3 -m pytest tests/ -v + +Or via tox from Model-Optimizer root: + tox -e py312-launcher """ import os @@ -28,18 +29,11 @@ import pytest -# Skip all tests in this directory if nemo_run is not installed -try: - import nemo_run # noqa: F401 -except ImportError: - pytest.skip("nemo_run not installed, skipping launcher tests", allow_module_level=True) - @pytest.fixture(autouse=True) def add_launcher_to_path(): """Add the launcher directory to sys.path so core.py and slurm_config.py can be imported.""" - launcher_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "launcher") - launcher_dir = os.path.abspath(launcher_dir) + launcher_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if launcher_dir not in sys.path: sys.path.insert(0, launcher_dir) yield diff --git a/tests/unit/launcher/test_core.py b/launcher/tests/test_core.py similarity index 99% rename from tests/unit/launcher/test_core.py rename to launcher/tests/test_core.py index 69c0fc40d..6c7e8f043 100644 --- a/tests/unit/launcher/test_core.py +++ b/launcher/tests/test_core.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# ruff: noqa: D102 """Tests for launcher/core.py — shared dataclasses, factory registry, and utilities. 
Coverage: diff --git a/tests/unit/launcher/test_core_extended.py b/launcher/tests/test_core_extended.py similarity index 99% rename from tests/unit/launcher/test_core_extended.py rename to launcher/tests/test_core_extended.py index 698c5b438..9d4ba5604 100644 --- a/tests/unit/launcher/test_core_extended.py +++ b/launcher/tests/test_core_extended.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# ruff: noqa: D102 """Extended tests for launcher/core.py — edge cases and remaining coverage gaps. Coverage: diff --git a/tests/unit/launcher/test_docker_execution.py b/launcher/tests/test_docker_execution.py similarity index 99% rename from tests/unit/launcher/test_docker_execution.py rename to launcher/tests/test_docker_execution.py index 693071bb3..6d3fa0fa7 100644 --- a/tests/unit/launcher/test_docker_execution.py +++ b/launcher/tests/test_docker_execution.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# ruff: noqa: D102 """Tests for Docker execution path — verifies build_docker_executor and run_jobs with mocked Docker. 
Coverage: diff --git a/tests/unit/launcher/test_docker_launch.py b/launcher/tests/test_docker_launch.py similarity index 98% rename from tests/unit/launcher/test_docker_launch.py rename to launcher/tests/test_docker_launch.py index 8baad32c8..625d28b08 100644 --- a/tests/unit/launcher/test_docker_launch.py +++ b/launcher/tests/test_docker_launch.py @@ -57,7 +57,7 @@ def test_echo_script_via_launch(self, tmp_path): yaml_path.write_text(yaml_content) # Run launch.py as a subprocess (avoids pytest stdin capture issues) - launcher_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "launcher") + launcher_dir = os.path.join(os.path.dirname(__file__), "..") launcher_dir = os.path.abspath(launcher_dir) result = subprocess.run( @@ -100,7 +100,7 @@ def test_failing_script_via_launch(self, tmp_path): yaml_path = tmp_path / "fail_test.yaml" yaml_path.write_text(yaml_content) - launcher_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "launcher") + launcher_dir = os.path.join(os.path.dirname(__file__), "..") launcher_dir = os.path.abspath(launcher_dir) result = subprocess.run( diff --git a/tests/unit/launcher/test_slurm_config.py b/launcher/tests/test_slurm_config.py similarity index 99% rename from tests/unit/launcher/test_slurm_config.py rename to launcher/tests/test_slurm_config.py index aeb09200e..b23c46c24 100644 --- a/tests/unit/launcher/test_slurm_config.py +++ b/launcher/tests/test_slurm_config.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# ruff: noqa: D102 """Tests for launcher/slurm_config.py — SlurmConfig dataclass and factory. 
Coverage: diff --git a/tests/unit/launcher/test_slurm_executor.py b/launcher/tests/test_slurm_executor.py similarity index 99% rename from tests/unit/launcher/test_slurm_executor.py rename to launcher/tests/test_slurm_executor.py index 48004c786..d7ac7827f 100644 --- a/tests/unit/launcher/test_slurm_executor.py +++ b/launcher/tests/test_slurm_executor.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# ruff: noqa: D102 """Tests for build_slurm_executor — container mounts, scratch paths, executor params. Note: actual SSH tunnel and sbatch submission are not tested (require live infra). diff --git a/tests/unit/launcher/test_yaml_formats.py b/launcher/tests/test_yaml_formats.py similarity index 99% rename from tests/unit/launcher/test_yaml_formats.py rename to launcher/tests/test_yaml_formats.py index 571535343..981c32216 100644 --- a/tests/unit/launcher/test_yaml_formats.py +++ b/launcher/tests/test_yaml_formats.py @@ -58,7 +58,6 @@ def test_yaml_format_with_job_name(self, tmp_yaml): def test_bare_pipeline_format(self, tmp_yaml): """The pipeline=@ format is a bare SandboxPipeline without wrapper.""" - content = """ task_0: script: a.sh From 472c091be6a44c732799294d0ada943b2e383f78 Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sun, 15 Mar 2026 09:18:33 -0700 Subject: [PATCH 11/12] fix: create venv before uv pip install in launcher CI job Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- .github/workflows/unit_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 3156efcab..fc23c8c23 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -112,6 +112,8 @@ jobs: working-directory: launcher run: | curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH="$HOME/.local/bin:$PATH" + uv venv 
.venv uv pip install pytest uv run python3 -m pytest -v partial-install: From 410de11a8b4f700a24b2254f65f45937b62e8446 Mon Sep 17 00:00:00 2001 From: Chenhan Yu <chenhany@nvidia.com> Date: Sun, 15 Mar 2026 10:57:30 -0700 Subject: [PATCH 12/12] fix: use nemo-run from PyPI, install project deps in CI Switch from git-pinned nemo-run to nemo-run>=0.8.0 from PyPI (avoids uv TOML parse error). Add py-modules=[] to prevent setuptools auto- discovery. CI installs project with `uv pip install -e . pytest`. Add ModelOpt mount mechanism docs to ADVANCED.md. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Chenhan Yu <chenhany@nvidia.com> --- .github/workflows/unit_tests.yml | 2 +- launcher/ADVANCED.md | 29 ++++++++++++++++++++++++++++- launcher/pyproject.toml | 5 ++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index fc23c8c23..006056ac0 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -114,7 +114,7 @@ jobs: curl -LsSf https://astral.sh/uv/install.sh | sh export PATH="$HOME/.local/bin:$PATH" uv venv .venv - uv pip install pytest + uv pip install -e . pytest uv run python3 -m pytest -v partial-install: if: github.event_name == 'pull_request' diff --git a/launcher/ADVANCED.md b/launcher/ADVANCED.md index 8698f4ce8..cc8678c95 100644 --- a/launcher/ADVANCED.md +++ b/launcher/ADVANCED.md @@ -44,7 +44,34 @@ code/ └── query.py # OpenAI-compatible query client ``` -The `modelopt/` directory is bind-mounted over the container's installed ModelOpt, so your local changes take effect without rebuilding the container. +### ModelOpt Mount Mechanism + +The container image (e.g., `nvcr.io/nvidia/tensorrt-llm/release:1.2.0rc5`) ships with a pre-installed version of ModelOpt at a fixed path like `/usr/local/lib/python3.12/dist-packages/modelopt`. 
The launcher **bind-mounts your local `modelopt/` over this path**, so your local changes take effect without rebuilding the container. + +The mount is configured via `modelopt_install_path` in `SlurmConfig`: + +```yaml +slurm_config: + modelopt_install_path: /usr/local/lib/python3.12/dist-packages/modelopt +``` + +At runtime, the executor constructs the mount: + +- **Slurm**: `{job_dir}/{experiment_title}/{exp_id}/{task}/code/modules/Model-Optimizer/modelopt` → `{modelopt_install_path}` +- **Docker**: `{LAUNCHER_DIR}/modules/Model-Optimizer/modelopt` → `{modelopt_install_path}` (follows the symlink to the parent's `modelopt/`) + +This means: + +1. You can edit `modelopt/` source code locally +2. Submit a job — the packager tars your changes and ships them to the cluster +3. On the cluster, the container sees your modified `modelopt/` instead of the pre-installed one +4. No container rebuild needed for iterating on ModelOpt changes + +The `modelopt_install_path` varies by container image. Check with: + +```bash +docker run --rm <image> python3 -c "import modelopt; print(modelopt.__file__)" +``` ### Model-Optimizer Symlink diff --git a/launcher/pyproject.toml b/launcher/pyproject.toml index 0e576e5af..6ecc201e8 100644 --- a/launcher/pyproject.toml +++ b/launcher/pyproject.toml @@ -4,9 +4,12 @@ version = "0.1.0" description = "ModelOpt Launcher — submit quantization, training, and evaluation jobs to Slurm clusters" requires-python = ">=3.10" dependencies = [ - "nemo-run@git+https://github.com/NVIDIA-NeMo/Run@2ccf1c9e68acd157da451721b24635bcc83be87e", + "nemo-run>=0.8.0", "pyyaml", ] +[tool.setuptools] +py-modules = [] + [dependency-groups] dev = []