From 73495ebd19e709a683ee20e9d2e5aa416e6b3ed9 Mon Sep 17 00:00:00 2001 From: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com> Date: Tue, 23 Dec 2025 06:19:07 +0000 Subject: [PATCH 1/3] add new test cases for ckpts on hf Signed-off-by: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com> --- tests/_test_utils/deploy_utils.py | 4 ++-- tests/examples/cnn_qat/test_resnet50.py | 27 ++++++++++++++----------- tests/examples/llm_ptq/test_deploy.py | 18 +++++++++++++++-- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/tests/_test_utils/deploy_utils.py b/tests/_test_utils/deploy_utils.py index 7a1897ccb..c2c82cbf1 100644 --- a/tests/_test_utils/deploy_utils.py +++ b/tests/_test_utils/deploy_utils.py @@ -146,7 +146,7 @@ def _deploy_vllm(self): pytest.skip("vllm package not available") quantization_method = "modelopt" - if "FP4" in self.model_id: + if "FP4" in self.model_id.lower(): quantization_method = "modelopt_fp4" llm = LLM( model=self.model_id, @@ -182,7 +182,7 @@ def _deploy_sglang(self): except ImportError: pytest.skip("sglang package not available") quantization_method = "modelopt" - if "FP4" in self.model_id: + if "FP4" in self.model_id.lower(): quantization_method = "modelopt_fp4" if "eagle" in self.model_id.lower(): llm = sgl.Engine( diff --git a/tests/examples/cnn_qat/test_resnet50.py b/tests/examples/cnn_qat/test_resnet50.py index 21b2e4860..dba47587e 100644 --- a/tests/examples/cnn_qat/test_resnet50.py +++ b/tests/examples/cnn_qat/test_resnet50.py @@ -19,14 +19,19 @@ from _test_utils.examples.run_command import run_example_command from _test_utils.torch.misc import minimum_gpu -imagenet_path = os.getenv("IMAGENET_PATH") -skip_no_imagenet = pytest.mark.skipif( - not imagenet_path or not os.path.isdir(imagenet_path), - reason="IMAGENET_PATH environment variable is not set or does not point to a valid directory", -) +@pytest.fixture +def imagenet_path(): + """Fixture to get IMAGENET_PATH from environment and skip if not valid.""" + 
path = os.getenv("IMAGENET_PATH") + if not path or not os.path.isdir(path): + pytest.skip( + "IMAGENET_PATH environment variable is not set or does not point to a valid directory" + ) + return path -def _build_common_command(): + +def _build_common_command(imagenet_path): """Build common command arguments for CNN QAT training.""" train_data_path = os.path.join(imagenet_path, "train") val_data_path = os.path.join(imagenet_path, "val") @@ -58,21 +63,19 @@ def _run_qat_command(base_cmd, common_args, output_dir, example_dir="cnn_qat"): run_example_command(full_command, example_dir) -@skip_no_imagenet @minimum_gpu(1) -def test_cnn_qat_single_gpu(tmp_path): +def test_cnn_qat_single_gpu(tmp_path, imagenet_path): """Test CNN QAT on single GPU.""" - common_args = _build_common_command() + common_args = _build_common_command(imagenet_path) base_command = ["python", "torchvision_qat.py", "--gpu", "0"] _run_qat_command(base_command, common_args, tmp_path) -@skip_no_imagenet @minimum_gpu(2) -def test_cnn_qat_multi_gpu(tmp_path): +def test_cnn_qat_multi_gpu(tmp_path, imagenet_path): """Test CNN QAT on multiple GPUs.""" - common_args = _build_common_command() + common_args = _build_common_command(imagenet_path) base_command = ["torchrun", "--nproc_per_node=2", "torchvision_qat.py"] _run_qat_command(base_command, common_args, tmp_path) diff --git a/tests/examples/llm_ptq/test_deploy.py b/tests/examples/llm_ptq/test_deploy.py index 3d3229e01..7588f91f6 100644 --- a/tests/examples/llm_ptq/test_deploy.py +++ b/tests/examples/llm_ptq/test_deploy.py @@ -464,14 +464,28 @@ def test_medusa(command): ), *ModelDeployerList( base_model="openai/gpt-oss-120b", - model_id="nvidia/gpt-oss-120b-Eagle3", + model_id="nvidia/gpt-oss-120b-Eagle3-long-context", backend=("trtllm", "sglang"), tensor_parallel_size=8, mini_sm=89, ), *ModelDeployerList( base_model="openai/gpt-oss-120b", - model_id="nvidia/gpt-oss-120b-Eagle3-v2", + model_id="nvidia/gpt-oss-120b-Eagle3-short-context", + backend=("trtllm", 
"sglang"), + tensor_parallel_size=8, + mini_sm=89, + ), + *ModelDeployerList( + base_model="openai/gpt-oss-120b", + model_id="nvidia/gpt-oss-120b-Eagle3-throughput", + backend=("trtllm", "sglang"), + tensor_parallel_size=8, + mini_sm=89, + ), + *ModelDeployerList( + base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", backend=("trtllm", "sglang"), tensor_parallel_size=8, mini_sm=89, From ced4b6c881c3b0fbd45a26f964508ed2725e825d Mon Sep 17 00:00:00 2001 From: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com> Date: Thu, 25 Dec 2025 05:38:53 +0000 Subject: [PATCH 2/3] add test for Nemotron 3 Nano Signed-off-by: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com> --- tests/_test_utils/deploy_utils.py | 43 ++++++++++++++++++++++++++- tests/examples/llm_ptq/test_deploy.py | 24 +++++++++++++-- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/tests/_test_utils/deploy_utils.py b/tests/_test_utils/deploy_utils.py index c2c82cbf1..4a0bfffbc 100644 --- a/tests/_test_utils/deploy_utils.py +++ b/tests/_test_utils/deploy_utils.py @@ -100,7 +100,40 @@ def _deploy_trtllm(self): spec_config = None llm = None kv_cache_config = KvCacheConfig(enable_block_reuse=True, free_gpu_memory_fraction=0.8) - if "eagle" in self.model_id.lower(): + + if self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8": + llm = LLM( + model=self.model_id, + tensor_parallel_size=self.tensor_parallel_size, + enable_attention_dp=False, + attn_backend=self.attn_backend, + trust_remote_code=True, + max_batch_size=8, + kv_cache_config=KvCacheConfig( + enable_block_reuse=False, + mamba_ssm_cache_dtype="float32", + ), + ) + elif self.model_id == "nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": + spec_config = EagleDecodingConfig( + max_draft_len=3, + speculative_model_dir=self.model_id, + eagle3_one_model=self.eagle3_one_model, + ) + llm = LLM( + model=self.model_id, + 
tensor_parallel_size=self.tensor_parallel_size, + enable_attention_dp=False, + attn_backend=self.attn_backend, + trust_remote_code=True, + max_batch_size=8, + speculative_config=spec_config, + kv_cache_config=KvCacheConfig( + enable_block_reuse=False, + mamba_ssm_cache_dtype="float32", + ), + ) + elif "eagle" in self.model_id.lower(): spec_config = EagleDecodingConfig( max_draft_len=3, speculative_model_dir=self.model_id, @@ -197,6 +230,14 @@ def _deploy_sglang(self): mem_fraction_static=0.7, context_length=1024, ) + elif self.model_id == "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8": + llm = sgl.Engine( + model_path=self.model_id, + quantization=quantization_method, + tp_size=self.tensor_parallel_size, + trust_remote_code=True, + attention_backend="flashinfer", + ) else: llm = sgl.Engine( model_path=self.model_id, diff --git a/tests/examples/llm_ptq/test_deploy.py b/tests/examples/llm_ptq/test_deploy.py index 7588f91f6..868304f48 100644 --- a/tests/examples/llm_ptq/test_deploy.py +++ b/tests/examples/llm_ptq/test_deploy.py @@ -386,6 +386,13 @@ def test_kimi(command): tensor_parallel_size=8, mini_sm=89, ), + *ModelDeployerList( + model_id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", + backend=("trtllm", "vllm", "sglang"), + tensor_parallel_size=1, + mini_sm=89, + attn_backend="FLASHINFER", + ), ], ids=idfn, ) @@ -486,7 +493,8 @@ def test_medusa(command): *ModelDeployerList( base_model="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", model_id="nvidia/EAGLE3-NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", - backend=("trtllm", "sglang"), + backend=("trtllm", "vllm", "sglang"), + eagle3_one_model=False, tensor_parallel_size=8, mini_sm=89, ), @@ -501,4 +509,16 @@ def test_medusa(command): ids=idfn, ) def test_eagle(command): - command.run() + """Skip test if MODELOPT_LOCAL_MODEL_ROOT is set but model doesn't exist locally. 
+ speculative models should be loaded by local path""" + local_root = os.getenv("MODELOPT_LOCAL_MODEL_ROOT") + if not local_root: + return + + local_path = os.path.join(local_root, command.model_id) + if os.path.isdir(local_path): + # Update model_id to use local path + command.model_id = local_path + command.run() + else: + pytest.skip(f"Local model not found: {local_path}") From a368545ded81febfd43576bcfd0f03fdd0875ad5 Mon Sep 17 00:00:00 2001 From: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com> Date: Mon, 29 Dec 2025 10:21:29 +0000 Subject: [PATCH 3/3] fix typo error Signed-off-by: noeyy-mino <174223378+noeyy-mino@users.noreply.github.com> --- tests/_test_utils/deploy_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/_test_utils/deploy_utils.py b/tests/_test_utils/deploy_utils.py index 4a0bfffbc..805624b8f 100644 --- a/tests/_test_utils/deploy_utils.py +++ b/tests/_test_utils/deploy_utils.py @@ -179,7 +179,7 @@ def _deploy_vllm(self): pytest.skip("vllm package not available") quantization_method = "modelopt" - if "FP4" in self.model_id.lower(): + if "fp4" in self.model_id.lower(): quantization_method = "modelopt_fp4" llm = LLM( model=self.model_id, @@ -215,7 +215,7 @@ def _deploy_sglang(self): except ImportError: pytest.skip("sglang package not available") quantization_method = "modelopt" - if "FP4" in self.model_id.lower(): + if "fp4" in self.model_id.lower(): quantization_method = "modelopt_fp4" if "eagle" in self.model_id.lower(): llm = sgl.Engine(