Fix failing unit tests

RayenTian · RayenTian · commit c0bfaa6da8d8 · 2025-11-15T21:16:40.000-08:00
Signed-off-by: ruit <ruit@nvidia.com>
diff --git a/nemo_rl/data/datasets/preference_datasets/helpsteer3.py b/nemo_rl/data/datasets/preference_datasets/helpsteer3.py
@@ -58,7 +58,7 @@ class HelpSteer3Dataset:
 
     def __init__(self) -> None:
         ds = load_dataset("nvidia/HelpSteer3", "preference")
-        self.task_name = "helpsteer3"
+        self.task_name = "HelpSteer3"
         self.formatted_ds = ds.map(to_preference_data_format)
         self.formatted_ds = self.formatted_ds.map(
             lambda _: {"task_name": self.task_name}
diff --git a/nemo_rl/data/datasets/response_datasets/helpsteer3.py b/nemo_rl/data/datasets/response_datasets/helpsteer3.py
@@ -53,7 +53,7 @@ class HelpSteer3Dataset:
 
     def __init__(self) -> None:
         ds = load_dataset("nvidia/HelpSteer3", "preference")
-        self.task_name = "helpsteer3"
+        self.task_name = "HelpSteer3"
         self.formatted_ds = ds.map(to_response_data_format)
         self.formatted_ds = self.formatted_ds.map(
             lambda _: {"task_name": self.task_name}
diff --git a/tests/functional/grpo.sh b/tests/functional/grpo.sh
@@ -19,7 +19,7 @@ mkdir -p $EXP_DIR $LOG_DIR
 
 cd $PROJECT_ROOT
 uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \
-    $PROJECT_ROOT/examples/run_grpo_math.py \
+    $PROJECT_ROOT/examples/run_grpo.py \
     policy.model_name=Qwen/Qwen3-0.6B \
     grpo.num_prompts_per_step=2 \
     grpo.num_generations_per_prompt=4 \
diff --git a/tests/functional/grpo_math_env.sh b/tests/functional/grpo_math_env.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Functional test: runs a short (2-step) GRPO job via examples/run_grpo_math.py
+# under `coverage run`, then validates the metrics produced by the run.
+# Extra command-line arguments are forwarded to the training script.
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
+# Mark the current repo as safe, since wandb fetches metadata about the repo
+git config --global --add safe.directory "$PROJECT_ROOT"
+
+# Abort on command failure, unset variables, and failures inside pipelines
+# (so a failing training run is not masked by the trailing `tee`).
+set -eou pipefail
+
+# All artifacts live in a directory named after this script, next to it.
+EXP_NAME=$(basename "$0" .sh)
+EXP_DIR=$SCRIPT_DIR/$EXP_NAME
+LOG_DIR=$EXP_DIR/logs
+JSON_METRICS=$EXP_DIR/metrics.json
+RUN_LOG=$EXP_DIR/run.log
+export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-}
+
+# Start from a clean experiment directory on every run.
+rm -rf "$EXP_DIR" "$LOG_DIR"
+mkdir -p "$EXP_DIR" "$LOG_DIR"
+
+cd "$PROJECT_ROOT"
+# "$@" and all paths are quoted so that values containing spaces or glob
+# characters reach the commands as single, unmodified arguments.
+uv run coverage run -a --data-file="$PROJECT_ROOT/tests/.coverage" --source="$PROJECT_ROOT/nemo_rl" \
+    "$PROJECT_ROOT/examples/run_grpo_math.py" \
+    policy.model_name=Qwen/Qwen3-0.6B \
+    grpo.num_prompts_per_step=2 \
+    grpo.num_generations_per_prompt=4 \
+    policy.train_global_batch_size=4 \
+    policy.train_micro_batch_size=1 \
+    cluster.gpus_per_node=2 \
+    grpo.max_num_steps=2 \
+    logger.tensorboard_enabled=true \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=false \
+    logger.monitor_gpus=true \
+    checkpointing.enabled=false \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Presumably converts the TensorBoard logs under $LOG_DIR into $JSON_METRICS.
+uv run tests/json_dump_tb_logs.py "$LOG_DIR" --output_path "$JSON_METRICS"
+
+# Require the maximum logged train/token_mult_prob_error to be below 1.05.
+uv run tests/check_metrics.py "$JSON_METRICS" \
+    'max(data["train/token_mult_prob_error"]) < 1.05'
+
diff --git a/tests/unit/data/test_data_shuffle_reproducity.py b/tests/unit/data/test_data_shuffle_reproducity.py
@@ -43,21 +43,24 @@ def create_dataloader(
     """Create a dataloader with consistent configuration for testing."""
     # Initialize dataset
     data = OpenMathInstruct2Dataset(seed=seed)
+    task_name = (
+        data.task_name if hasattr(data, "task_name") else data.task_spec.task_name
+    )
 
     # Setup tokenizer
     tokenizer = get_tokenizer(TOKENIZER_CONFIG)
 
     # Configure task specification
     math_task_spec = TaskDataSpec(
-        task_name="math",
+        task_name=task_name,
         prompt_file=f"{os.path.dirname(os.path.abspath(__file__))}/../../../examples/prompts/cot.txt",
         system_prompt_file=None,
     )
 
     task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = (
         defaultdict(lambda: (math_task_spec, math_hf_data_processor))
     )
-    task_data_processors["math"] = (math_task_spec, math_hf_data_processor)
+    task_data_processors[task_name] = (math_task_spec, math_hf_data_processor)
 
     dataset = AllTaskProcessedDataset(
         dataset=data.formatted_ds["train"].select(range(1000)),
diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py
@@ -35,6 +35,9 @@
 if not OmegaConf.has_resolver("mul"):
     OmegaConf.register_new_resolver("mul", lambda a, b: a * b)
 
+if not OmegaConf.has_resolver("max"):
+    OmegaConf.register_new_resolver("max", lambda a, b: max(a, b))
+
 
 def validate_config_section(
     section_config: Dict[str, Any],