@@ -4,6 +4,10 @@
from collections.abc import Sequence
from typing import TYPE_CHECKING

from isaaclab.assets import Articulation
from isaaclab.managers import SceneEntityCfg
from isaaclab.terrains import TerrainImporter

if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedRLEnv

@@ -59,3 +63,55 @@ def ang_vel_cmd_levels(
).tolist()

return torch.tensor(ranges.ang_vel_z[1], device=env.device)



def terrain_levels(
env: "ManagerBasedRLEnv",
env_ids: Sequence[int],
asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
up_ratio: float = 0.5,
down_ratio: float = 0.5,
) -> torch.Tensor:
"""
A robust curriculum for terrain difficulty that is only called at the end of an episode.

- **Upgrade condition**: Based on the total displacement and the size of the terrain.
- **Downgrade condition**: Based on the expected distance covered during the last command window.
This prevents agents that are slow in the beginning but fast at the end from being unfairly
downgraded.
"""
asset: Articulation = env.scene[asset_cfg.name]
terrain: TerrainImporter = env.scene.terrain

distance = torch.norm(
asset.data.root_pos_w[env_ids, :2] - env.scene.env_origins[env_ids, :2],
dim=1,
)

# Total episode duration in seconds
T_episode = float(getattr(env, "max_episode_length_s", 0.0))

# Magnitude of the last linear velocity command
command_term = env.command_manager.get_term("base_velocity")
command = env.command_manager.get_command("base_velocity")
cmd_speed_last = torch.linalg.norm(command[env_ids, :2], dim=1)
cmd_sampling_period_s = command_term.cfg.resampling_time_range[0]

terrain_size_x = float(terrain.cfg.terrain_generator.size[0])
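# Promote robots whose displacement exceeds up_ratio of the terrain block size along x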
move_up = distance > (up_ratio * terrain_size_x)

# Conservatively estimate the expected distance using only the last command window
effective_T = min(T_episode, float(cmd_sampling_period_s))
expected_dist_last = cmd_speed_last * effective_T

# Robots that covered less than down_ratio of the expected distance go to simpler terrains
move_down = distance < (down_ratio * expected_dist_last)
move_down &= ~move_up

# Do not downgrade if the environment is reset due to a timeout
move_down[env.termination_manager.time_outs[env_ids]] = False

# Update the terrain origins and return the mean terrain level as the curriculum state
terrain.update_env_origins(env_ids, move_up, move_down)
return torch.mean(terrain.terrain_levels.float())
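As a quick sanity check of the thresholds above, here is a standalone sketch (not part of this diff; the block size, episode length, and resampling period are made-up values) that reproduces the promotion/demotion decision for three hypothetical environments:

import torch

up_ratio, down_ratio = 0.5, 0.5
terrain_size_x = 8.0          # assumed per-block size along x, in metres
T_episode = 20.0              # assumed episode length in seconds
cmd_sampling_period_s = 10.0  # assumed lower bound of resampling_time_range

distance = torch.tensor([5.0, 3.0, 1.0])        # planar displacement from the env origin
cmd_speed_last = torch.tensor([1.0, 1.0, 0.1])  # magnitude of the last commanded velocity

move_up = distance > up_ratio * terrain_size_x
expected_dist_last = cmd_speed_last * min(T_episode, cmd_sampling_period_s)
move_down = (distance < down_ratio * expected_dist_last) & ~move_up

print(move_up.tolist())    # [True, False, False] -> env 0 is promoted
print(move_down.tolist())  # [False, True, False] -> env 1 is demoted, env 2 stays put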
@@ -359,7 +359,10 @@ class TerminationsCfg:
class CurriculumCfg:
"""Curriculum terms for the MDP."""

terrain_levels = CurrTerm(func=mdp.terrain_levels_vel)
terrain_levels = CurrTerm(
func=mdp.terrain_levels,
params={"up_ratio": 0.5, "down_ratio": 0.5},
)
lin_vel_cmd_levels = CurrTerm(mdp.lin_vel_cmd_levels)
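Since the ratios are exposed through ``params``, the curriculum can be tuned without touching the term itself. A hypothetical variation (not part of this diff) that promotes only after 80% of the block and disables demotion entirely could look like:

# Hypothetical variation, not part of this diff: stricter promotion, no demotion.
# With down_ratio = 0.0 the downgrade condition (distance < 0) can never trigger.
terrain_levels = CurrTerm(
    func=mdp.terrain_levels,
    params={"up_ratio": 0.8, "down_ratio": 0.0},
)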

