diff --git a/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/mdp/curriculums.py b/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/mdp/curriculums.py
index 05b3e51a..8fce8ed6 100644
--- a/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/mdp/curriculums.py
+++ b/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/mdp/curriculums.py
@@ -4,6 +4,10 @@
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
+from isaaclab.assets import Articulation
+from isaaclab.managers import SceneEntityCfg
+from isaaclab.terrains import TerrainImporter
+
 if TYPE_CHECKING:
     from isaaclab.envs import ManagerBasedRLEnv
 
@@ -59,3 +63,55 @@ def ang_vel_cmd_levels(
     ).tolist()
 
     return torch.tensor(ranges.ang_vel_z[1], device=env.device)
+
+
+
+def terrain_levels(
+    env: "ManagerBasedRLEnv",
+    env_ids: Sequence[int],
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"),
+    up_ratio: float = 0.5,
+    down_ratio: float = 0.5,
+) -> torch.Tensor:
+    """
+    A robust curriculum for terrain difficulty that is only called at the end of an episode.
+
+    - **Upgrade condition**: Based on the total displacement and the size of the terrain.
+    - **Downgrade condition**: Based on the expected distance covered during the last command window.
+      This prevents agents that are slow in the beginning but fast at the end from being unfairly
+      downgraded.
+    """
+    asset: Articulation = env.scene[asset_cfg.name]
+    terrain: TerrainImporter = env.scene.terrain
+
+    distance = torch.norm(
+        asset.data.root_pos_w[env_ids, :2] - env.scene.env_origins[env_ids, :2],
+        dim=1,
+    )
+
+    # Total episode duration in seconds
+    T_episode = float(getattr(env, "max_episode_length_s", 0.0))
+
+    # Magnitude of the last linear velocity command
+    command_term = env.command_manager.get_term("base_velocity")
+    command = env.command_manager.get_command("base_velocity")
+    cmd_speed_last = torch.linalg.norm(command[env_ids, :2], dim=1)
+    cmd_sampling_period_s = command_term.cfg.resampling_time_range[0]
+
+    terrain_size_x = float(terrain.cfg.terrain_generator.size[0])
+    move_up = distance > (up_ratio * terrain_size_x)
+
+    # Conservatively estimate the expected distance using only the last command window
+    effective_T = min(T_episode, float(cmd_sampling_period_s))
+    expected_dist_last = cmd_speed_last * effective_T
+
+    # Robots that walked less than `down_ratio` of their expected distance go to simpler terrains
+    move_down = distance < (down_ratio * expected_dist_last)
+    move_down *= ~move_up
+
+    # Do not downgrade if the environment is reset due to a timeout
+    move_down[env.termination_manager.time_outs[env_ids]] = False
+
+    # Update terrain levels and return the mean level for logging
+    terrain.update_env_origins(env_ids, move_up, move_down)
+    return torch.mean(terrain.terrain_levels.float())
diff --git a/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/robots/go2/velocity_env_cfg.py b/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/robots/go2/velocity_env_cfg.py
index 7496d767..e6f4eeb1 100644
--- a/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/robots/go2/velocity_env_cfg.py
+++ b/source/unitree_rl_lab/unitree_rl_lab/tasks/locomotion/robots/go2/velocity_env_cfg.py
@@ -359,7 +359,10 @@ class TerminationsCfg:
 class CurriculumCfg:
     """Curriculum terms for the MDP."""
 
-    terrain_levels = CurrTerm(func=mdp.terrain_levels_vel)
+    terrain_levels = CurrTerm(
+        func=mdp.terrain_levels,
+        params={"up_ratio": 0.5, "down_ratio": 0.5},
+    )
 
     lin_vel_cmd_levels = CurrTerm(mdp.lin_vel_cmd_levels)
 
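
Not part of the patch: below is a minimal standalone sketch of how the up_ratio / down_ratio thresholds interact, mirroring the promotion/demotion logic added above so it can be sanity-checked without a simulator. The terrain size, command window, distances, and command speeds are made-up toy values, not numbers taken from the repository.

# Standalone sketch of the terrain_levels thresholds (toy values, plain torch tensors).
import torch

up_ratio, down_ratio = 0.5, 0.5
terrain_size_x = 8.0          # terrain tile length in metres (toy value)
cmd_sampling_period_s = 10.0  # length of the last command window in seconds (toy value)
max_episode_length_s = 20.0   # episode duration in seconds (toy value)

distance = torch.tensor([5.0, 1.0, 3.0])        # walked distance per env
cmd_speed_last = torch.tensor([1.0, 1.0, 0.2])  # |v_cmd| of the last command per env

# Upgrade: walked further than up_ratio of one terrain tile
move_up = distance > up_ratio * terrain_size_x

# Downgrade: walked less than down_ratio of the distance the last command asked for
effective_T = min(max_episode_length_s, cmd_sampling_period_s)
expected_dist_last = cmd_speed_last * effective_T
move_down = (distance < down_ratio * expected_dist_last) & ~move_up

print(move_up.tolist(), move_down.tolist())  # [True, False, False] [False, True, False]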