From 0024fd7edd2fb25546f25079643e2fe9a48f3618 Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 4 Apr 2024 18:38:58 +0200
Subject: [PATCH 01/16] WIP

---
 gym_xarm/__init__.py   |   2 +-
 gym_xarm/robot_env.py  | 306 +++++++++++++++++++++++++++++++++++++++++
 gym_xarm/tasks/base.py | 259 ++++++++++++++++++++++++----------
 gym_xarm/tasks/lift.py |  13 +-
 tests/test_env.py      |   4 +-
 5 files changed, 508 insertions(+), 76 deletions(-)
 create mode 100644 gym_xarm/robot_env.py

diff --git a/gym_xarm/__init__.py b/gym_xarm/__init__.py
index 0bae784..90ba44a 100644
--- a/gym_xarm/__init__.py
+++ b/gym_xarm/__init__.py
@@ -4,5 +4,5 @@
     id="gym_xarm/XarmLift-v0",
     entry_point="gym_xarm.tasks:Lift",
     max_episode_steps=300,
-    kwargs={"obs_mode": "state"},
+    kwargs={"obs_type": "state"},
 )
diff --git a/gym_xarm/robot_env.py b/gym_xarm/robot_env.py
new file mode 100644
index 0000000..d1d732e
--- /dev/null
+++ b/gym_xarm/robot_env.py
@@ -0,0 +1,306 @@
+import os
+from typing import Optional
+
+import gymnasium as gym
+import mujoco
+import numpy as np
+from gymnasium import spaces
+from gymnasium_robotics.utils import mujoco_utils
+
+DEFAULT_SIZE = 480
+
+
+class BaseRobotEnv(gym.Env):
+    """Modified version of BaseRobotEnv to be a `gym.Env` instead of a `gymnasium_robotics.GoalEnv`"""
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(
+        self,
+        model_path: str,
+        initial_qpos,
+        n_actions: int,
+        n_substeps: int,
+        render_mode: Optional[str] = None,
+        width: int = DEFAULT_SIZE,
+        height: int = DEFAULT_SIZE,
+    ):
+        """Initialize the hand and fetch robot superclass.
+
+        Args:
+            model_path (string): the path to the mjcf MuJoCo model.
+            initial_qpos (np.ndarray): initial position value of the joints in the MuJoCo simulation.
+            n_actions (integer): size of the action space.
+            n_substeps (integer): number of MuJoCo simulation timesteps per Gymnasium step.
+            render_mode (optional string): type of rendering mode, "human" for window rendering and "rgb_array" for offscreen. Defaults to None.
+            width (optional integer): width of each rendered frame. Defaults to DEFAULT_SIZE.
+            height (optional integer): height of each rendered frame. Defaults to DEFAULT_SIZE.
+        """
+        if model_path.startswith("/"):
+            self.fullpath = model_path
+        else:
+            self.fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
+        if not os.path.exists(self.fullpath):
+            raise OSError(f"File {self.fullpath} does not exist")
+
+        self.n_substeps = n_substeps
+
+        self.initial_qpos = initial_qpos
+
+        self.width = width
+        self.height = height
+        self._initialize_simulation()
+
+        self.goal = np.zeros(0)
+        obs = self._get_obs()
+
+        assert (
+            int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
+        ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
+
+        self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32")
+        self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64")
+        # self.observation_space = spaces.Dict(
+        #     dict(
+        #         desired_goal=spaces.Box(
+        #             -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64"
+        #         ),
+        #         achieved_goal=spaces.Box(
+        #             -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64"
+        #         ),
+        #         observation=spaces.Box(
+        #             -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64"
+        #         ),
+        #     )
+        # )
+
+        self.render_mode = render_mode
+
+    # Env methods
+    # ----------------------------
+    def compute_terminated(self, achieved_goal, desired_goal, info):
+        """All the available environments are currently continuing tasks and non-time dependent. The objective is to reach the goal for an indefinite period of time."""
+        return False
+
+    def compute_truncated(self, achieved_goal, desired_goal, info):
+        """The environments will be truncated only if setting a time limit with max_steps which will automatically wrap the environment in a gymnasium TimeLimit wrapper."""
+        return False
+
+    def step(self, action):
+        """Run one timestep of the environment's dynamics using the agent actions.
+
+        Args:
+            action (np.ndarray): Control action to be applied to the agent and update the simulation. Should be of shape :attr:`action_space`.
+
+        Returns:
+            observation (dictionary): Next observation due to the agent actions. It should satisfy the `GoalEnv` :attr:`observation_space`.
+            reward (integer): The reward as a result of taking the action. This is calculated by :meth:`compute_reward` of `GoalEnv`.
+            terminated (boolean): Whether the agent reaches the terminal state. This is calculated by :meth:`compute_terminated` of `GoalEnv`.
+            truncated (boolean): Whether the truncation condition outside the scope of the MDP is satisfied. Typically, due to a timelimit, but
+            it is also calculated in :meth:`compute_truncated` of `GoalEnv`.
+            info (dictionary): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging). In this case there is a single
+            key `is_success` with a boolean value, True if the `achieved_goal` is the same as the `desired_goal`.
+        """
+        if np.array(action).shape != self.action_space.shape:
+            raise ValueError("Action dimension mismatch")
+
+        action = np.clip(action, self.action_space.low, self.action_space.high)
+        self._set_action(action)
+
+        self._mujoco_step(action)
+
+        self._step_callback()
+
+        if self.render_mode == "human":
+            self.render()
+        obs = self._get_obs()
+
+        info = {
+            "is_success": self._is_success(obs["achieved_goal"], self.goal),
+        }
+
+        terminated = self.compute_terminated(obs["achieved_goal"], self.goal, info)
+        truncated = self.compute_truncated(obs["achieved_goal"], self.goal, info)
+
+        reward = self.compute_reward(obs["achieved_goal"], self.goal, info)
+
+        return obs, reward, terminated, truncated, info
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ):
+        """Reset MuJoCo simulation to initial state.
+
+        Note: Attempt to reset the simulator. Since we randomize initial conditions, it
+        is possible to get into a state with numerical issues (e.g. due to penetration or
+        gimbal lock) or we may not achieve an initial condition (e.g. an object is within the hand).
+        In this case, we just keep randomizing until we eventually achieve a valid initial
+        configuration.
+
+        Args:
+            seed (optional integer): The seed that is used to initialize the environment's PRNG (`np_random`). Defaults to None.
+            options (optional dictionary): Can be used when `reset` is overridden for additional information to specify how the environment is reset.
+
+        Returns:
+            observation (dictionary): Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`.
+            info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
+            the ``info`` returned by :meth:`step`.
+        """
+        super().reset(seed=seed)
+        did_reset_sim = False
+        while not did_reset_sim:
+            did_reset_sim = self._reset_sim()
+        self.goal = self._sample_goal().copy()
+        obs = self._get_obs()
+        if self.render_mode == "human":
+            self.render()
+
+        return obs, {}
+
+    # Extension methods
+    # ----------------------------
+    def _mujoco_step(self, action):
+        """Advance the mujoco simulation.
+
+        Override depending on the python bindings, either mujoco or mujoco_py
+        """
+        raise NotImplementedError
+
+    def _reset_sim(self):
+        """Resets a simulation and indicates whether or not it was successful.
+
+        If a reset was unsuccessful (e.g. if a randomized state caused an error in the
+        simulation), this method should indicate such a failure by returning False.
+        In such a case, this method will be called again to attempt the reset again.
+        """
+        return True
+
+    def _initialize_simulation(self):
+        """Initialize MuJoCo simulation data structures mjModel and mjData."""
+        raise NotImplementedError
+
+    def _get_obs(self):
+        """Returns the observation."""
+        raise NotImplementedError()
+
+    def _set_action(self, action):
+        """Applies the given action to the simulation."""
+        raise NotImplementedError()
+
+    def _is_success(self, achieved_goal, desired_goal):
+        """Indicates whether or not the achieved goal successfully achieved the desired goal."""
+        raise NotImplementedError()
+
+    def _sample_goal(self):
+        """Samples a new goal and returns it."""
+        raise NotImplementedError()
+
+    def _env_setup(self, initial_qpos):
+        """Initial configuration of the environment.
+
+        Can be used to configure initial state and extract information from the simulation.
+        """
+        pass
+
+    def _render_callback(self):
+        """A custom callback that is called before rendering.
+
+        Can be used to implement custom visualizations.
+        """
+        pass
+
+    def _step_callback(self):
+        """A custom callback that is called after stepping the simulation.
+
+        Can be used to enforce additional constraints on the simulation state.
+        """
+        pass
+
+
+class MujocoRobotEnv(BaseRobotEnv):
+    """Robot base class for fetch and hand environment versions that depend on new mujoco bindings from Deepmind."""
+
+    def __init__(self, default_camera_config: Optional[dict] = None, **kwargs):
+        """Initialize mujoco environment.
+
+        The Deepmind mujoco bindings are initialized alongside the respective mujoco_utils.
+
+        Args:
+            default_camera_config (optional dictionary): dictionary of default mujoco camera parameters for human rendering. Defaults to None.
+                The keys for this dictionary can be found in the mujoco mjvCamera struct:
+                https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=azimuth#mjvcamera.
+
+                - "type" (integer): camera type (mjtCamera)
+                - "fixedcamid" (integer): fixed camera id
+                - "trackbodyid": body id to track
+                - "lookat" (np.ndarray): cartesian (x, y, z) lookat point
+                - "distance" (float): distance to lookat point or tracked body
+                - "azimuth" (float): camera azimuth (deg)
+                - "elevation" (float): camera elevation (deg)
+        """
+
+        self._mujoco = mujoco
+        self._utils = mujoco_utils
+
+        super().__init__(**kwargs)
+
+        from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer
+
+        self.observation_renderer = MujocoRenderer(self.model, self.data, default_camera_config)
+
+    def _initialize_simulation(self):
+        self.model = self._mujoco.MjModel.from_xml_path(self.fullpath)
+        self.data = self._mujoco.MjData(self.model)
+        self._model_names = self._utils.MujocoModelNames(self.model)
+
+        self.model.vis.global_.offwidth = self.observation_width
+        self.model.vis.global_.offheight = self.observation_height
+
+        self._env_setup(initial_qpos=self.initial_qpos)
+        self.initial_time = self.data.time
+        self.initial_qpos = np.copy(self.data.qpos)
+        self.initial_qvel = np.copy(self.data.qvel)
+
+    def _reset_sim(self):
+        self.data.time = self.initial_time
+        self.data.qpos[:] = np.copy(self.initial_qpos)
+        self.data.qvel[:] = np.copy(self.initial_qvel)
+        if self.model.na != 0:
+            self.data.act[:] = None
+
+        mujoco.mj_forward(self.model, self.data)
+        return super()._reset_sim()
+
+    def render(self):
+        """Render a frame of the MuJoCo simulation.
+
+        Returns:
+            rgb image (np.ndarray): if render_mode is "rgb_array", return a 3D image array.
+        """
+        self._render_callback()
+        return self.observation_renderer.render(self.render_mode)
+
+    def close(self):
+        """Close contains the code necessary to "clean up" the environment.
+
+        Terminates any existing WindowViewer instances in the Gymnasium MujocoRenderer.
+ """ + if self.observation_renderer is not None: + self.observation_renderer.close() + + @property + def dt(self): + """Return the timestep of each Gymanisum step.""" + return self.model.opt.timestep * self.n_substeps + + def _mujoco_step(self, action): + self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index a2be191..8ccaf99 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -4,31 +4,46 @@ import gymnasium as gym import mujoco import numpy as np -from gymnasium_robotics.envs import robot_env +from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer +from gymnasium_robotics.utils import mujoco_utils from gym_xarm.tasks import mocap -class Base(robot_env.MujocoRobotEnv): +class Base(gym.Env): """ - Superclass for all simxarm environments. + Superclass for all gym-xarm environments. Args: xml_name (str): name of the xml environment file gripper_rotation (list): initial rotation of the gripper (given as a quaternion) """ + metadata = { + "render_modes": [ + "human", + "rgb_array", + ], + "render_fps": 25, + } + n_substeps = 20 + initial_qpos = {} + _mujoco = mujoco + _utils = mujoco_utils + def __init__( self, - xml_name, - obs_mode="state", + task, + obs_type="state", gripper_rotation=None, - image_size=84, - visualization_width=None, - visualization_height=None, + observation_width=84, + observation_height=84, + visualization_width=680, + visualization_height=680, render_mode=None, frame_stack=1, channel_last=False, ): + # Env setup if gripper_rotation is None: gripper_rotation = [0, 1, 0, 0] self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32) @@ -36,53 +51,133 @@ def __init__( self.max_z = 1.2 self.min_z = 0.2 - self.obs_mode = obs_mode - self.image_size = image_size + # Observations + self.obs_type = obs_type + self.channel_last = channel_last + + # Rendering + self.observation_width = observation_width + self.observation_height = observation_height + self.visualization_width = visualization_width + self.visualization_height = visualization_height self.render_mode = render_mode self.frame_stack = frame_stack - self.channel_last = channel_last self._frames = deque([], maxlen=frame_stack) - super().__init__( - model_path=os.path.join(os.path.dirname(__file__), "assets", f"{xml_name}.xml"), - n_substeps=20, - n_actions=4, - initial_qpos={}, - width=image_size, - height=image_size, - ) + # Assets + self.xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml") + if not os.path.exists(self.xml_path): + raise OSError(f"File {self.xml_path} does not exist") - self.observation_space = self._get_observation_space() + self._initialize_simulation() + self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) + + assert ( + int(np.round(1.0 / self.dt)) == self.metadata["render_fps"] + ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}' + if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 - if visualization_width is not None and visualization_height is not None: - self.custom_size_renderer = self._get_custom_size_renderer( - width=visualization_width, height=visualization_height - ) + # super().__init__( + # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), + # n_substeps=20, + # 
n_actions=4, + # initial_qpos={}, + # width=image_size, + # height=image_size, + # ) + + self.observation_renderer = self._initialize_renderer(type="observation") + self.visualization_renderer = self._initialize_renderer(type="visualization") + + def _initialize_simulation(self): + """Initialize MuJoCo simulation data structures mjModel and mjData.""" + self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) + self.data = self._mujoco.MjData(self.model) + self._model_names = self._utils.MujocoModelNames(self.model) + + self.model.vis.global_.offwidth = self.observation_width + self.model.vis.global_.offheight = self.observation_height + + self._env_setup(initial_qpos=self.initial_qpos) + self.initial_time = self.data.time + self.initial_qpos = np.copy(self.data.qpos) + self.initial_qvel = np.copy(self.data.qvel) + + def _env_setup(self, initial_qpos): + """Initial configuration of the environment. - def _get_observation_space(self): + Can be used to configure initial state and extract information from the simulation. + """ + for name, value in initial_qpos.items(): + self.data.set_joint_qpos(name, value) + mocap.reset(self.model, self.data) + mujoco.mj_forward(self.model, self.data) + self._sample_goal() + mujoco.mj_forward(self.model, self.data) + + def _initialize_observation_space(self): image_shape = ( (self.image_size, self.image_size, 3 * self.frame_stack) if self.channel_last else (3 * self.frame_stack, self.image_size, self.image_size) ) - if self.obs_mode == "state": - return self.observation_space["observation"] - elif self.obs_mode == "rgb": - return gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) - elif self.obs_mode == "all": - return gym.spaces.Dict( - state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), - rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), + if self.obs_type == "state": + obs = self._get_obs() + observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64") + elif self.obs_type == "pixels": + observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) + elif self.obs_type == "pixels_agent_pos": + observation_space = gym.spaces.Dict( + pixels=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), + agent_pos=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), ) else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]") + raise ValueError( + f"Unknown obs_type {self.obs_type}. Must be one of [pixels, state, pixels_agent_pos]" + ) + + return observation_space + + def _initialize_renderer(self, type: str): + if type == "observation": + model = self.model + elif type == "visualization": + # HACK: MujoCo doesn't allow for custom size rendering on-the-fly, so we + # initialize another renderer with appropriate size for visualization purposes + # see https://gymnasium.farama.org/content/migration-guide/#environment-render + from copy import deepcopy + + model = deepcopy(self.model) + model.vis.global_.offwidth = self.visualization_width + model.vis.global_.offheight = self.visualization_height + else: + raise ValueError(f"Unknown renderer type {type}. Must be one of [observation, visualization]") + + return MujocoRenderer(model, self.data) + + def _reset_sim(self): + """Resets a simulation and indicates whether or not it was successful. + + If a reset was unsuccessful (e.g. 
if a randomized state caused an error in the
+        simulation), this method should indicate such a failure by returning False.
+        In such a case, this method will be called again to attempt the reset again.
+        """
+        self.data.time = self.initial_time
+        self.data.qpos[:] = np.copy(self.initial_qpos)
+        self.data.qvel[:] = np.copy(self.initial_qvel)
+        if self.model.na != 0:
+            self.data.act[:] = None
+
+        mujoco.mj_forward(self.model, self.data)
+        return True
 
     @property
     def dt(self):
+        """Return the timestep of each Gymnasium step."""
         return self.n_substeps * self.model.opt.timestep
 
     @property
@@ -99,12 +194,14 @@ def robot_state(self):
         return np.concatenate([self.eef, gripper_angle])
 
     def is_success(self):
+        """Indicates whether or not the achieved goal successfully achieved the desired goal."""
         return NotImplementedError()
 
     def get_reward(self):
         raise NotImplementedError()
 
     def _sample_goal(self):
+        """Samples a new goal and returns it."""
         raise NotImplementedError()
 
     def get_obs(self):
@@ -143,7 +240,6 @@ def _apply_action(self, action):
             np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]),
         )
 
-    def _reset_sim(self):
         self.data.time = self.initial_time
         self.data.qpos[:] = np.copy(self.initial_qpos)
         self.data.qvel[:] = np.copy(self.initial_qvel)
@@ -158,21 +254,47 @@ def _set_gripper(self, gripper_pos, gripper_rotation):
         self.data.qpos[10] = 0.0
         self.data.qpos[12] = 0.0
 
-    def _env_setup(self, initial_qpos):
-        for name, value in initial_qpos.items():
-            self.data.set_joint_qpos(name, value)
-        mocap.reset(self.model, self.data)
-        mujoco.mj_forward(self.model, self.data)
-        self._sample_goal()
-        mujoco.mj_forward(self.model, self.data)
-
-    def reset(self, seed=None, options=None):
-        self._reset_sim()
+    def reset(
+        self,
+        *,
+        seed: int | None = None,
+        options: dict | None = None,
+    ):
+        """Reset MuJoCo simulation to initial state.
+
+        Note: Attempt to reset the simulator. Since we randomize initial conditions, it
+        is possible to get into a state with numerical issues (e.g. due to penetration or
+        gimbal lock) or we may not achieve an initial condition (e.g. an object is within the hand).
+        In this case, we just keep randomizing until we eventually achieve a valid initial
+        configuration.
+
+        Args:
+            seed (optional integer): The seed that is used to initialize the environment's PRNG (`np_random`). Defaults to None.
+            options (optional dictionary): Can be used when `reset` is overridden for additional information to specify how the environment is reset.
+
+        Returns:
+            observation (dictionary): Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`.
+            info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
+            the ``info`` returned by :meth:`step`.
+ """ + super().reset(seed=seed) + did_reset_sim = False + while not did_reset_sim: + did_reset_sim = self._reset_sim() observation = self._get_obs() - observation = self._transform_obs(observation) + if self.render_mode == "human": + self.render() info = {} return observation, info + # def reset(self, seed=None, options=None): + # super().reset(seed=seed, options=options) + # self._reset_sim() + # observation = self._get_obs() + # observation = self._transform_obs(observation) + # info = {} + # return observation, info + def step(self, action): assert action.shape == (4,) assert self.action_space.contains(action), "{!r} ({}) invalid".format(action, type(action)) @@ -187,18 +309,18 @@ def step(self, action): return observation, reward, done, info def _transform_obs(self, obs, reset=False): - if self.obs_mode == "state": + if self.obs_type == "state": return obs["observation"] - elif self.obs_mode == "rgb": + elif self.obs_type == "rgb": self._update_frames(reset=reset) rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) return rgb_obs - elif self.obs_mode == "all": + elif self.obs_type == "all": self._update_frames(reset=reset) rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]") + raise ValueError(f"Unknown obs_type {self.obs_type}. Must be one of [rgb, all, state]") def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) @@ -211,30 +333,25 @@ def _update_frames(self, reset=False): self._frames.append(pixels) assert len(self._frames) == self.frame_stack + def _render_obs(self): + obs = self.render(mode="rgb_array") + if not self.channel_last: + obs = obs.transpose(2, 0, 1) + return obs.copy() + def render(self, mode="rgb_array"): self._render_callback() - # TODO: use self.render_mode - if mode == "visualize": - return self._custom_size_render() - - return self.mujoco_renderer.render(mode, camera_name="camera0") - - def _custom_size_render(self): - return self.custom_size_renderer.render("rgb_array", camera_name="camera0") + # return self._mujoco.physics.render(height=84, width=84, camera_name="camera0") - def _get_custom_size_renderer(self, width, height): - from copy import deepcopy - - from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer + if mode == "visualize": + return self.visualization_renderer.render("rgb_array", camera_name="camera0") - # HACK: MujoCo doesn't allow for custom size rendering on-the-fly, so we - # initialize another renderer with appropriate size for visualization purposes - # see https://gymnasium.farama.org/content/migration-guide/#environment-render - custom_render_model = deepcopy(self.model) - custom_render_model.vis.global_.offwidth = width - custom_render_model.vis.global_.offheight = height - return MujocoRenderer(custom_render_model, self.data) + return self.observation_renderer.render(mode, camera_name="camera0") def close(self): - if self.mujoco_renderer is not None: - self.mujoco_renderer.close() + """Close contains the code necessary to "clean up" the environment. + + Terminates any existing WindowViewer instances in the Gymnasium MujocoRenderer. 
+ """ + if self.observation_renderer is not None: + self.observation_renderer.close() diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 1755694..d3a98d0 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -48,6 +48,13 @@ def get_reward(self): return reach_reward / 100 + pick_reward def _get_obs(self): + # WIP + if self.obs_type == "state": + obs = self._get_state_obs() + elif self.obs_type == "pixels": + obs = self._get_pixels_obs() + + def _get_state_obs(self): eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") eef = self.eef - self.center_of_table @@ -57,7 +64,7 @@ def _get_obs(self): obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - obs = np.concatenate( + return np.concatenate( [ eef, eef_velp, @@ -79,7 +86,7 @@ def _get_obs(self): ], axis=0, ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} def _sample_goal(self): # Gripper @@ -100,7 +107,7 @@ def _sample_goal(self): def reset(self, seed=None, options=None): self._action = np.zeros(4) - return super().reset(seed, options) + return super().reset(seed=seed, options=options) def step(self, action): self._action = action.copy() diff --git a/tests/test_env.py b/tests/test_env.py index 23f392e..f8b3380 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -6,7 +6,9 @@ def test_env(): env = gym.make("gym_xarm/XarmLift-v0") - check_env(env.unwrapped, skip_render_check=True) + # check_env(env.unwrapped, skip_render_check=True) + env.reset() + env.render() if __name__ == "__main__": From 186095d3202ec62360a5fc6462f1a1ab655509a0 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 5 Apr 2024 11:41:39 +0200 Subject: [PATCH 02/16] WIP --- gym_xarm/tasks/base.py | 217 +++++++++++++++++++++-------------------- gym_xarm/tasks/lift.py | 74 +++++++++----- 2 files changed, 163 insertions(+), 128 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 8ccaf99..852a90a 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -1,5 +1,5 @@ import os -from collections import OrderedDict, deque +from collections import deque import gymnasium as gym import mujoco @@ -41,7 +41,7 @@ def __init__( visualization_height=680, render_mode=None, frame_stack=1, - channel_last=False, + channel_last=True, ): # Env setup if gripper_rotation is None: @@ -69,6 +69,7 @@ def __init__( if not os.path.exists(self.xml_path): raise OSError(f"File {self.xml_path} does not exist") + # Initialize sim, spaces & renderers self._initialize_simulation() self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) @@ -81,6 +82,9 @@ def __init__( if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 + self.observation_renderer = self._initialize_renderer(type="observation") + self.visualization_renderer = self._initialize_renderer(type="visualization") + # super().__init__( # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), # n_substeps=20, @@ -90,12 +94,9 @@ def __init__( # height=image_size, # ) - self.observation_renderer = self._initialize_renderer(type="observation") - 
self.visualization_renderer = self._initialize_renderer(type="visualization") - def _initialize_simulation(self): """Initialize MuJoCo simulation data structures mjModel and mjData.""" - self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) + self.model = self._mujoco.MjModel.from_xml_path(self.xml_path) self.data = self._mujoco.MjData(self.model) self._model_names = self._utils.MujocoModelNames(self.model) @@ -121,9 +122,9 @@ def _env_setup(self, initial_qpos): def _initialize_observation_space(self): image_shape = ( - (self.image_size, self.image_size, 3 * self.frame_stack) + (self.observation_width, self.observation_height, 3 * self.frame_stack) if self.channel_last - else (3 * self.frame_stack, self.image_size, self.image_size) + else (3 * self.frame_stack, self.observation_width, self.observation_height) ) if self.obs_type == "state": obs = self._get_obs() @@ -159,22 +160,6 @@ def _initialize_renderer(self, type: str): return MujocoRenderer(model, self.data) - def _reset_sim(self): - """Resets a simulation and indicates whether or not it was successful. - - If a reset was unsuccessful (e.g. if a randomized state caused an error in the - simulation), this method should indicate such a failure by returning False. - In such a case, this method will be called again to attempt a the reset again. - """ - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - if self.model.na != 0: - self.data.act[:] = None - - mujoco.mj_forward(self.model, self.data) - return True - @property def dt(self): """Return the timestep of each Gymanisum step.""" @@ -205,60 +190,12 @@ def _sample_goal(self): raise NotImplementedError() def get_obs(self): - return self._get_obs() - - def _step_callback(self): - self._mujoco.mj_forward(self.model, self.data) - - def _limit_gripper(self, gripper_pos, pos_ctrl): - if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15: - pos_ctrl[0] = min(pos_ctrl[0], 0) - if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3: - pos_ctrl[0] = max(pos_ctrl[0], 0) - if gripper_pos[1] > self.center_of_table[1] + 0.3: - pos_ctrl[1] = min(pos_ctrl[1], 0) - if gripper_pos[1] < self.center_of_table[1] - 0.3: - pos_ctrl[1] = max(pos_ctrl[1], 0) - if gripper_pos[2] > self.max_z: - pos_ctrl[2] = min(pos_ctrl[2], 0) - if gripper_pos[2] < self.min_z: - pos_ctrl[2] = max(pos_ctrl[2], 0) - return pos_ctrl - - def _apply_action(self, action): - assert action.shape == (4,) - action = action.copy() - pos_ctrl, gripper_ctrl = action[:3], action[3] - pos_ctrl = self._limit_gripper( - self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl - ) * (1 / self.n_substeps) - gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) - mocap.apply_action( - self.model, - self._model_names, - self.data, - np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]), - ) - - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - self._sample_goal() - self._mujoco.mj_step(self.model, self.data, nstep=10) - return True - - def _set_gripper(self, gripper_pos, gripper_rotation): - self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos) - self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation) - self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0) - self.data.qpos[10] = 0.0 - self.data.qpos[12] = 0.0 + raise NotImplementedError() def reset( self, *, seed: 
int | None = None, - options: dict | None = None, ): """Reset MuJoCo simulation to initial state. @@ -287,6 +224,22 @@ def reset( info = {} return observation, info + def _reset_sim(self): + """Resets a simulation and indicates whether or not it was successful. + + If a reset was unsuccessful (e.g. if a randomized state caused an error in the + simulation), this method should indicate such a failure by returning False. + In such a case, this method will be called again to attempt a the reset again. + """ + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + if self.model.na != 0: + self.data.act[:] = None + + mujoco.mj_forward(self.model, self.data) + return True + # def reset(self, seed=None, options=None): # super().reset(seed=seed, options=options) # self._reset_sim() @@ -297,56 +250,106 @@ def reset( def step(self, action): assert action.shape == (4,) - assert self.action_space.contains(action), "{!r} ({}) invalid".format(action, type(action)) + assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" self._apply_action(action) self._mujoco.mj_step(self.model, self.data, nstep=2) self._step_callback() - observation = self._get_obs() - observation = self._transform_obs(observation) + observation = self.get_obs() + # observation = self.get_obs() + # observation = self._transform_obs(observation) reward = self.get_reward() done = False info = {"is_success": self.is_success(), "success": self.is_success()} return observation, reward, done, info - def _transform_obs(self, obs, reset=False): - if self.obs_type == "state": - return obs["observation"] - elif self.obs_type == "rgb": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return rgb_obs - elif self.obs_type == "all": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - else: - raise ValueError(f"Unknown obs_type {self.obs_type}. 
Must be one of [rgb, all, state]") - - def _render_callback(self): + def _step_callback(self): self._mujoco.mj_forward(self.model, self.data) - def _update_frames(self, reset=False): - pixels = self._render_obs() - self._frames.append(pixels) - if reset: - for _ in range(1, self.frame_stack): - self._frames.append(pixels) - assert len(self._frames) == self.frame_stack + def _limit_gripper(self, gripper_pos, pos_ctrl): + if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15: + pos_ctrl[0] = min(pos_ctrl[0], 0) + if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3: + pos_ctrl[0] = max(pos_ctrl[0], 0) + if gripper_pos[1] > self.center_of_table[1] + 0.3: + pos_ctrl[1] = min(pos_ctrl[1], 0) + if gripper_pos[1] < self.center_of_table[1] - 0.3: + pos_ctrl[1] = max(pos_ctrl[1], 0) + if gripper_pos[2] > self.max_z: + pos_ctrl[2] = min(pos_ctrl[2], 0) + if gripper_pos[2] < self.min_z: + pos_ctrl[2] = max(pos_ctrl[2], 0) + return pos_ctrl + + def _apply_action(self, action): + assert action.shape == (4,) + action = action.copy() + pos_ctrl, gripper_ctrl = action[:3], action[3] + pos_ctrl = self._limit_gripper( + self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl + ) * (1 / self.n_substeps) + gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) + mocap.apply_action( + self.model, + self._model_names, + self.data, + np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]), + ) - def _render_obs(self): - obs = self.render(mode="rgb_array") - if not self.channel_last: - obs = obs.transpose(2, 0, 1) - return obs.copy() + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + self._sample_goal() + self._mujoco.mj_step(self.model, self.data, nstep=10) + return True + + def _set_gripper(self, gripper_pos, gripper_rotation): + self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos) + self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation) + self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0) + self.data.qpos[10] = 0.0 + self.data.qpos[12] = 0.0 + + # def _transform_obs(self, obs, reset=False): + # if self.obs_type == "state": + # return obs["observation"] + # elif self.obs_type == "rgb": + # self._update_frames(reset=reset) + # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) + # return rgb_obs + # elif self.obs_type == "all": + # self._update_frames(reset=reset) + # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) + # return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) + # else: + # raise ValueError(f"Unknown obs_type {self.obs_type}. 
Must be one of [rgb, all, state]") + + # def _update_frames(self, reset=False): + # pixels = self._render_obs() + # self._frames.append(pixels) + # if reset: + # for _ in range(1, self.frame_stack): + # self._frames.append(pixels) + # assert len(self._frames) == self.frame_stack + + # def _render_obs(self): + # obs = self.render(mode="rgb_array") + # if not self.channel_last: + # obs = obs.transpose(2, 0, 1) + # return obs.copy() def render(self, mode="rgb_array"): self._render_callback() - # return self._mujoco.physics.render(height=84, width=84, camera_name="camera0") - if mode == "visualize": return self.visualization_renderer.render("rgb_array", camera_name="camera0") - return self.observation_renderer.render(mode, camera_name="camera0") + render = self.observation_renderer.render("rgb_array", camera_name="camera0") + if self.channel_last: + return render + else: + return render.transpose(2, 0, 1) + + def _render_callback(self): + self._mujoco.mj_forward(self.model, self.data) def close(self): """Close contains the code necessary to "clean up" the environment. @@ -355,3 +358,5 @@ def close(self): """ if self.observation_renderer is not None: self.observation_renderer.close() + if self.visualization_renderer is not None: + self.visualization_renderer.close() diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index d3a98d0..c367017 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -19,6 +19,26 @@ def __init__(self, **kwargs): def z_target(self): return self._init_z + self._z_threshold + @property + def eef_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt + + @property + def obj_rot(self): + return self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] + + @property + def obj_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt + + @property + def obj_velr(self): + return self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt + + @property + def gripper_angle(self): + return self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") + def is_success(self): return self.obj[2] >= self.z_target @@ -47,46 +67,56 @@ def get_reward(self): return reach_reward / 100 + pick_reward - def _get_obs(self): - # WIP + def get_obs(self): if self.obs_type == "state": - obs = self._get_state_obs() - elif self.obs_type == "pixels": - obs = self._get_pixels_obs() + return self._get_obs() + pixels = self.render() + if self.obs_type == "pixels": + return pixels + elif self.obs_type == "pixels_agent_pos": + return { + "pixels": pixels, + "agent_pos": self._get_obs(agent_only=True), + } + else: + raise ValueError( + f"Unknown obs_type {self.obs_type}. 
Must be one of [pixels, state, pixels_agent_pos]" + ) - def _get_state_obs(self): - eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt - gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") + def _get_obs(self, agent_only=False): eef = self.eef - self.center_of_table + if agent_only: + return np.concatenate( + [ + eef, + self.eef_velp, + self.gripper_angle, + ] + ) obj = self.obj - self.center_of_table - obj_rot = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] - obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt - obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - return np.concatenate( [ eef, - eef_velp, + self.eef_velp, obj, - obj_rot, - obj_velp, - obj_velr, - eef - obj, + self.obj_rot, + self.obj_velp, + self.obj_velr, + self.eef - self.obj, np.array( [ - np.linalg.norm(eef - obj), + np.linalg.norm(self.eef - self.obj), np.linalg.norm(eef[:-1] - obj[:-1]), self.z_target, self.z_target - obj[-1], self.z_target - eef[-1], ] ), - gripper_angle, + self.gripper_angle, ], axis=0, ) - # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} def _sample_goal(self): # Gripper @@ -105,9 +135,9 @@ def _sample_goal(self): # Goal return object_pos + np.array([0, 0, self._z_threshold]) - def reset(self, seed=None, options=None): + def reset(self, seed=None): self._action = np.zeros(4) - return super().reset(seed=seed, options=options) + return super().reset(seed=seed) def step(self, action): self._action = action.copy() From 41020985b93afc9146ac976c9c55f00d16097144 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 5 Apr 2024 11:46:36 +0200 Subject: [PATCH 03/16] Fix CI --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 985606a..2769dcd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,8 +84,8 @@ jobs: path: .venv key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }} - # - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl) - # run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev + - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl) + run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev #---------------------------------------------- # install project From fc0cc41ad7de47df73b9d11bceef18e2d40f0f63 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 5 Apr 2024 15:31:19 +0200 Subject: [PATCH 04/16] WIP --- gym_xarm/tasks/base.py | 20 ++++++++++++-------- gym_xarm/tasks/lift.py | 8 ++++++-- tests/test_env.py | 33 ++++++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 852a90a..2a4a125 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -71,6 +71,8 @@ def __init__( # Initialize sim, spaces & renderers self._initialize_simulation() + self.observation_renderer = self._initialize_renderer(type="observation") + self.visualization_renderer = self._initialize_renderer(type="visualization") self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) @@ -82,9 +84,6 @@ 
def __init__( if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 - self.observation_renderer = self._initialize_renderer(type="observation") - self.visualization_renderer = self._initialize_renderer(type="visualization") - # super().__init__( # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), # n_substeps=20, @@ -126,15 +125,19 @@ def _initialize_observation_space(self): if self.channel_last else (3 * self.frame_stack, self.observation_width, self.observation_height) ) + obs = self.get_obs() if self.obs_type == "state": - obs = self._get_obs() - observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64") + observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype=np.float64) elif self.obs_type == "pixels": observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) elif self.obs_type == "pixels_agent_pos": observation_space = gym.spaces.Dict( - pixels=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), - agent_pos=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), + { + "pixels": gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), + "agent_pos": gym.spaces.Box( + low=-np.inf, high=np.inf, shape=obs["agent_pos"].shape, dtype=np.float64 + ), + } ) else: raise ValueError( @@ -196,6 +199,7 @@ def reset( self, *, seed: int | None = None, + options: dict | None = None, ): """Reset MuJoCo simulation to initial state. @@ -218,7 +222,7 @@ def reset( did_reset_sim = False while not did_reset_sim: did_reset_sim = self._reset_sim() - observation = self._get_obs() + observation = self.get_obs() if self.render_mode == "human": self.render() info = {} diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index c367017..5bb2293 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -135,9 +135,13 @@ def _sample_goal(self): # Goal return object_pos + np.array([0, 0, self._z_threshold]) - def reset(self, seed=None): + def reset( + self, + seed=None, + options: dict | None = None, + ): self._action = np.zeros(4) - return super().reset(seed=seed) + return super().reset(seed=seed, options=options) def step(self, action): self._action = action.copy() diff --git a/tests/test_env.py b/tests/test_env.py index f8b3380..9d2e8ba 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,15 +1,30 @@ +import pytest import gymnasium as gym from gymnasium.utils.env_checker import check_env -import gym_xarm # noqa: F401 - - -def test_env(): - env = gym.make("gym_xarm/XarmLift-v0") - # check_env(env.unwrapped, skip_render_check=True) - env.reset() - env.render() +@pytest.mark.parametrize( + "env_task, obs_type", + [ + ("XarmLift-v0", "state"), + ("XarmLift-v0", "pixels"), + ("XarmLift-v0", "pixels_agent_pos"), + # TODO(aliberts): Add other tasks + # ("reach", False, False), + # ("reach", True, False), + # ("push", False, False), + # ("push", True, False), + # ("peg_in_box", False, False), + # ("peg_in_box", True, False), + ], +) +def test_env(env_task, obs_type): + import gym_xarm # noqa: F401 + env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) + check_env(env.unwrapped, skip_render_check=True) + # env.reset() + # env.render() if __name__ == "__main__": - test_env() \ No newline at end of file + test_env("XarmLift-v0", "pixels_agent_pos") + # test_env("XarmLift-v0", "state") \ No newline at end of file From 2524966e2c425cfa8425c86cf08a2c2a0836d503 Mon Sep 17 00:00:00 2001 From: Cadene Date: Fri, 5 Apr 2024 16:10:04 +0000 
Subject: [PATCH 05/16] Fix, remove eef_velop, add copy(), same API as pusht --- gym_xarm/robot_env.py | 306 ----------------------------------------- gym_xarm/tasks/base.py | 26 ++-- gym_xarm/tasks/lift.py | 1 - 3 files changed, 10 insertions(+), 323 deletions(-) delete mode 100644 gym_xarm/robot_env.py diff --git a/gym_xarm/robot_env.py b/gym_xarm/robot_env.py deleted file mode 100644 index d1d732e..0000000 --- a/gym_xarm/robot_env.py +++ /dev/null @@ -1,306 +0,0 @@ -import os -from typing import Optional - -import gymnasium as gym -import mujoco -import numpy as np -from gymnasium import spaces -from gymnasium_robotics.utils import mujoco_utils - -DEFAULT_SIZE = 480 - - -class BaseRobotEnv(gym.Env): - """Modified version of BaseRobotEnv to be a `gym.Env` instead of a `gymnasium_robotics.GoalEnv`""" - - metadata = { - "render_modes": [ - "human", - "rgb_array", - ], - "render_fps": 25, - } - - def __init__( - self, - model_path: str, - initial_qpos, - n_actions: int, - n_substeps: int, - render_mode: Optional[str] = None, - width: int = DEFAULT_SIZE, - height: int = DEFAULT_SIZE, - ): - """Initialize the hand and fetch robot superclass. - - Args: - model_path (string): the path to the mjcf MuJoCo model. - initial_qpos (np.ndarray): initial position value of the joints in the MuJoCo simulation. - n_actions (integer): size of the action space. - n_substeps (integer): number of MuJoCo simulation timesteps per Gymnasium step. - render_mode (optional string): type of rendering mode, "human" for window rendeirng and "rgb_array" for offscreen. Defaults to None. - width (optional integer): width of each rendered frame. Defaults to DEFAULT_SIZE. - height (optional integer): height of each rendered frame . Defaults to DEFAULT_SIZE. - """ - if model_path.startswith("/"): - self.fullpath = model_path - else: - self.fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) - if not os.path.exists(self.fullpath): - raise OSError(f"File {self.fullpath} does not exist") - - self.n_substeps = n_substeps - - self.initial_qpos = initial_qpos - - self.width = width - self.height = height - self._initialize_simulation() - - self.goal = np.zeros(0) - obs = self._get_obs() - - assert ( - int(np.round(1.0 / self.dt)) == self.metadata["render_fps"] - ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}' - - self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32") - self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64") - # self.observation_space = spaces.Dict( - # dict( - # desired_goal=spaces.Box( - # -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" - # ), - # achieved_goal=spaces.Box( - # -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" - # ), - # observation=spaces.Box( - # -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64" - # ), - # ) - # ) - - self.render_mode = render_mode - - # Env methods - # ---------------------------- - def compute_terminated(self, achieved_goal, desired_goal, info): - """All the available environments are currently continuing tasks and non-time dependent. 
The objective is to reach the goal for an indefinite period of time.""" - return False - - def compute_truncated(self, achievec_goal, desired_goal, info): - """The environments will be truncated only if setting a time limit with max_steps which will automatically wrap the environment in a gymnasium TimeLimit wrapper.""" - return False - - def step(self, action): - """Run one timestep of the environment's dynamics using the agent actions. - - Args: - action (np.ndarray): Control action to be applied to the agent and update the simulation. Should be of shape :attr:`action_space`. - - Returns: - observation (dictionary): Next observation due to the agent actions .It should satisfy the `GoalEnv` :attr:`observation_space`. - reward (integer): The reward as a result of taking the action. This is calculated by :meth:`compute_reward` of `GoalEnv`. - terminated (boolean): Whether the agent reaches the terminal state. This is calculated by :meth:`compute_terminated` of `GoalEnv`. - truncated (boolean): Whether the truncation condition outside the scope of the MDP is satisfied. Timically, due to a timelimit, but - it is also calculated in :meth:`compute_truncated` of `GoalEnv`. - info (dictionary): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging). In this case there is a single - key `is_success` with a boolean value, True if the `achieved_goal` is the same as the `desired_goal`. - """ - if np.array(action).shape != self.action_space.shape: - raise ValueError("Action dimension mismatch") - - action = np.clip(action, self.action_space.low, self.action_space.high) - self._set_action(action) - - self._mujoco_step(action) - - self._step_callback() - - if self.render_mode == "human": - self.render() - obs = self._get_obs() - - info = { - "is_success": self._is_success(obs["achieved_goal"], self.goal), - } - - terminated = self.compute_terminated(obs["achieved_goal"], self.goal, info) - truncated = self.compute_truncated(obs["achieved_goal"], self.goal, info) - - reward = self.compute_reward(obs["achieved_goal"], self.goal, info) - - return obs, reward, terminated, truncated, info - - def reset( - self, - *, - seed: Optional[int] = None, - options: Optional[dict] = None, - ): - """Reset MuJoCo simulation to initial state. - - Note: Attempt to reset the simulator. Since we randomize initial conditions, it - is possible to get into a state with numerical issues (e.g. due to penetration or - Gimbel lock) or we may not achieve an initial condition (e.g. an object is within the hand). - In this case, we just keep randomizing until we eventually achieve a valid initial - configuration. - - Args: - seed (optional integer): The seed that is used to initialize the environment's PRNG (`np_random`). Defaults to None. - options (optional dictionary): Can be used when `reset` is override for additional information to specify how the environment is reset. - - Returns: - observation (dictionary) : Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`. - info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to - the ``info`` returned by :meth:`step`. 
- """ - super().reset(seed=seed) - did_reset_sim = False - while not did_reset_sim: - did_reset_sim = self._reset_sim() - self.goal = self._sample_goal().copy() - obs = self._get_obs() - if self.render_mode == "human": - self.render() - - return obs, {} - - # Extension methods - # ---------------------------- - def _mujoco_step(self, action): - """Advance the mujoco simulation. - - Override depending on the python binginds, either mujoco or mujoco_py - """ - raise NotImplementedError - - def _reset_sim(self): - """Resets a simulation and indicates whether or not it was successful. - - If a reset was unsuccessful (e.g. if a randomized state caused an error in the - simulation), this method should indicate such a failure by returning False. - In such a case, this method will be called again to attempt a the reset again. - """ - return True - - def _initialize_simulation(self): - """Initialize MuJoCo simulation data structures mjModel and mjData.""" - raise NotImplementedError - - def _get_obs(self): - """Returns the observation.""" - raise NotImplementedError() - - def _set_action(self, action): - """Applies the given action to the simulation.""" - raise NotImplementedError() - - def _is_success(self, achieved_goal, desired_goal): - """Indicates whether or not the achieved goal successfully achieved the desired goal.""" - raise NotImplementedError() - - def _sample_goal(self): - """Samples a new goal and returns it.""" - raise NotImplementedError() - - def _env_setup(self, initial_qpos): - """Initial configuration of the environment. - - Can be used to configure initial state and extract information from the simulation. - """ - pass - - def _render_callback(self): - """A custom callback that is called before rendering. - - Can be used to implement custom visualizations. - """ - pass - - def _step_callback(self): - """A custom callback that is called after stepping the simulation. - - Can be used to enforce additional constraints on the simulation state. - """ - pass - - -class MujocoRobotEnv(BaseRobotEnv): - """Robot base class for fetch and hand environment versions that depend on new mujoco bindings from Deepmind.""" - - def __init__(self, default_camera_config: Optional[dict] = None, **kwargs): - """Initialize mujoco environment. - - The Deepmind mujoco bindings are initialized alongside the respective mujoco_utils. - - Args: - default_camera_config (optional dictionary): dictionary of default mujoco camera parameters for human rendering. Defaults to None. - The keys for this dictionary can be found in the mujoco mjvCamera struct: - https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=azimuth#mjvcamera. 
- - - "type" (integer): camera type (mjtCamera) - - "fixedcamid" (integer): fixed camera id - - "trackbodyid": body id to track - - "lookat" (np.ndarray): cartesian (x, y, z) lookat point - - "distance" (float): distance to lookat point or tracked body - - "azimuth" (float): camera azimuth (deg) - - "elevation" (float): camera elevation (deg) - """ - - self._mujoco = mujoco - self._utils = mujoco_utils - - super().__init__(**kwargs) - - from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer - - self.observation_renderer = MujocoRenderer(self.model, self.data, default_camera_config) - - def _initialize_simulation(self): - self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) - self.data = self._mujoco.MjData(self.model) - self._model_names = self._utils.MujocoModelNames(self.model) - - self.model.vis.global_.offwidth = self.observation_width - self.model.vis.global_.offheight = self.observation_height - - self._env_setup(initial_qpos=self.initial_qpos) - self.initial_time = self.data.time - self.initial_qpos = np.copy(self.data.qpos) - self.initial_qvel = np.copy(self.data.qvel) - - def _reset_sim(self): - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - if self.model.na != 0: - self.data.act[:] = None - - mujoco.mj_forward(self.model, self.data) - return super()._reset_sim() - - def render(self): - """Render a frame of the MuJoCo simulation. - - Returns: - rgb image (np.ndarray): if render_mode is "rgb_array", return a 3D image array. - """ - self._render_callback() - return self.observation_renderer.render(self.render_mode) - - def close(self): - """Close contains the code necessary to "clean up" the environment. - - Terminates any existing WindowViewer instances in the Gymnaisum MujocoRenderer. 
- """ - if self.observation_renderer is not None: - self.observation_renderer.close() - - @property - def dt(self): - """Return the timestep of each Gymanisum step.""" - return self.model.opt.timestep * self.n_substeps - - def _mujoco_step(self, action): - self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 2a4a125..394e63d 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -19,10 +19,7 @@ class Base(gym.Env): """ metadata = { - "render_modes": [ - "human", - "rgb_array", - ], + "render_modes": [], "render_fps": 25, } n_substeps = 20 @@ -39,7 +36,6 @@ def __init__( observation_height=84, visualization_width=680, visualization_height=680, - render_mode=None, frame_stack=1, channel_last=True, ): @@ -60,7 +56,6 @@ def __init__( self.observation_height = observation_height self.visualization_width = visualization_width self.visualization_height = visualization_height - self.render_mode = render_mode self.frame_stack = frame_stack self._frames = deque([], maxlen=frame_stack) @@ -121,9 +116,9 @@ def _env_setup(self, initial_qpos): def _initialize_observation_space(self): image_shape = ( - (self.observation_width, self.observation_height, 3 * self.frame_stack) + (self.observation_height, self.observation_width, 3 * self.frame_stack) if self.channel_last - else (3 * self.frame_stack, self.observation_width, self.observation_height) + else (3 * self.frame_stack, self.observation_height, self.observation_width) ) obs = self.get_obs() if self.obs_type == "state": @@ -223,8 +218,6 @@ def reset( while not did_reset_sim: did_reset_sim = self._reset_sim() observation = self.get_obs() - if self.render_mode == "human": - self.render() info = {} return observation, info @@ -259,12 +252,13 @@ def step(self, action): self._mujoco.mj_step(self.model, self.data, nstep=2) self._step_callback() observation = self.get_obs() - # observation = self.get_obs() # observation = self._transform_obs(observation) reward = self.get_reward() - done = False - info = {"is_success": self.is_success(), "success": self.is_success()} - return observation, reward, done, info + terminated = is_success = self.is_success() + info = {"is_success": is_success} + + truncated = False + return observation, reward, terminated, truncated, info def _step_callback(self): self._mujoco.mj_forward(self.model, self.data) @@ -348,9 +342,9 @@ def render(self, mode="rgb_array"): render = self.observation_renderer.render("rgb_array", camera_name="camera0") if self.channel_last: - return render + return render.copy() else: - return render.transpose(2, 0, 1) + return render.transpose(2, 0, 1).copy() def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 5bb2293..06897cb 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -89,7 +89,6 @@ def _get_obs(self, agent_only=False): return np.concatenate( [ eef, - self.eef_velp, self.gripper_angle, ] ) From 79e18a320076e003e59c811267e5c0994097c5d9 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:07:51 +0200 Subject: [PATCH 06/16] Cleanup --- gym_xarm/tasks/__init__.py | 186 +++++++---------------------------- gym_xarm/tasks/base.py | 91 ++++++----------- gym_xarm/tasks/lift.py | 44 ++------- gym_xarm/tasks/peg_in_box.py | 2 + gym_xarm/tasks/push.py | 2 + gym_xarm/tasks/reach.py | 2 + tests/test_env.py | 15 +-- 7 files changed, 79 insertions(+), 263 deletions(-) diff --git 
a/gym_xarm/tasks/__init__.py b/gym_xarm/tasks/__init__.py index a20e29d..3fabb3e 100644 --- a/gym_xarm/tasks/__init__.py +++ b/gym_xarm/tasks/__init__.py @@ -1,44 +1,42 @@ -from collections import OrderedDict, deque - -import gymnasium as gym -import numpy as np -from gymnasium.wrappers import TimeLimit +from collections import OrderedDict from gym_xarm.tasks.base import Base as Base from gym_xarm.tasks.lift import Lift -from gym_xarm.tasks.peg_in_box import PegInBox -from gym_xarm.tasks.push import Push -from gym_xarm.tasks.reach import Reach + +# from gym_xarm.tasks.peg_in_box import PegInBox +# from gym_xarm.tasks.push import Push +# from gym_xarm.tasks.reach import Reach + TASKS = OrderedDict( ( - ( - "reach", - { - "env": Reach, - "action_space": "xyz", - "episode_length": 50, - "description": "Reach a target location with the end effector", - }, - ), - ( - "push", - { - "env": Push, - "action_space": "xyz", - "episode_length": 50, - "description": "Push a cube to a target location", - }, - ), - ( - "peg_in_box", - { - "env": PegInBox, - "action_space": "xyz", - "episode_length": 50, - "description": "Insert a peg into a box", - }, - ), + # ( + # "reach", + # { + # "env": Reach, + # "action_space": "xyz", + # "episode_length": 50, + # "description": "Reach a target location with the end effector", + # }, + # ), + # ( + # "push", + # { + # "env": Push, + # "action_space": "xyz", + # "episode_length": 50, + # "description": "Push a cube to a target location", + # }, + # ), + # ( + # "peg_in_box", + # { + # "env": PegInBox, + # "action_space": "xyz", + # "episode_length": 50, + # "description": "Insert a peg into a box", + # }, + # ), ( "lift", { @@ -50,121 +48,3 @@ ), ) ) - - -class SimXarmWrapper(gym.Wrapper): - """ - DEPRECATED: Use gym.make() - - A wrapper for the SimXarm environments. This wrapper is used to - convert the action and observation spaces to the correct format. - """ - - def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False): - super().__init__(env) - self._env = env - self.obs_mode = obs_mode - self.image_size = image_size - self.action_repeat = action_repeat - self.frame_stack = frame_stack - self._frames = deque([], maxlen=frame_stack) - self.channel_last = channel_last - self._max_episode_steps = task["episode_length"] // action_repeat - - image_shape = ( - (image_size, image_size, 3 * frame_stack) - if channel_last - else (3 * frame_stack, image_size, image_size) - ) - if obs_mode == "state": - self.observation_space = env.observation_space["observation"] - elif obs_mode == "rgb": - self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) - elif obs_mode == "all": - self.observation_space = gym.spaces.Dict( - state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), - rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), - ) - else: - raise ValueError(f"Unknown obs_mode {obs_mode}. 
Must be one of [rgb, all, state]") - self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),)) - self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32) - if "w" not in task["action_space"]: - self.action_padding[-1] = 1.0 - - def _render_obs(self): - obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size) - if not self.channel_last: - obs = obs.transpose(2, 0, 1) - return obs.copy() - - def _update_frames(self, reset=False): - pixels = self._render_obs() - self._frames.append(pixels) - if reset: - for _ in range(1, self.frame_stack): - self._frames.append(pixels) - assert len(self._frames) == self.frame_stack - - def transform_obs(self, obs, reset=False): - if self.obs_mode == "state": - return obs["observation"] - elif self.obs_mode == "rgb": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return rgb_obs - elif self.obs_mode == "all": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]") - - def reset(self): - return self.transform_obs(self._env.reset(), reset=True) - - def step(self, action): - action = np.concatenate([action, self.action_padding]) - reward = 0.0 - for _ in range(self.action_repeat): - obs, r, done, info = self._env.step(action) - reward += r - return self.transform_obs(obs), reward, done, info - - def render(self, mode="rgb_array", width=384, height=384, **kwargs): - return self._env.render(mode, width=width, height=height) - - @property - def state(self): - return self._env.robot_state - - -def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0): - """ - DEPRECATED: Use gym.make() - - Create a new environment. - Args: - task (str): The task to create an environment for. Must be one of: - - 'reach' - - 'push' - - 'peg-in-box' - - 'lift' - obs_mode (str): The observation mode to use. Must be one of: - - 'state': Only state observations - - 'rgb': RGB images - - 'all': RGB images and state observations - image_size (int): The size of the image observations - action_repeat (int): The number of times to repeat the action - seed (int): The random seed to use - Returns: - gym.Env: The environment - """ - if task not in TASKS: - raise ValueError(f"Unknown task {task}. 
Must be one of {list(TASKS.keys())}") - env = TASKS[task]["env"]() - env = TimeLimit(env, TASKS[task]["episode_length"]) - env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last) - env.seed(seed) - - return env diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 394e63d..5ae90ae 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -1,5 +1,4 @@ import os -from collections import deque import gymnasium as gym import mujoco @@ -36,10 +35,9 @@ def __init__( observation_height=84, visualization_width=680, visualization_height=680, - frame_stack=1, channel_last=True, ): - # Env setup + # Coordinates if gripper_rotation is None: gripper_rotation = [0, 1, 0, 0] self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32) @@ -56,8 +54,6 @@ def __init__( self.observation_height = observation_height self.visualization_width = visualization_width self.visualization_height = visualization_height - self.frame_stack = frame_stack - self._frames = deque([], maxlen=frame_stack) # Assets self.xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml") @@ -79,15 +75,6 @@ def __init__( if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 - # super().__init__( - # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), - # n_substeps=20, - # n_actions=4, - # initial_qpos={}, - # width=image_size, - # height=image_size, - # ) - def _initialize_simulation(self): """Initialize MuJoCo simulation data structures mjModel and mjData.""" self.model = self._mujoco.MjModel.from_xml_path(self.xml_path) @@ -116,9 +103,9 @@ def _env_setup(self, initial_qpos): def _initialize_observation_space(self): image_shape = ( - (self.observation_height, self.observation_width, 3 * self.frame_stack) + (self.observation_height, self.observation_width, 3) if self.channel_last - else (3 * self.frame_stack, self.observation_height, self.observation_width) + else (3, self.observation_height, self.observation_width) ) obs = self.get_obs() if self.obs_type == "state": @@ -145,7 +132,7 @@ def _initialize_renderer(self, type: str): if type == "observation": model = self.model elif type == "visualization": - # HACK: MujoCo doesn't allow for custom size rendering on-the-fly, so we + # HACK: gymnasium doesn't allow for custom size rendering on-the-fly, so we # initialize another renderer with appropriate size for visualization purposes # see https://gymnasium.farama.org/content/migration-guide/#environment-render from copy import deepcopy @@ -165,16 +152,35 @@ def dt(self): @property def eef(self): - return self._utils.get_site_xpos(self.model, self.data, "grasp") + return self._utils.get_site_xpos(self.model, self.data, "grasp") - self.center_of_table @property - def obj(self): - return self._utils.get_site_xpos(self.model, self.data, "object_site") + def eef_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt + + @property + def gripper_angle(self): + return self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") @property def robot_state(self): - gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") - return np.concatenate([self.eef, gripper_angle]) + return np.concatenate([self.eef - self.center_of_table, self.gripper_angle]) + + @property + def obj(self): + return self._utils.get_site_xpos(self.model, self.data, "object_site") - self.center_of_table + + @property + def obj_rot(self): + return 
self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] + + @property + def obj_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt + + @property + def obj_velr(self): + return self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt def is_success(self): """Indicates whether or not the achieved goal successfully achieved the desired goal.""" @@ -237,14 +243,6 @@ def _reset_sim(self): mujoco.mj_forward(self.model, self.data) return True - # def reset(self, seed=None, options=None): - # super().reset(seed=seed, options=options) - # self._reset_sim() - # observation = self._get_obs() - # observation = self._transform_obs(observation) - # info = {} - # return observation, info - def step(self, action): assert action.shape == (4,) assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" @@ -252,12 +250,11 @@ def step(self, action): self._mujoco.mj_step(self.model, self.data, nstep=2) self._step_callback() observation = self.get_obs() - # observation = self._transform_obs(observation) reward = self.get_reward() terminated = is_success = self.is_success() + truncated = False info = {"is_success": is_success} - truncated = False return observation, reward, terminated, truncated, info def _step_callback(self): @@ -307,40 +304,12 @@ def _set_gripper(self, gripper_pos, gripper_rotation): self.data.qpos[10] = 0.0 self.data.qpos[12] = 0.0 - # def _transform_obs(self, obs, reset=False): - # if self.obs_type == "state": - # return obs["observation"] - # elif self.obs_type == "rgb": - # self._update_frames(reset=reset) - # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - # return rgb_obs - # elif self.obs_type == "all": - # self._update_frames(reset=reset) - # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - # return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - # else: - # raise ValueError(f"Unknown obs_type {self.obs_type}. 
Must be one of [rgb, all, state]") - - # def _update_frames(self, reset=False): - # pixels = self._render_obs() - # self._frames.append(pixels) - # if reset: - # for _ in range(1, self.frame_stack): - # self._frames.append(pixels) - # assert len(self._frames) == self.frame_stack - - # def _render_obs(self): - # obs = self.render(mode="rgb_array") - # if not self.channel_last: - # obs = obs.transpose(2, 0, 1) - # return obs.copy() - def render(self, mode="rgb_array"): self._render_callback() if mode == "visualize": return self.visualization_renderer.render("rgb_array", camera_name="camera0") - render = self.observation_renderer.render("rgb_array", camera_name="camera0") + render = self.observation_renderer.render(mode, camera_name="camera0") if self.channel_last: return render.copy() else: diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 06897cb..12cb564 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -19,26 +19,6 @@ def __init__(self, **kwargs): def z_target(self): return self._init_z + self._z_threshold - @property - def eef_velp(self): - return self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt - - @property - def obj_rot(self): - return self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] - - @property - def obj_velp(self): - return self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt - - @property - def obj_velr(self): - return self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - - @property - def gripper_angle(self): - return self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") - def is_success(self): return self.obj[2] >= self.z_target @@ -76,29 +56,19 @@ def get_obs(self): elif self.obs_type == "pixels_agent_pos": return { "pixels": pixels, - "agent_pos": self._get_obs(agent_only=True), + "agent_pos": self.robot_state, } else: raise ValueError( f"Unknown obs_type {self.obs_type}. 
Must be one of [pixels, state, pixels_agent_pos]" ) - def _get_obs(self, agent_only=False): - eef = self.eef - self.center_of_table - if agent_only: - return np.concatenate( - [ - eef, - self.gripper_angle, - ] - ) - - obj = self.obj - self.center_of_table + def _get_obs(self): return np.concatenate( [ - eef, + self.eef, self.eef_velp, - obj, + self.obj, self.obj_rot, self.obj_velp, self.obj_velr, @@ -106,10 +76,10 @@ def _get_obs(self, agent_only=False): np.array( [ np.linalg.norm(self.eef - self.obj), - np.linalg.norm(eef[:-1] - obj[:-1]), + np.linalg.norm(self.eef[:-1] - self.obj[:-1]), self.z_target, - self.z_target - obj[-1], - self.z_target - eef[-1], + self.z_target - self.obj[-1], + self.z_target - self.eef[-1], ] ), self.gripper_angle, diff --git a/gym_xarm/tasks/peg_in_box.py b/gym_xarm/tasks/peg_in_box.py index 4f21c48..1db5e42 100644 --- a/gym_xarm/tasks/peg_in_box.py +++ b/gym_xarm/tasks/peg_in_box.py @@ -4,6 +4,8 @@ class PegInBox(Base): + """DEPRECATED: use only Lift for now""" + def __init__(self): super().__init__("peg_in_box") diff --git a/gym_xarm/tasks/push.py b/gym_xarm/tasks/push.py index 8b145a6..daa6b60 100644 --- a/gym_xarm/tasks/push.py +++ b/gym_xarm/tasks/push.py @@ -4,6 +4,8 @@ class Push(Base): + """DEPRECATED: use only Lift for now""" + def __init__(self): super().__init__("push") diff --git a/gym_xarm/tasks/reach.py b/gym_xarm/tasks/reach.py index 94688e7..ee1a533 100644 --- a/gym_xarm/tasks/reach.py +++ b/gym_xarm/tasks/reach.py @@ -4,6 +4,8 @@ class Reach(Base): + """DEPRECATED: use only Lift for now""" + def __init__(self): super().__init__("reach") diff --git a/tests/test_env.py b/tests/test_env.py index 9d2e8ba..f08f951 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -2,29 +2,20 @@ import gymnasium as gym from gymnasium.utils.env_checker import check_env +import gym_xarm # noqa: F401 + @pytest.mark.parametrize( "env_task, obs_type", [ ("XarmLift-v0", "state"), ("XarmLift-v0", "pixels"), ("XarmLift-v0", "pixels_agent_pos"), - # TODO(aliberts): Add other tasks - # ("reach", False, False), - # ("reach", True, False), - # ("push", False, False), - # ("push", True, False), - # ("peg_in_box", False, False), - # ("peg_in_box", True, False), ], ) def test_env(env_task, obs_type): - import gym_xarm # noqa: F401 env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) - check_env(env.unwrapped, skip_render_check=True) - # env.reset() - # env.render() + check_env(env.unwrapped) if __name__ == "__main__": test_env("XarmLift-v0", "pixels_agent_pos") - # test_env("XarmLift-v0", "state") \ No newline at end of file From 0eb8d27d85e5500186451f9dc694c4a8ab461a44 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:17:12 +0200 Subject: [PATCH 07/16] Comment unused tasks --- gym_xarm/tasks/peg_in_box.py | 163 ++++++++++++++++++----------------- gym_xarm/tasks/push.py | 127 +++++++++++++-------------- gym_xarm/tasks/reach.py | 79 ++++++++--------- 3 files changed, 186 insertions(+), 183 deletions(-) diff --git a/gym_xarm/tasks/peg_in_box.py b/gym_xarm/tasks/peg_in_box.py index 1db5e42..d9425af 100644 --- a/gym_xarm/tasks/peg_in_box.py +++ b/gym_xarm/tasks/peg_in_box.py @@ -1,4 +1,4 @@ -import numpy as np +# import numpy as np from gym_xarm.tasks import Base @@ -6,83 +6,84 @@ class PegInBox(Base): """DEPRECATED: use only Lift for now""" - def __init__(self): - super().__init__("peg_in_box") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - for _ in range(10): - self._apply_action(np.array([0, 0, 0, 1], 
dtype=np.float32)) - self.sim.step() - - @property - def box(self): - return self.sim.data.get_site_xpos("box_site") - - def is_success(self): - return np.linalg.norm(self.obj - self.box) <= 0.05 - - def get_reward(self): - dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2]) - dist_xyz = np.linalg.norm(self.obj - self.box) - return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy - - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, box = self.eef - self.center_of_table, self.box - self.center_of_table - - obj = self.obj - self.center_of_table - obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] - obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt - obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt - - obs = np.concatenate( - [ - eef, - eef_velp, - box, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - box, - eef - obj, - obj - box, - np.array( - [ - np.linalg.norm(eef - box), - np.linalg.norm(eef - obj), - np.linalg.norm(obj - box), - gripper_angle, - ] - ), - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Object - object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3) - object_qpos = self.sim.data.get_joint_qpos("object_joint0") - object_qpos[:3] = object_pos - self.sim.data.set_joint_qpos("object_joint0", object_qpos) - - # Box - box_pos = np.array([1.61, 0.18, 0.58]) - box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2) - box_qpos = self.sim.data.get_joint_qpos("box_joint0") - box_qpos[:3] = box_pos - self.sim.data.set_joint_qpos("box_joint0", box_qpos) - - return self.box - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) + ... 
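Note on the pattern above: each deprecated task class (`PegInBox`, and `Push` and `Reach` below) keeps a bare `...` (Ellipsis) body so the module still imports and the class name still resolves, while the old implementation is preserved as comments. A minimal sketch of the resulting module shape, assuming nothing beyond the names already in this diff:

```python
from gym_xarm.tasks import Base


class PegInBox(Base):
    """DEPRECATED: use only Lift for now"""

    # `...` is a valid (empty) class body: the class can still be imported
    # and referenced, but it adds no behavior on top of Base.
    ...
```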
+ # def __init__(self): + # super().__init__("peg_in_box") + + # def _reset_sim(self): + # self._act_magnitude = 0 + # super()._reset_sim() + # for _ in range(10): + # self._apply_action(np.array([0, 0, 0, 1], dtype=np.float32)) + # self.sim.step() + + # @property + # def box(self): + # return self.sim.data.get_site_xpos("box_site") + + # def is_success(self): + # return np.linalg.norm(self.obj - self.box) <= 0.05 + + # def get_reward(self): + # dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2]) + # dist_xyz = np.linalg.norm(self.obj - self.box) + # return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy + + # def _get_obs(self): + # eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt + # gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") + # eef, box = self.eef - self.center_of_table, self.box - self.center_of_table + + # obj = self.obj - self.center_of_table + # obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] + # obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt + # obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt + + # obs = np.concatenate( + # [ + # eef, + # eef_velp, + # box, + # obj, + # obj_rot, + # obj_velp, + # obj_velr, + # eef - box, + # eef - obj, + # obj - box, + # np.array( + # [ + # np.linalg.norm(eef - box), + # np.linalg.norm(eef - obj), + # np.linalg.norm(obj - box), + # gripper_angle, + # ] + # ), + # ], + # axis=0, + # ) + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box} + + # def _sample_goal(self): + # # Gripper + # gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3) + # super()._set_gripper(gripper_pos, self.gripper_rotation) + + # # Object + # object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3) + # object_qpos = self.sim.data.get_joint_qpos("object_joint0") + # object_qpos[:3] = object_pos + # self.sim.data.set_joint_qpos("object_joint0", object_qpos) + + # # Box + # box_pos = np.array([1.61, 0.18, 0.58]) + # box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2) + # box_qpos = self.sim.data.get_joint_qpos("box_joint0") + # box_qpos[:3] = box_pos + # self.sim.data.set_joint_qpos("box_joint0", box_qpos) + + # return self.box + + # def step(self, action): + # self._act_magnitude = np.linalg.norm(action[:3]) + # return super().step(action) diff --git a/gym_xarm/tasks/push.py b/gym_xarm/tasks/push.py index daa6b60..0efc933 100644 --- a/gym_xarm/tasks/push.py +++ b/gym_xarm/tasks/push.py @@ -1,4 +1,4 @@ -import numpy as np +# import numpy as np from gym_xarm.tasks import Base @@ -6,75 +6,76 @@ class Push(Base): """DEPRECATED: use only Lift for now""" - def __init__(self): - super().__init__("push") + ... 
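For reference, the commented-out `Push.get_reward` below is a shaped dense reward: the negative object-to-goal distance, minus a quadratic penalty on the magnitude of the translational action. A self-contained sketch of the same computation (the function name and arguments are illustrative, not part of the package):

```python
import numpy as np


def push_reward(obj_pos, goal_pos, action_xyz, penalty_weight=0.15):
    # Closer to the goal pushes the reward toward 0 from below;
    # large actions are discouraged by a quadratic penalty.
    dist = np.linalg.norm(obj_pos - goal_pos)
    penalty = np.linalg.norm(action_xyz) ** 2
    return -(dist + penalty_weight * penalty)


# Object 5 cm from the goal with an action of magnitude 0.1:
# -(0.05 + 0.15 * 0.01) = -0.0515
reward = push_reward(
    np.array([1.65, 0.20, 0.55]),
    np.array([1.60, 0.20, 0.55]),
    np.array([0.1, 0.0, 0.0]),
)
```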
+ # def __init__(self): + # super().__init__("push") - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() + # def _reset_sim(self): + # self._act_magnitude = 0 + # super()._reset_sim() - def is_success(self): - return np.linalg.norm(self.obj - self.goal) <= 0.05 + # def is_success(self): + # return np.linalg.norm(self.obj - self.goal) <= 0.05 - def get_reward(self): - dist = np.linalg.norm(self.obj - self.goal) - penalty = self._act_magnitude**2 - return -(dist + 0.15 * penalty) + # def get_reward(self): + # dist = np.linalg.norm(self.obj - self.goal) + # penalty = self._act_magnitude**2 + # return -(dist + 0.15 * penalty) - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table + # def _get_obs(self): + # eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt + # gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") + # eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table - obj = self.obj - self.center_of_table - obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] - obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt - obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt + # obj = self.obj - self.center_of_table + # obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] + # obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt + # obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt - obs = np.concatenate( - [ - eef, - eef_velp, - goal, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - goal, - eef - obj, - obj - goal, - np.array( - [ - np.linalg.norm(eef - goal), - np.linalg.norm(eef - obj), - np.linalg.norm(obj - goal), - gripper_angle, - ] - ), - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} + # obs = np.concatenate( + # [ + # eef, + # eef_velp, + # goal, + # obj, + # obj_rot, + # obj_velp, + # obj_velr, + # eef - goal, + # eef - obj, + # obj - goal, + # np.array( + # [ + # np.linalg.norm(eef - goal), + # np.linalg.norm(eef - obj), + # np.linalg.norm(obj - goal), + # gripper_angle, + # ] + # ), + # ], + # axis=0, + # ) + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) + # def _sample_goal(self): + # # Gripper + # gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) + # super()._set_gripper(gripper_pos, self.gripper_rotation) - # Object - object_pos = self.center_of_table - np.array([0.25, 0, 0.07]) - object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1) - object_pos[1] += self.np_random.uniform(-0.08, 0.08, size=1) - object_qpos = self.sim.data.get_joint_qpos("object_joint0") - object_qpos[:3] = object_pos - self.sim.data.set_joint_qpos("object_joint0", object_qpos) + # # Object + # object_pos = self.center_of_table - np.array([0.25, 0, 0.07]) + # object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1) + # object_pos[1] += self.np_random.uniform(-0.08, 0.08, size=1) + # object_qpos = self.sim.data.get_joint_qpos("object_joint0") + # object_qpos[:3] = object_pos + # self.sim.data.set_joint_qpos("object_joint0", object_qpos) - # Goal - self.goal 
= np.array([1.600, 0.200, 0.545]) - self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2) - self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal - return self.goal + # # Goal + # self.goal = np.array([1.600, 0.200, 0.545]) + # self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2) + # self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal + # return self.goal - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) + # def step(self, action): + # self._act_magnitude = np.linalg.norm(action[:3]) + # return super().step(action) diff --git a/gym_xarm/tasks/reach.py b/gym_xarm/tasks/reach.py index ee1a533..828e989 100644 --- a/gym_xarm/tasks/reach.py +++ b/gym_xarm/tasks/reach.py @@ -1,4 +1,4 @@ -import numpy as np +# import numpy as np from gym_xarm.tasks import Base @@ -6,41 +6,42 @@ class Reach(Base): """DEPRECATED: use only Lift for now""" - def __init__(self): - super().__init__("reach") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - - def is_success(self): - return np.linalg.norm(self.eef - self.goal) <= 0.05 - - def get_reward(self): - dist = np.linalg.norm(self.eef - self.goal) - penalty = self._act_magnitude**2 - return -(dist + 0.15 * penalty) - - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table - obs = np.concatenate( - [eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0 - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Goal - self.goal = np.array([1.550, 0.287, 0.580]) - self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2) - self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal - return self.goal - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) + ... 
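The commented-out `Reach._sample_goal` below shows the goal-sampling pattern these tasks share: jitter a nominal target with the environment's seeded RNG, then write the result into the `target0` MuJoCo site. A standalone sketch of just the sampling step (the nominal position and jitter range are copied from the code below; the function name is illustrative):

```python
import numpy as np


def sample_reach_goal(rng: np.random.Generator) -> np.ndarray:
    # Nominal target above the table, jittered in x/y so each episode differs.
    goal = np.array([1.550, 0.287, 0.580])
    goal[:2] += rng.uniform(-0.125, 0.125, size=2)
    return goal


goal = sample_reach_goal(np.random.default_rng(seed=0))
```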
+ # def __init__(self): + # super().__init__("reach") + + # def _reset_sim(self): + # self._act_magnitude = 0 + # super()._reset_sim() + + # def is_success(self): + # return np.linalg.norm(self.eef - self.goal) <= 0.05 + + # def get_reward(self): + # dist = np.linalg.norm(self.eef - self.goal) + # penalty = self._act_magnitude**2 + # return -(dist + 0.15 * penalty) + + # def _get_obs(self): + # eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt + # gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") + # eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table + # obs = np.concatenate( + # [eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0 + # ) + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} + + # def _sample_goal(self): + # # Gripper + # gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) + # super()._set_gripper(gripper_pos, self.gripper_rotation) + + # # Goal + # self.goal = np.array([1.550, 0.287, 0.580]) + # self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2) + # self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal + # return self.goal + + # def step(self, action): + # self._act_magnitude = np.linalg.norm(action[:3]) + # return super().step(action) From 4c65f3ed8c7aeb938cc42a3c8af7b31a319c10ef Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:39:47 +0200 Subject: [PATCH 08/16] Set MUJOCO_GL on-the-fly --- .github/workflows/test.yml | 1 - gym_xarm/tasks/base.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2769dcd..dafaf9e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,6 @@ jobs: runs-on: ubuntu-latest env: POETRY_VERSION: 1.8.2 - MUJOCO_GL: egl steps: #---------------------------------------------- # check-out repo and set-up python diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 5ae90ae..5f7f893 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -143,6 +143,9 @@ def _initialize_renderer(self, type: str): else: raise ValueError(f"Unknown renderer type {type}. Must be one of [observation, visualization]") + if os.environ.get("MUJOCO_GL") not in ["glfw", "egl", "osmesa"]: + os.environ["MUJOCO_GL"] = "egl" + return MujocoRenderer(model, self.data) @property From d06072c47d4084b7f329e3e149f5f296badb5dce Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:49:12 +0200 Subject: [PATCH 09/16] Use _ALL_RENDERERS --- gym_xarm/tasks/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 5f7f893..4d8affa 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -3,7 +3,7 @@ import gymnasium as gym import mujoco import numpy as np -from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer +from gymnasium.envs.mujoco.mujoco_rendering import _ALL_RENDERERS, MujocoRenderer from gymnasium_robotics.utils import mujoco_utils from gym_xarm.tasks import mocap @@ -143,7 +143,7 @@ def _initialize_renderer(self, type: str): else: raise ValueError(f"Unknown renderer type {type}. 
Must be one of [observation, visualization]") - if os.environ.get("MUJOCO_GL") not in ["glfw", "egl", "osmesa"]: + if os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: os.environ["MUJOCO_GL"] = "egl" return MujocoRenderer(model, self.data) From 3a72cf0a36c2a9f73ebf1020705ea26f20a37dc0 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 13:00:22 +0200 Subject: [PATCH 10/16] Cleanup --- .github/workflows/test.yml | 2 +- tests/test_env.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dafaf9e..6f6711d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,7 +83,7 @@ jobs: path: .venv key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }} - - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl) + - name: Install libegl1-mesa-dev run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev #---------------------------------------------- diff --git a/tests/test_env.py b/tests/test_env.py index f08f951..098f25e 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -15,7 +15,3 @@ def test_env(env_task, obs_type): env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) check_env(env.unwrapped) - - -if __name__ == "__main__": - test_env("XarmLift-v0", "pixels_agent_pos") From e54cdcc9ef40fa961e80239f8b6dd1640dac6e8e Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 14:55:11 +0200 Subject: [PATCH 11/16] Move get_obs, use render_mode --- gym_xarm/tasks/base.py | 64 +++++++++++++++++++++++++----------------- gym_xarm/tasks/lift.py | 16 ----------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 4d8affa..6f7e73a 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -18,7 +18,7 @@ class Base(gym.Env): """ metadata = { - "render_modes": [], + "render_modes": ["human", "rgb_array"], "render_fps": 25, } n_substeps = 20 @@ -30,12 +30,12 @@ def __init__( self, task, obs_type="state", + render_mode="rgb_array", gripper_rotation=None, observation_width=84, observation_height=84, visualization_width=680, visualization_height=680, - channel_last=True, ): # Coordinates if gripper_rotation is None: @@ -47,9 +47,9 @@ def __init__( # Observations self.obs_type = obs_type - self.channel_last = channel_last # Rendering + self.render_mode = render_mode self.observation_width = observation_width self.observation_height = observation_height self.visualization_width = visualization_width @@ -62,8 +62,8 @@ def __init__( # Initialize sim, spaces & renderers self._initialize_simulation() - self.observation_renderer = self._initialize_renderer(type="observation") - self.visualization_renderer = self._initialize_renderer(type="visualization") + self.observation_renderer = self._initialize_renderer(render_type="observation") + self.visualization_renderer = self._initialize_renderer(render_type="visualization") self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) @@ -102,11 +102,7 @@ def _env_setup(self, initial_qpos): mujoco.mj_forward(self.model, self.data) def _initialize_observation_space(self): - image_shape = ( - (self.observation_height, self.observation_width, 3) - if self.channel_last - else (3, self.observation_height, 
self.observation_width) - ) + image_shape = (self.observation_height, self.observation_width, 3) obs = self.get_obs() if self.obs_type == "state": observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype=np.float64) @@ -128,10 +124,10 @@ def _initialize_observation_space(self): return observation_space - def _initialize_renderer(self, type: str): - if type == "observation": + def _initialize_renderer(self, render_type: str): + if render_type == "observation": model = self.model - elif type == "visualization": + elif render_type == "visualization": # HACK: gymnasium doesn't allow for custom size rendering on-the-fly, so we # initialize another renderer with appropriate size for visualization purposes # see https://gymnasium.farama.org/content/migration-guide/#environment-render @@ -141,7 +137,9 @@ def _initialize_renderer(self, type: str): model.vis.global_.offwidth = self.visualization_width model.vis.global_.offheight = self.visualization_height else: - raise ValueError(f"Unknown renderer type {type}. Must be one of [observation, visualization]") + raise ValueError( + f"Unknown render type {render_type}. Must be one of [observation, visualization]" + ) if os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: os.environ["MUJOCO_GL"] = "egl" @@ -196,9 +194,6 @@ def _sample_goal(self): """Samples a new goal and returns it.""" raise NotImplementedError() - def get_obs(self): - raise NotImplementedError() - def reset( self, *, @@ -246,6 +241,22 @@ def _reset_sim(self): mujoco.mj_forward(self.model, self.data) return True + def get_obs(self): + if self.obs_type == "state": + return self._get_obs() + pixels = self._render(render_type="observation") + if self.obs_type == "pixels": + return pixels + elif self.obs_type == "pixels_agent_pos": + return { + "pixels": pixels, + "agent_pos": self.robot_state, + } + else: + raise ValueError( + f"Unknown obs_type {self.obs_type}. 
Must be one of [pixels, state, pixels_agent_pos]" + ) + def step(self, action): assert action.shape == (4,) assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" @@ -307,16 +318,19 @@ def _set_gripper(self, gripper_pos, gripper_rotation): self.data.qpos[10] = 0.0 self.data.qpos[12] = 0.0 - def render(self, mode="rgb_array"): - self._render_callback() - if mode == "visualize": - return self.visualization_renderer.render("rgb_array", camera_name="camera0") + def render(self): + return self._render(render_type="visualization") - render = self.observation_renderer.render(mode, camera_name="camera0") - if self.channel_last: - return render.copy() + def _render(self, render_type): + self._render_callback() + if render_type == "visualization": + render = self.visualization_renderer.render(self.render_mode, camera_name="camera0") + elif render_type == "observation": + render = self.observation_renderer.render(self.render_mode, camera_name="camera0") else: - return render.transpose(2, 0, 1).copy() + raise ValueError(render_type) + + return render.copy() def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 12cb564..4ecd0c2 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -47,22 +47,6 @@ def get_reward(self): return reach_reward / 100 + pick_reward - def get_obs(self): - if self.obs_type == "state": - return self._get_obs() - pixels = self.render() - if self.obs_type == "pixels": - return pixels - elif self.obs_type == "pixels_agent_pos": - return { - "pixels": pixels, - "agent_pos": self.robot_state, - } - else: - raise ValueError( - f"Unknown obs_type {self.obs_type}. Must be one of [pixels, state, pixels_agent_pos]" - ) - def _get_obs(self): return np.concatenate( [ From 4c3a5bdb0105ef09abe5402f718608e3741a5625 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 15:51:33 +0200 Subject: [PATCH 12/16] WIP --- gym_xarm/tasks/base.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 6f7e73a..cbbd46e 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -8,6 +8,12 @@ from gym_xarm.tasks import mocap +RENDER_MODES = ["rgb_array"] +if os.environ.get("MUJOCO_GL") == "glfw": + RENDER_MODES.append("human") +elif os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: + os.environ["MUJOCO_GL"] = "egl" + class Base(gym.Env): """ @@ -18,7 +24,7 @@ class Base(gym.Env): """ metadata = { - "render_modes": ["human", "rgb_array"], + "render_modes": RENDER_MODES, "render_fps": 25, } n_substeps = 20 @@ -141,9 +147,6 @@ def _initialize_renderer(self, render_type: str): f"Unknown render type {render_type}. 
Must be one of [observation, visualization]" ) - if os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: - os.environ["MUJOCO_GL"] = "egl" - return MujocoRenderer(model, self.data) @property @@ -325,12 +328,14 @@ def _render(self, render_type): self._render_callback() if render_type == "visualization": render = self.visualization_renderer.render(self.render_mode, camera_name="camera0") + if self.render_mode != "human": + render = render.copy() elif render_type == "observation": - render = self.observation_renderer.render(self.render_mode, camera_name="camera0") + render = self.observation_renderer.render("rgb_array", camera_name="camera0").copy() else: raise ValueError(render_type) - return render.copy() + return render def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) From 8c9b4274ab7436cdb3378d099acd509f35e77051 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 16:30:08 +0200 Subject: [PATCH 13/16] Pass renderer object on _render calls --- gym_xarm/tasks/base.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index cbbd46e..7e2ddc4 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -68,8 +68,8 @@ def __init__( # Initialize sim, spaces & renderers self._initialize_simulation() - self.observation_renderer = self._initialize_renderer(render_type="observation") - self.visualization_renderer = self._initialize_renderer(render_type="visualization") + self.observation_renderer = self._initialize_renderer(renderer_type="observation") + self.visualization_renderer = self._initialize_renderer(renderer_type="visualization") self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) @@ -130,10 +130,10 @@ def _initialize_observation_space(self): return observation_space - def _initialize_renderer(self, render_type: str): - if render_type == "observation": + def _initialize_renderer(self, renderer_type: str): + if renderer_type == "observation": model = self.model - elif render_type == "visualization": + elif renderer_type == "visualization": # HACK: gymnasium doesn't allow for custom size rendering on-the-fly, so we # initialize another renderer with appropriate size for visualization purposes # see https://gymnasium.farama.org/content/migration-guide/#environment-render @@ -144,7 +144,7 @@ def _initialize_renderer(self, render_type: str): model.vis.global_.offheight = self.visualization_height else: raise ValueError( - f"Unknown render type {render_type}. Must be one of [observation, visualization]" + f"Unknown render type {renderer_type}. 
Must be one of [observation, visualization]" ) return MujocoRenderer(model, self.data) @@ -247,7 +247,7 @@ def _reset_sim(self): def get_obs(self): if self.obs_type == "state": return self._get_obs() - pixels = self._render(render_type="observation") + pixels = self._render(renderer=self.observation_renderer) if self.obs_type == "pixels": return pixels elif self.obs_type == "pixels_agent_pos": @@ -322,20 +322,12 @@ def _set_gripper(self, gripper_pos, gripper_rotation): self.data.qpos[12] = 0.0 def render(self): - return self._render(render_type="visualization") + return self._render(renderer=self.visualization_renderer) - def _render(self, render_type): + def _render(self, renderer: MujocoRenderer): self._render_callback() - if render_type == "visualization": - render = self.visualization_renderer.render(self.render_mode, camera_name="camera0") - if self.render_mode != "human": - render = render.copy() - elif render_type == "observation": - render = self.observation_renderer.render("rgb_array", camera_name="camera0").copy() - else: - raise ValueError(render_type) - - return render + render = renderer.render(self.render_mode, camera_name="camera0") + return render.copy() if render is not None else None def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) From 9a03c0a65302fad768f1dcf80e438a013fd60f5c Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Sun, 7 Apr 2024 14:22:41 +0200 Subject: [PATCH 14/16] Remove GoalEnv mention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> --- gym_xarm/tasks/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 7e2ddc4..4ab86d5 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -216,7 +216,7 @@ def reset( options (optional dictionary): Can be used when `reset` is override for additional information to specify how the environment is reset. Returns: - observation (dictionary) : Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`. + observation (dictionary) : Observation of the initial state. info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to the ``info`` returned by :meth:`step`. """ From 14875d37f4125423510f848eea7537e6fb5c5bf9 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 7 Apr 2024 14:37:07 +0200 Subject: [PATCH 15/16] Add example, Update README --- .pre-commit-config.yaml | 2 +- README.md | 62 ++++++++++++++++++++++++++++++++++++----- example.py | 15 ++++++++++ 3 files changed, 71 insertions(+), 8 deletions(-) create mode 100644 example.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f23d75..d998dac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^(data/|tests/) +exclude: ^(data/|tests/|example.py) default_language_version: python: python3.10 repos: diff --git a/README.md b/README.md index e47ee1a..4312704 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,11 @@ A gym environment for xArm TDMPC policy on xArm env -## Acknowledgment - -gym-xarm is adapted from [FOWM](https://www.yunhaifeng.com/FOWM/) - - ## Installation Create a virtual environment with Python 3.10 and activate it, e.g. 
with [`miniconda`](https://docs.anaconda.com/free/miniconda/index.html): ```bash -conda create -y -n xarm python=3.10 -conda activate xarm +conda create -y -n xarm python=3.10 && conda activate xarm ``` Install gym-xarm: ``` pip install gym-xarm ``` +## Quickstart + +```python +# example.py +import gymnasium as gym +import gym_xarm + +env = gym.make("gym_xarm/XarmLift-v0", render_mode="human") +observation, info = env.reset() + +for _ in range(1000): + action = env.action_space.sample() + observation, reward, terminated, truncated, info = env.step(action) + image = env.render() + + if terminated or truncated: + observation, info = env.reset() + +env.close() +``` + +To use this [example](./example.py) with `render_mode="human"`, you should set the `MUJOCO_GL` environment variable (e.g. `export MUJOCO_GL=glfw`) or simply run +```bash +MUJOCO_GL=glfw python example.py +``` + +## Description of the `Lift` task + +The goal of the agent is to lift the block above a height threshold. The agent is an xArm robot arm and the block is a cube. + +### Action Space + +The action space is continuous and consists of four values [x, y, z, w]: +- [x, y, z] represent the position of the end effector +- [w] represents the gripper control + +### Observation Space + +The observation space depends on the value of `obs_type`: +- `"state"`: observations contain agent and object state vectors only (no rendering) +- `"pixels"`: observations contain the rendered image only (no state vectors) +- `"pixels_agent_pos"`: observations contain the rendered image and the agent state vector + + ## Contribute Instead of using `pip` directly, we use `poetry` for development purposes to easily track our dependencies. If you don't have it already, follow the [instructions](https://python-poetry.org/docs/#installation) to install it. Install the project with dev dependencies: ```bash poetry install --with dev ``` ### Follow our style ```bash # install pre-commit hooks pre-commit install # apply style and linter checks on staged files pre-commit ``` + + +## Acknowledgment + +gym-xarm is adapted from [FOWM](https://www.yunhaifeng.com/FOWM/) and is based on work by [Nicklas Hansen](https://nicklashansen.github.io/), [Yanjie Ze](https://yanjieze.com/), [Rishabh Jangir](https://jangirrishabh.github.io/), [Mohit Jain](https://natsu6767.github.io/), and [Sambaran Ghosal](https://github.com/SambaranRepo) as part of the following publications: +* [Self-Supervised Policy Adaptation During Deployment](https://arxiv.org/abs/2007.04309) +* [Generalization in Reinforcement Learning by Soft Data Augmentation](https://arxiv.org/abs/2011.13389) +* [Stabilizing Deep Q-Learning with ConvNets and Vision Transformers under Data Augmentation](https://arxiv.org/abs/2107.00644) +* [Look Closer: Bridging Egocentric and Third-Person Views with Transformers for Robotic Manipulation](https://arxiv.org/abs/2201.07779) +* [Visual Reinforcement Learning with Self-Supervised 3D Representations](https://arxiv.org/abs/2210.07241) diff --git a/example.py b/example.py new file mode 100644 index 0000000..40d565b --- /dev/null +++ b/example.py @@ -0,0 +1,15 @@ +import gymnasium as gym +import gym_xarm + +env = gym.make("gym_xarm/XarmLift-v0", render_mode="human") +observation, info = env.reset() + +for _ in range(1000): + action = env.action_space.sample() + observation, reward, terminated, truncated, info = env.step(action) + image = env.render() + + if terminated or truncated: + observation, info = env.reset() + +env.close() From 07a139ab2f898beacc2a59e52dc3cea1b5dca96a Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 7 Apr 2024 14:41:15 +0200 Subject: [PATCH 16/16] pre-commit run -a --- .pre-commit-config.yaml | 2 +- poetry.lock | 3 +-- tests/test_env.py | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) 
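A note on the `exclude` change in the diff below: pre-commit documents `exclude` as a Python regular expression searched against each file path (with `re.search`), so `^(example.py)` keeps the new top-level `example.py` out of the hooks while `tests/` and `data/` are now checked again. A quick sketch of the matching semantics (note the unescaped `.`, which would also match e.g. `examplexpy` — harmless here, but worth knowing):

```python
import re

# The `exclude` pattern as set in .pre-commit-config.yaml below.
pattern = re.compile(r"^(example.py)")

assert pattern.search("example.py") is not None     # excluded from hooks
assert pattern.search("tests/test_env.py") is None  # no longer excluded
```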
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d998dac..bb3fc90 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^(data/|tests/|example.py) +exclude: ^(example.py) default_language_version: python: python3.10 repos: diff --git a/poetry.lock b/poetry.lock index dc06fe1..a293551 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.0 and should not be changed by hand. [[package]] name = "absl-py" @@ -755,7 +755,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/tests/test_env.py b/tests/test_env.py index 098f25e..8e11f68 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,9 +1,10 @@ -import pytest import gymnasium as gym +import pytest from gymnasium.utils.env_checker import check_env import gym_xarm # noqa: F401 + @pytest.mark.parametrize( "env_task, obs_type", [