From 0024fd7edd2fb25546f25079643e2fe9a48f3618 Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 4 Apr 2024 18:38:58 +0200
Subject: [PATCH 01/16] WIP

---
 gym_xarm/__init__.py   |   2 +-
 gym_xarm/robot_env.py  | 306 +++++++++++++++++++++++++++++++++++++++++
 gym_xarm/tasks/base.py | 259 ++++++++++++++++++++++++----------
 gym_xarm/tasks/lift.py |  13 +-
 tests/test_env.py      |   4 +-
 5 files changed, 508 insertions(+), 76 deletions(-)
 create mode 100644 gym_xarm/robot_env.py

diff --git a/gym_xarm/__init__.py b/gym_xarm/__init__.py
index 0bae784..90ba44a 100644
--- a/gym_xarm/__init__.py
+++ b/gym_xarm/__init__.py
@@ -4,5 +4,5 @@
     id="gym_xarm/XarmLift-v0",
     entry_point="gym_xarm.tasks:Lift",
     max_episode_steps=300,
-    kwargs={"obs_mode": "state"},
+    kwargs={"obs_type": "state"},
 )
diff --git a/gym_xarm/robot_env.py b/gym_xarm/robot_env.py
new file mode 100644
index 0000000..d1d732e
--- /dev/null
+++ b/gym_xarm/robot_env.py
@@ -0,0 +1,306 @@
+import os
+from typing import Optional
+
+import gymnasium as gym
+import mujoco
+import numpy as np
+from gymnasium import spaces
+from gymnasium_robotics.utils import mujoco_utils
+
+DEFAULT_SIZE = 480
+
+
+class BaseRobotEnv(gym.Env):
+    """Modified version of BaseRobotEnv to be a `gym.Env` instead of a `gymnasium_robotics.GoalEnv`"""
+
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+        ],
+        "render_fps": 25,
+    }
+
+    def __init__(
+        self,
+        model_path: str,
+        initial_qpos,
+        n_actions: int,
+        n_substeps: int,
+        render_mode: Optional[str] = None,
+        width: int = DEFAULT_SIZE,
+        height: int = DEFAULT_SIZE,
+    ):
+        """Initialize the hand and fetch robot superclass.
+
+        Args:
+            model_path (string): the path to the mjcf MuJoCo model.
+            initial_qpos (np.ndarray): initial position value of the joints in the MuJoCo simulation.
+            n_actions (integer): size of the action space.
+            n_substeps (integer): number of MuJoCo simulation timesteps per Gymnasium step.
+            render_mode (optional string): type of rendering mode, "human" for window rendering and "rgb_array" for offscreen. Defaults to None.
+            width (optional integer): width of each rendered frame. Defaults to DEFAULT_SIZE.
+            height (optional integer): height of each rendered frame. Defaults to DEFAULT_SIZE.
+        """
+        if model_path.startswith("/"):
+            self.fullpath = model_path
+        else:
+            self.fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
+        if not os.path.exists(self.fullpath):
+            raise OSError(f"File {self.fullpath} does not exist")
+
+        self.n_substeps = n_substeps
+
+        self.initial_qpos = initial_qpos
+
+        self.width = width
+        self.height = height
+        self._initialize_simulation()
+
+        self.goal = np.zeros(0)
+        obs = self._get_obs()
+
+        assert (
+            int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
+        ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'
+
+        self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32")
+        self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64")
+        # self.observation_space = spaces.Dict(
+        #     dict(
+        #         desired_goal=spaces.Box(
+        #             -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64"
+        #         ),
+        #         achieved_goal=spaces.Box(
+        #             -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64"
+        #         ),
+        #         observation=spaces.Box(
+        #             -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64"
+        #         ),
+        #     )
+        # )
+
+        self.render_mode = render_mode
+
+    # Env methods
+    # ----------------------------
+    def compute_terminated(self, achieved_goal, desired_goal, info):
+        """All the available environments are currently continuing tasks and non-time dependent. The objective is to reach the goal for an indefinite period of time."""
+        return False
+
+    def compute_truncated(self, achieved_goal, desired_goal, info):
+        """The environments will be truncated only if setting a time limit with max_steps which will automatically wrap the environment in a gymnasium TimeLimit wrapper."""
+        return False
+
+    def step(self, action):
+        """Run one timestep of the environment's dynamics using the agent actions.
+
+        Args:
+            action (np.ndarray): Control action to be applied to the agent and update the simulation. Should be of shape :attr:`action_space`.
+
+        Returns:
+            observation (dictionary): Next observation due to the agent actions. It should satisfy the `GoalEnv` :attr:`observation_space`.
+            reward (integer): The reward as a result of taking the action. This is calculated by :meth:`compute_reward` of `GoalEnv`.
+            terminated (boolean): Whether the agent reaches the terminal state. This is calculated by :meth:`compute_terminated` of `GoalEnv`.
+            truncated (boolean): Whether the truncation condition outside the scope of the MDP is satisfied. Typically, due to a timelimit, but
+            it is also calculated in :meth:`compute_truncated` of `GoalEnv`.
+            info (dictionary): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging). In this case there is a single
+            key `is_success` with a boolean value, True if the `achieved_goal` is the same as the `desired_goal`.
+        """
+        if np.array(action).shape != self.action_space.shape:
+            raise ValueError("Action dimension mismatch")
+
+        action = np.clip(action, self.action_space.low, self.action_space.high)
+        self._set_action(action)
+
+        self._mujoco_step(action)
+
+        self._step_callback()
+
+        if self.render_mode == "human":
+            self.render()
+        obs = self._get_obs()
+
+        info = {
+            "is_success": self._is_success(obs["achieved_goal"], self.goal),
+        }
+
+        terminated = self.compute_terminated(obs["achieved_goal"], self.goal, info)
+        truncated = self.compute_truncated(obs["achieved_goal"], self.goal, info)
+
+        reward = self.compute_reward(obs["achieved_goal"], self.goal, info)
+
+        return obs, reward, terminated, truncated, info
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ):
+        """Reset MuJoCo simulation to initial state.
+
+        Note: Attempt to reset the simulator. Since we randomize initial conditions, it
+        is possible to get into a state with numerical issues (e.g. due to penetration or
+        gimbal lock) or we may not achieve an initial condition (e.g. an object is within the hand).
+        In this case, we just keep randomizing until we eventually achieve a valid initial
+        configuration.
+
+        Args:
+            seed (optional integer): The seed that is used to initialize the environment's PRNG (`np_random`). Defaults to None.
+            options (optional dictionary): Can be used when `reset` is overridden for additional information to specify how the environment is reset.
+
+        Returns:
+            observation (dictionary): Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`.
+            info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
+            the ``info`` returned by :meth:`step`.
+        """
+        super().reset(seed=seed)
+        did_reset_sim = False
+        while not did_reset_sim:
+            did_reset_sim = self._reset_sim()
+        self.goal = self._sample_goal().copy()
+        obs = self._get_obs()
+        if self.render_mode == "human":
+            self.render()
+
+        return obs, {}
+
+    # Extension methods
+    # ----------------------------
+    def _mujoco_step(self, action):
+        """Advance the mujoco simulation.
+
+        Override depending on the python bindings, either mujoco or mujoco_py
+        """
+        raise NotImplementedError
+
+    def _reset_sim(self):
+        """Resets a simulation and indicates whether or not it was successful.
+
+        If a reset was unsuccessful (e.g. if a randomized state caused an error in the
+        simulation), this method should indicate such a failure by returning False.
+        In such a case, this method will be called again to attempt the reset again.
+        """
+        return True
+
+    def _initialize_simulation(self):
+        """Initialize MuJoCo simulation data structures mjModel and mjData."""
+        raise NotImplementedError
+
+    def _get_obs(self):
+        """Returns the observation."""
+        raise NotImplementedError()
+
+    def _set_action(self, action):
+        """Applies the given action to the simulation."""
+        raise NotImplementedError()
+
+    def _is_success(self, achieved_goal, desired_goal):
+        """Indicates whether or not the achieved goal successfully achieved the desired goal."""
+        raise NotImplementedError()
+
+    def _sample_goal(self):
+        """Samples a new goal and returns it."""
+        raise NotImplementedError()
+
+    def _env_setup(self, initial_qpos):
+        """Initial configuration of the environment.
+
+        Can be used to configure initial state and extract information from the simulation.
+        """
+        pass
+
+    def _render_callback(self):
+        """A custom callback that is called before rendering.
+
+        Can be used to implement custom visualizations.
+        """
+        pass
+
+    def _step_callback(self):
+        """A custom callback that is called after stepping the simulation.
+
+        Can be used to enforce additional constraints on the simulation state.
+        """
+        pass
+
+
+class MujocoRobotEnv(BaseRobotEnv):
+    """Robot base class for fetch and hand environment versions that depend on new mujoco bindings from Deepmind."""
+
+    def __init__(self, default_camera_config: Optional[dict] = None, **kwargs):
+        """Initialize mujoco environment.
+
+        The Deepmind mujoco bindings are initialized alongside the respective mujoco_utils.
+
+        Args:
+            default_camera_config (optional dictionary): dictionary of default mujoco camera parameters for human rendering. Defaults to None.
+                The keys for this dictionary can be found in the mujoco mjvCamera struct:
+                https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=azimuth#mjvcamera.
+
+                - "type" (integer): camera type (mjtCamera)
+                - "fixedcamid" (integer): fixed camera id
+                - "trackbodyid": body id to track
+                - "lookat" (np.ndarray): cartesian (x, y, z) lookat point
+                - "distance" (float): distance to lookat point or tracked body
+                - "azimuth" (float): camera azimuth (deg)
+                - "elevation" (float): camera elevation (deg)
+        """
+
+        self._mujoco = mujoco
+        self._utils = mujoco_utils
+
+        super().__init__(**kwargs)
+
+        from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer
+
+        self.observation_renderer = MujocoRenderer(self.model, self.data, default_camera_config)
+
+    def _initialize_simulation(self):
+        self.model = self._mujoco.MjModel.from_xml_path(self.fullpath)
+        self.data = self._mujoco.MjData(self.model)
+        self._model_names = self._utils.MujocoModelNames(self.model)
+
+        self.model.vis.global_.offwidth = self.observation_width
+        self.model.vis.global_.offheight = self.observation_height
+
+        self._env_setup(initial_qpos=self.initial_qpos)
+        self.initial_time = self.data.time
+        self.initial_qpos = np.copy(self.data.qpos)
+        self.initial_qvel = np.copy(self.data.qvel)
+
+    def _reset_sim(self):
+        self.data.time = self.initial_time
+        self.data.qpos[:] = np.copy(self.initial_qpos)
+        self.data.qvel[:] = np.copy(self.initial_qvel)
+        if self.model.na != 0:
+            self.data.act[:] = None
+
+        mujoco.mj_forward(self.model, self.data)
+        return super()._reset_sim()
+
+    def render(self):
+        """Render a frame of the MuJoCo simulation.
+
+        Returns:
+            rgb image (np.ndarray): if render_mode is "rgb_array", return a 3D image array.
+        """
+        self._render_callback()
+        return self.observation_renderer.render(self.render_mode)
+
+    def close(self):
+        """Close contains the code necessary to "clean up" the environment.
+
+        Terminates any existing WindowViewer instances in the Gymnasium MujocoRenderer.
+ """ + if self.observation_renderer is not None: + self.observation_renderer.close() + + @property + def dt(self): + """Return the timestep of each Gymanisum step.""" + return self.model.opt.timestep * self.n_substeps + + def _mujoco_step(self, action): + self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index a2be191..8ccaf99 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -4,31 +4,46 @@ import gymnasium as gym import mujoco import numpy as np -from gymnasium_robotics.envs import robot_env +from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer +from gymnasium_robotics.utils import mujoco_utils from gym_xarm.tasks import mocap -class Base(robot_env.MujocoRobotEnv): +class Base(gym.Env): """ - Superclass for all simxarm environments. + Superclass for all gym-xarm environments. Args: xml_name (str): name of the xml environment file gripper_rotation (list): initial rotation of the gripper (given as a quaternion) """ + metadata = { + "render_modes": [ + "human", + "rgb_array", + ], + "render_fps": 25, + } + n_substeps = 20 + initial_qpos = {} + _mujoco = mujoco + _utils = mujoco_utils + def __init__( self, - xml_name, - obs_mode="state", + task, + obs_type="state", gripper_rotation=None, - image_size=84, - visualization_width=None, - visualization_height=None, + observation_width=84, + observation_height=84, + visualization_width=680, + visualization_height=680, render_mode=None, frame_stack=1, channel_last=False, ): + # Env setup if gripper_rotation is None: gripper_rotation = [0, 1, 0, 0] self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32) @@ -36,53 +51,133 @@ def __init__( self.max_z = 1.2 self.min_z = 0.2 - self.obs_mode = obs_mode - self.image_size = image_size + # Observations + self.obs_type = obs_type + self.channel_last = channel_last + + # Rendering + self.observation_width = observation_width + self.observation_height = observation_height + self.visualization_width = visualization_width + self.visualization_height = visualization_height self.render_mode = render_mode self.frame_stack = frame_stack - self.channel_last = channel_last self._frames = deque([], maxlen=frame_stack) - super().__init__( - model_path=os.path.join(os.path.dirname(__file__), "assets", f"{xml_name}.xml"), - n_substeps=20, - n_actions=4, - initial_qpos={}, - width=image_size, - height=image_size, - ) + # Assets + self.xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml") + if not os.path.exists(self.xml_path): + raise OSError(f"File {self.xml_path} does not exist") - self.observation_space = self._get_observation_space() + self._initialize_simulation() + self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) + + assert ( + int(np.round(1.0 / self.dt)) == self.metadata["render_fps"] + ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}' + if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 - if visualization_width is not None and visualization_height is not None: - self.custom_size_renderer = self._get_custom_size_renderer( - width=visualization_width, height=visualization_height - ) + # super().__init__( + # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), + # n_substeps=20, + # 
n_actions=4, + # initial_qpos={}, + # width=image_size, + # height=image_size, + # ) + + self.observation_renderer = self._initialize_renderer(type="observation") + self.visualization_renderer = self._initialize_renderer(type="visualization") + + def _initialize_simulation(self): + """Initialize MuJoCo simulation data structures mjModel and mjData.""" + self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) + self.data = self._mujoco.MjData(self.model) + self._model_names = self._utils.MujocoModelNames(self.model) + + self.model.vis.global_.offwidth = self.observation_width + self.model.vis.global_.offheight = self.observation_height + + self._env_setup(initial_qpos=self.initial_qpos) + self.initial_time = self.data.time + self.initial_qpos = np.copy(self.data.qpos) + self.initial_qvel = np.copy(self.data.qvel) + + def _env_setup(self, initial_qpos): + """Initial configuration of the environment. - def _get_observation_space(self): + Can be used to configure initial state and extract information from the simulation. + """ + for name, value in initial_qpos.items(): + self.data.set_joint_qpos(name, value) + mocap.reset(self.model, self.data) + mujoco.mj_forward(self.model, self.data) + self._sample_goal() + mujoco.mj_forward(self.model, self.data) + + def _initialize_observation_space(self): image_shape = ( (self.image_size, self.image_size, 3 * self.frame_stack) if self.channel_last else (3 * self.frame_stack, self.image_size, self.image_size) ) - if self.obs_mode == "state": - return self.observation_space["observation"] - elif self.obs_mode == "rgb": - return gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) - elif self.obs_mode == "all": - return gym.spaces.Dict( - state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), - rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), + if self.obs_type == "state": + obs = self._get_obs() + observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64") + elif self.obs_type == "pixels": + observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) + elif self.obs_type == "pixels_agent_pos": + observation_space = gym.spaces.Dict( + pixels=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), + agent_pos=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), ) else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]") + raise ValueError( + f"Unknown obs_type {self.obs_type}. Must be one of [pixels, state, pixels_agent_pos]" + ) + + return observation_space + + def _initialize_renderer(self, type: str): + if type == "observation": + model = self.model + elif type == "visualization": + # HACK: MujoCo doesn't allow for custom size rendering on-the-fly, so we + # initialize another renderer with appropriate size for visualization purposes + # see https://gymnasium.farama.org/content/migration-guide/#environment-render + from copy import deepcopy + + model = deepcopy(self.model) + model.vis.global_.offwidth = self.visualization_width + model.vis.global_.offheight = self.visualization_height + else: + raise ValueError(f"Unknown renderer type {type}. Must be one of [observation, visualization]") + + return MujocoRenderer(model, self.data) + + def _reset_sim(self): + """Resets a simulation and indicates whether or not it was successful. + + If a reset was unsuccessful (e.g. 
if a randomized state caused an error in the
+        simulation), this method should indicate such a failure by returning False.
+        In such a case, this method will be called again to attempt the reset again.
+        """
+        self.data.time = self.initial_time
+        self.data.qpos[:] = np.copy(self.initial_qpos)
+        self.data.qvel[:] = np.copy(self.initial_qvel)
+        if self.model.na != 0:
+            self.data.act[:] = None
+
+        mujoco.mj_forward(self.model, self.data)
+        return True
 
     @property
     def dt(self):
+        """Return the timestep of each Gymnasium step."""
         return self.n_substeps * self.model.opt.timestep
 
     @property
@@ -99,12 +194,14 @@ def robot_state(self):
         return np.concatenate([self.eef, gripper_angle])
 
     def is_success(self):
+        """Indicates whether or not the achieved goal successfully achieved the desired goal."""
         return NotImplementedError()
 
     def get_reward(self):
         raise NotImplementedError()
 
     def _sample_goal(self):
+        """Samples a new goal and returns it."""
         raise NotImplementedError()
 
     def get_obs(self):
@@ -143,7 +240,6 @@ def _apply_action(self, action):
             np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]),
         )
 
-    def _reset_sim(self):
         self.data.time = self.initial_time
         self.data.qpos[:] = np.copy(self.initial_qpos)
         self.data.qvel[:] = np.copy(self.initial_qvel)
@@ -158,21 +254,47 @@ def _set_gripper(self, gripper_pos, gripper_rotation):
         self.data.qpos[10] = 0.0
         self.data.qpos[12] = 0.0
 
-    def _env_setup(self, initial_qpos):
-        for name, value in initial_qpos.items():
-            self.data.set_joint_qpos(name, value)
-        mocap.reset(self.model, self.data)
-        mujoco.mj_forward(self.model, self.data)
-        self._sample_goal()
-        mujoco.mj_forward(self.model, self.data)
-
-    def reset(self, seed=None, options=None):
-        self._reset_sim()
+    def reset(
+        self,
+        *,
+        seed: int | None = None,
+        options: dict | None = None,
+    ):
+        """Reset MuJoCo simulation to initial state.
+
+        Note: Attempt to reset the simulator. Since we randomize initial conditions, it
+        is possible to get into a state with numerical issues (e.g. due to penetration or
+        gimbal lock) or we may not achieve an initial condition (e.g. an object is within the hand).
+        In this case, we just keep randomizing until we eventually achieve a valid initial
+        configuration.
+
+        Args:
+            seed (optional integer): The seed that is used to initialize the environment's PRNG (`np_random`). Defaults to None.
+            options (optional dictionary): Can be used when `reset` is overridden for additional information to specify how the environment is reset.
+
+        Returns:
+            observation (dictionary): Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`.
+            info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
+            the ``info`` returned by :meth:`step`.
+ """ + super().reset(seed=seed) + did_reset_sim = False + while not did_reset_sim: + did_reset_sim = self._reset_sim() observation = self._get_obs() - observation = self._transform_obs(observation) + if self.render_mode == "human": + self.render() info = {} return observation, info + # def reset(self, seed=None, options=None): + # super().reset(seed=seed, options=options) + # self._reset_sim() + # observation = self._get_obs() + # observation = self._transform_obs(observation) + # info = {} + # return observation, info + def step(self, action): assert action.shape == (4,) assert self.action_space.contains(action), "{!r} ({}) invalid".format(action, type(action)) @@ -187,18 +309,18 @@ def step(self, action): return observation, reward, done, info def _transform_obs(self, obs, reset=False): - if self.obs_mode == "state": + if self.obs_type == "state": return obs["observation"] - elif self.obs_mode == "rgb": + elif self.obs_type == "rgb": self._update_frames(reset=reset) rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) return rgb_obs - elif self.obs_mode == "all": + elif self.obs_type == "all": self._update_frames(reset=reset) rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]") + raise ValueError(f"Unknown obs_type {self.obs_type}. Must be one of [rgb, all, state]") def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) @@ -211,30 +333,25 @@ def _update_frames(self, reset=False): self._frames.append(pixels) assert len(self._frames) == self.frame_stack + def _render_obs(self): + obs = self.render(mode="rgb_array") + if not self.channel_last: + obs = obs.transpose(2, 0, 1) + return obs.copy() + def render(self, mode="rgb_array"): self._render_callback() - # TODO: use self.render_mode - if mode == "visualize": - return self._custom_size_render() - - return self.mujoco_renderer.render(mode, camera_name="camera0") - - def _custom_size_render(self): - return self.custom_size_renderer.render("rgb_array", camera_name="camera0") + # return self._mujoco.physics.render(height=84, width=84, camera_name="camera0") - def _get_custom_size_renderer(self, width, height): - from copy import deepcopy - - from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer + if mode == "visualize": + return self.visualization_renderer.render("rgb_array", camera_name="camera0") - # HACK: MujoCo doesn't allow for custom size rendering on-the-fly, so we - # initialize another renderer with appropriate size for visualization purposes - # see https://gymnasium.farama.org/content/migration-guide/#environment-render - custom_render_model = deepcopy(self.model) - custom_render_model.vis.global_.offwidth = width - custom_render_model.vis.global_.offheight = height - return MujocoRenderer(custom_render_model, self.data) + return self.observation_renderer.render(mode, camera_name="camera0") def close(self): - if self.mujoco_renderer is not None: - self.mujoco_renderer.close() + """Close contains the code necessary to "clean up" the environment. + + Terminates any existing WindowViewer instances in the Gymnasium MujocoRenderer. 
+ """ + if self.observation_renderer is not None: + self.observation_renderer.close() diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 1755694..d3a98d0 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -48,6 +48,13 @@ def get_reward(self): return reach_reward / 100 + pick_reward def _get_obs(self): + # WIP + if self.obs_type == "state": + obs = self._get_state_obs() + elif self.obs_type == "pixels": + obs = self._get_pixels_obs() + + def _get_state_obs(self): eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") eef = self.eef - self.center_of_table @@ -57,7 +64,7 @@ def _get_obs(self): obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - obs = np.concatenate( + return np.concatenate( [ eef, eef_velp, @@ -79,7 +86,7 @@ def _get_obs(self): ], axis=0, ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} def _sample_goal(self): # Gripper @@ -100,7 +107,7 @@ def _sample_goal(self): def reset(self, seed=None, options=None): self._action = np.zeros(4) - return super().reset(seed, options) + return super().reset(seed=seed, options=options) def step(self, action): self._action = action.copy() diff --git a/tests/test_env.py b/tests/test_env.py index 23f392e..f8b3380 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -6,7 +6,9 @@ def test_env(): env = gym.make("gym_xarm/XarmLift-v0") - check_env(env.unwrapped, skip_render_check=True) + # check_env(env.unwrapped, skip_render_check=True) + env.reset() + env.render() if __name__ == "__main__": From 186095d3202ec62360a5fc6462f1a1ab655509a0 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 5 Apr 2024 11:41:39 +0200 Subject: [PATCH 02/16] WIP --- gym_xarm/tasks/base.py | 217 +++++++++++++++++++++-------------------- gym_xarm/tasks/lift.py | 74 +++++++++----- 2 files changed, 163 insertions(+), 128 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 8ccaf99..852a90a 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -1,5 +1,5 @@ import os -from collections import OrderedDict, deque +from collections import deque import gymnasium as gym import mujoco @@ -41,7 +41,7 @@ def __init__( visualization_height=680, render_mode=None, frame_stack=1, - channel_last=False, + channel_last=True, ): # Env setup if gripper_rotation is None: @@ -69,6 +69,7 @@ def __init__( if not os.path.exists(self.xml_path): raise OSError(f"File {self.xml_path} does not exist") + # Initialize sim, spaces & renderers self._initialize_simulation() self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) @@ -81,6 +82,9 @@ def __init__( if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 + self.observation_renderer = self._initialize_renderer(type="observation") + self.visualization_renderer = self._initialize_renderer(type="visualization") + # super().__init__( # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), # n_substeps=20, @@ -90,12 +94,9 @@ def __init__( # height=image_size, # ) - self.observation_renderer = self._initialize_renderer(type="observation") - 
self.visualization_renderer = self._initialize_renderer(type="visualization") - def _initialize_simulation(self): """Initialize MuJoCo simulation data structures mjModel and mjData.""" - self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) + self.model = self._mujoco.MjModel.from_xml_path(self.xml_path) self.data = self._mujoco.MjData(self.model) self._model_names = self._utils.MujocoModelNames(self.model) @@ -121,9 +122,9 @@ def _env_setup(self, initial_qpos): def _initialize_observation_space(self): image_shape = ( - (self.image_size, self.image_size, 3 * self.frame_stack) + (self.observation_width, self.observation_height, 3 * self.frame_stack) if self.channel_last - else (3 * self.frame_stack, self.image_size, self.image_size) + else (3 * self.frame_stack, self.observation_width, self.observation_height) ) if self.obs_type == "state": obs = self._get_obs() @@ -159,22 +160,6 @@ def _initialize_renderer(self, type: str): return MujocoRenderer(model, self.data) - def _reset_sim(self): - """Resets a simulation and indicates whether or not it was successful. - - If a reset was unsuccessful (e.g. if a randomized state caused an error in the - simulation), this method should indicate such a failure by returning False. - In such a case, this method will be called again to attempt a the reset again. - """ - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - if self.model.na != 0: - self.data.act[:] = None - - mujoco.mj_forward(self.model, self.data) - return True - @property def dt(self): """Return the timestep of each Gymanisum step.""" @@ -205,60 +190,12 @@ def _sample_goal(self): raise NotImplementedError() def get_obs(self): - return self._get_obs() - - def _step_callback(self): - self._mujoco.mj_forward(self.model, self.data) - - def _limit_gripper(self, gripper_pos, pos_ctrl): - if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15: - pos_ctrl[0] = min(pos_ctrl[0], 0) - if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3: - pos_ctrl[0] = max(pos_ctrl[0], 0) - if gripper_pos[1] > self.center_of_table[1] + 0.3: - pos_ctrl[1] = min(pos_ctrl[1], 0) - if gripper_pos[1] < self.center_of_table[1] - 0.3: - pos_ctrl[1] = max(pos_ctrl[1], 0) - if gripper_pos[2] > self.max_z: - pos_ctrl[2] = min(pos_ctrl[2], 0) - if gripper_pos[2] < self.min_z: - pos_ctrl[2] = max(pos_ctrl[2], 0) - return pos_ctrl - - def _apply_action(self, action): - assert action.shape == (4,) - action = action.copy() - pos_ctrl, gripper_ctrl = action[:3], action[3] - pos_ctrl = self._limit_gripper( - self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl - ) * (1 / self.n_substeps) - gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) - mocap.apply_action( - self.model, - self._model_names, - self.data, - np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]), - ) - - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - self._sample_goal() - self._mujoco.mj_step(self.model, self.data, nstep=10) - return True - - def _set_gripper(self, gripper_pos, gripper_rotation): - self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos) - self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation) - self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0) - self.data.qpos[10] = 0.0 - self.data.qpos[12] = 0.0 + raise NotImplementedError() def reset( self, *, seed: 
int | None = None, - options: dict | None = None, ): """Reset MuJoCo simulation to initial state. @@ -287,6 +224,22 @@ def reset( info = {} return observation, info + def _reset_sim(self): + """Resets a simulation and indicates whether or not it was successful. + + If a reset was unsuccessful (e.g. if a randomized state caused an error in the + simulation), this method should indicate such a failure by returning False. + In such a case, this method will be called again to attempt a the reset again. + """ + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + if self.model.na != 0: + self.data.act[:] = None + + mujoco.mj_forward(self.model, self.data) + return True + # def reset(self, seed=None, options=None): # super().reset(seed=seed, options=options) # self._reset_sim() @@ -297,56 +250,106 @@ def reset( def step(self, action): assert action.shape == (4,) - assert self.action_space.contains(action), "{!r} ({}) invalid".format(action, type(action)) + assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" self._apply_action(action) self._mujoco.mj_step(self.model, self.data, nstep=2) self._step_callback() - observation = self._get_obs() - observation = self._transform_obs(observation) + observation = self.get_obs() + # observation = self.get_obs() + # observation = self._transform_obs(observation) reward = self.get_reward() done = False info = {"is_success": self.is_success(), "success": self.is_success()} return observation, reward, done, info - def _transform_obs(self, obs, reset=False): - if self.obs_type == "state": - return obs["observation"] - elif self.obs_type == "rgb": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return rgb_obs - elif self.obs_type == "all": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - else: - raise ValueError(f"Unknown obs_type {self.obs_type}. 
Must be one of [rgb, all, state]") - - def _render_callback(self): + def _step_callback(self): self._mujoco.mj_forward(self.model, self.data) - def _update_frames(self, reset=False): - pixels = self._render_obs() - self._frames.append(pixels) - if reset: - for _ in range(1, self.frame_stack): - self._frames.append(pixels) - assert len(self._frames) == self.frame_stack + def _limit_gripper(self, gripper_pos, pos_ctrl): + if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15: + pos_ctrl[0] = min(pos_ctrl[0], 0) + if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3: + pos_ctrl[0] = max(pos_ctrl[0], 0) + if gripper_pos[1] > self.center_of_table[1] + 0.3: + pos_ctrl[1] = min(pos_ctrl[1], 0) + if gripper_pos[1] < self.center_of_table[1] - 0.3: + pos_ctrl[1] = max(pos_ctrl[1], 0) + if gripper_pos[2] > self.max_z: + pos_ctrl[2] = min(pos_ctrl[2], 0) + if gripper_pos[2] < self.min_z: + pos_ctrl[2] = max(pos_ctrl[2], 0) + return pos_ctrl + + def _apply_action(self, action): + assert action.shape == (4,) + action = action.copy() + pos_ctrl, gripper_ctrl = action[:3], action[3] + pos_ctrl = self._limit_gripper( + self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl + ) * (1 / self.n_substeps) + gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) + mocap.apply_action( + self.model, + self._model_names, + self.data, + np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]), + ) - def _render_obs(self): - obs = self.render(mode="rgb_array") - if not self.channel_last: - obs = obs.transpose(2, 0, 1) - return obs.copy() + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + self._sample_goal() + self._mujoco.mj_step(self.model, self.data, nstep=10) + return True + + def _set_gripper(self, gripper_pos, gripper_rotation): + self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos) + self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation) + self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0) + self.data.qpos[10] = 0.0 + self.data.qpos[12] = 0.0 + + # def _transform_obs(self, obs, reset=False): + # if self.obs_type == "state": + # return obs["observation"] + # elif self.obs_type == "rgb": + # self._update_frames(reset=reset) + # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) + # return rgb_obs + # elif self.obs_type == "all": + # self._update_frames(reset=reset) + # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) + # return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) + # else: + # raise ValueError(f"Unknown obs_type {self.obs_type}. 
Must be one of [rgb, all, state]") + + # def _update_frames(self, reset=False): + # pixels = self._render_obs() + # self._frames.append(pixels) + # if reset: + # for _ in range(1, self.frame_stack): + # self._frames.append(pixels) + # assert len(self._frames) == self.frame_stack + + # def _render_obs(self): + # obs = self.render(mode="rgb_array") + # if not self.channel_last: + # obs = obs.transpose(2, 0, 1) + # return obs.copy() def render(self, mode="rgb_array"): self._render_callback() - # return self._mujoco.physics.render(height=84, width=84, camera_name="camera0") - if mode == "visualize": return self.visualization_renderer.render("rgb_array", camera_name="camera0") - return self.observation_renderer.render(mode, camera_name="camera0") + render = self.observation_renderer.render("rgb_array", camera_name="camera0") + if self.channel_last: + return render + else: + return render.transpose(2, 0, 1) + + def _render_callback(self): + self._mujoco.mj_forward(self.model, self.data) def close(self): """Close contains the code necessary to "clean up" the environment. @@ -355,3 +358,5 @@ def close(self): """ if self.observation_renderer is not None: self.observation_renderer.close() + if self.visualization_renderer is not None: + self.visualization_renderer.close() diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index d3a98d0..c367017 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -19,6 +19,26 @@ def __init__(self, **kwargs): def z_target(self): return self._init_z + self._z_threshold + @property + def eef_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt + + @property + def obj_rot(self): + return self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] + + @property + def obj_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt + + @property + def obj_velr(self): + return self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt + + @property + def gripper_angle(self): + return self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") + def is_success(self): return self.obj[2] >= self.z_target @@ -47,46 +67,56 @@ def get_reward(self): return reach_reward / 100 + pick_reward - def _get_obs(self): - # WIP + def get_obs(self): if self.obs_type == "state": - obs = self._get_state_obs() - elif self.obs_type == "pixels": - obs = self._get_pixels_obs() + return self._get_obs() + pixels = self.render() + if self.obs_type == "pixels": + return pixels + elif self.obs_type == "pixels_agent_pos": + return { + "pixels": pixels, + "agent_pos": self._get_obs(agent_only=True), + } + else: + raise ValueError( + f"Unknown obs_type {self.obs_type}. 
Must be one of [pixels, state, pixels_agent_pos]" + ) - def _get_state_obs(self): - eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt - gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") + def _get_obs(self, agent_only=False): eef = self.eef - self.center_of_table + if agent_only: + return np.concatenate( + [ + eef, + self.eef_velp, + self.gripper_angle, + ] + ) obj = self.obj - self.center_of_table - obj_rot = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] - obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt - obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - return np.concatenate( [ eef, - eef_velp, + self.eef_velp, obj, - obj_rot, - obj_velp, - obj_velr, - eef - obj, + self.obj_rot, + self.obj_velp, + self.obj_velr, + self.eef - self.obj, np.array( [ - np.linalg.norm(eef - obj), + np.linalg.norm(self.eef - self.obj), np.linalg.norm(eef[:-1] - obj[:-1]), self.z_target, self.z_target - obj[-1], self.z_target - eef[-1], ] ), - gripper_angle, + self.gripper_angle, ], axis=0, ) - # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} def _sample_goal(self): # Gripper @@ -105,9 +135,9 @@ def _sample_goal(self): # Goal return object_pos + np.array([0, 0, self._z_threshold]) - def reset(self, seed=None, options=None): + def reset(self, seed=None): self._action = np.zeros(4) - return super().reset(seed=seed, options=options) + return super().reset(seed=seed) def step(self, action): self._action = action.copy() From 41020985b93afc9146ac976c9c55f00d16097144 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 5 Apr 2024 11:46:36 +0200 Subject: [PATCH 03/16] Fix CI --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 985606a..2769dcd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,8 +84,8 @@ jobs: path: .venv key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }} - # - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl) - # run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev + - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl) + run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev #---------------------------------------------- # install project From fc0cc41ad7de47df73b9d11bceef18e2d40f0f63 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Fri, 5 Apr 2024 15:31:19 +0200 Subject: [PATCH 04/16] WIP --- gym_xarm/tasks/base.py | 20 ++++++++++++-------- gym_xarm/tasks/lift.py | 8 ++++++-- tests/test_env.py | 33 ++++++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 852a90a..2a4a125 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -71,6 +71,8 @@ def __init__( # Initialize sim, spaces & renderers self._initialize_simulation() + self.observation_renderer = self._initialize_renderer(type="observation") + self.visualization_renderer = self._initialize_renderer(type="visualization") self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) @@ -82,9 +84,6 @@ 
def __init__( if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 - self.observation_renderer = self._initialize_renderer(type="observation") - self.visualization_renderer = self._initialize_renderer(type="visualization") - # super().__init__( # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), # n_substeps=20, @@ -126,15 +125,19 @@ def _initialize_observation_space(self): if self.channel_last else (3 * self.frame_stack, self.observation_width, self.observation_height) ) + obs = self.get_obs() if self.obs_type == "state": - obs = self._get_obs() - observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64") + observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype=np.float64) elif self.obs_type == "pixels": observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) elif self.obs_type == "pixels_agent_pos": observation_space = gym.spaces.Dict( - pixels=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), - agent_pos=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), + { + "pixels": gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), + "agent_pos": gym.spaces.Box( + low=-np.inf, high=np.inf, shape=obs["agent_pos"].shape, dtype=np.float64 + ), + } ) else: raise ValueError( @@ -196,6 +199,7 @@ def reset( self, *, seed: int | None = None, + options: dict | None = None, ): """Reset MuJoCo simulation to initial state. @@ -218,7 +222,7 @@ def reset( did_reset_sim = False while not did_reset_sim: did_reset_sim = self._reset_sim() - observation = self._get_obs() + observation = self.get_obs() if self.render_mode == "human": self.render() info = {} diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index c367017..5bb2293 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -135,9 +135,13 @@ def _sample_goal(self): # Goal return object_pos + np.array([0, 0, self._z_threshold]) - def reset(self, seed=None): + def reset( + self, + seed=None, + options: dict | None = None, + ): self._action = np.zeros(4) - return super().reset(seed=seed) + return super().reset(seed=seed, options=options) def step(self, action): self._action = action.copy() diff --git a/tests/test_env.py b/tests/test_env.py index f8b3380..9d2e8ba 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,15 +1,30 @@ +import pytest import gymnasium as gym from gymnasium.utils.env_checker import check_env -import gym_xarm # noqa: F401 - - -def test_env(): - env = gym.make("gym_xarm/XarmLift-v0") - # check_env(env.unwrapped, skip_render_check=True) - env.reset() - env.render() +@pytest.mark.parametrize( + "env_task, obs_type", + [ + ("XarmLift-v0", "state"), + ("XarmLift-v0", "pixels"), + ("XarmLift-v0", "pixels_agent_pos"), + # TODO(aliberts): Add other tasks + # ("reach", False, False), + # ("reach", True, False), + # ("push", False, False), + # ("push", True, False), + # ("peg_in_box", False, False), + # ("peg_in_box", True, False), + ], +) +def test_env(env_task, obs_type): + import gym_xarm # noqa: F401 + env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) + check_env(env.unwrapped, skip_render_check=True) + # env.reset() + # env.render() if __name__ == "__main__": - test_env() \ No newline at end of file + test_env("XarmLift-v0", "pixels_agent_pos") + # test_env("XarmLift-v0", "state") \ No newline at end of file From 2524966e2c425cfa8425c86cf08a2c2a0836d503 Mon Sep 17 00:00:00 2001 From: Cadene Date: Fri, 5 Apr 2024 16:10:04 +0000 
Subject: [PATCH 05/16] Fix, remove eef_velop, add copy(), same API as pusht --- gym_xarm/robot_env.py | 306 ----------------------------------------- gym_xarm/tasks/base.py | 26 ++-- gym_xarm/tasks/lift.py | 1 - 3 files changed, 10 insertions(+), 323 deletions(-) delete mode 100644 gym_xarm/robot_env.py diff --git a/gym_xarm/robot_env.py b/gym_xarm/robot_env.py deleted file mode 100644 index d1d732e..0000000 --- a/gym_xarm/robot_env.py +++ /dev/null @@ -1,306 +0,0 @@ -import os -from typing import Optional - -import gymnasium as gym -import mujoco -import numpy as np -from gymnasium import spaces -from gymnasium_robotics.utils import mujoco_utils - -DEFAULT_SIZE = 480 - - -class BaseRobotEnv(gym.Env): - """Modified version of BaseRobotEnv to be a `gym.Env` instead of a `gymnasium_robotics.GoalEnv`""" - - metadata = { - "render_modes": [ - "human", - "rgb_array", - ], - "render_fps": 25, - } - - def __init__( - self, - model_path: str, - initial_qpos, - n_actions: int, - n_substeps: int, - render_mode: Optional[str] = None, - width: int = DEFAULT_SIZE, - height: int = DEFAULT_SIZE, - ): - """Initialize the hand and fetch robot superclass. - - Args: - model_path (string): the path to the mjcf MuJoCo model. - initial_qpos (np.ndarray): initial position value of the joints in the MuJoCo simulation. - n_actions (integer): size of the action space. - n_substeps (integer): number of MuJoCo simulation timesteps per Gymnasium step. - render_mode (optional string): type of rendering mode, "human" for window rendeirng and "rgb_array" for offscreen. Defaults to None. - width (optional integer): width of each rendered frame. Defaults to DEFAULT_SIZE. - height (optional integer): height of each rendered frame . Defaults to DEFAULT_SIZE. - """ - if model_path.startswith("/"): - self.fullpath = model_path - else: - self.fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) - if not os.path.exists(self.fullpath): - raise OSError(f"File {self.fullpath} does not exist") - - self.n_substeps = n_substeps - - self.initial_qpos = initial_qpos - - self.width = width - self.height = height - self._initialize_simulation() - - self.goal = np.zeros(0) - obs = self._get_obs() - - assert ( - int(np.round(1.0 / self.dt)) == self.metadata["render_fps"] - ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}' - - self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32") - self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype="float64") - # self.observation_space = spaces.Dict( - # dict( - # desired_goal=spaces.Box( - # -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" - # ), - # achieved_goal=spaces.Box( - # -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" - # ), - # observation=spaces.Box( - # -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64" - # ), - # ) - # ) - - self.render_mode = render_mode - - # Env methods - # ---------------------------- - def compute_terminated(self, achieved_goal, desired_goal, info): - """All the available environments are currently continuing tasks and non-time dependent. 
The objective is to reach the goal for an indefinite period of time.""" - return False - - def compute_truncated(self, achievec_goal, desired_goal, info): - """The environments will be truncated only if setting a time limit with max_steps which will automatically wrap the environment in a gymnasium TimeLimit wrapper.""" - return False - - def step(self, action): - """Run one timestep of the environment's dynamics using the agent actions. - - Args: - action (np.ndarray): Control action to be applied to the agent and update the simulation. Should be of shape :attr:`action_space`. - - Returns: - observation (dictionary): Next observation due to the agent actions .It should satisfy the `GoalEnv` :attr:`observation_space`. - reward (integer): The reward as a result of taking the action. This is calculated by :meth:`compute_reward` of `GoalEnv`. - terminated (boolean): Whether the agent reaches the terminal state. This is calculated by :meth:`compute_terminated` of `GoalEnv`. - truncated (boolean): Whether the truncation condition outside the scope of the MDP is satisfied. Timically, due to a timelimit, but - it is also calculated in :meth:`compute_truncated` of `GoalEnv`. - info (dictionary): Contains auxiliary diagnostic information (helpful for debugging, learning, and logging). In this case there is a single - key `is_success` with a boolean value, True if the `achieved_goal` is the same as the `desired_goal`. - """ - if np.array(action).shape != self.action_space.shape: - raise ValueError("Action dimension mismatch") - - action = np.clip(action, self.action_space.low, self.action_space.high) - self._set_action(action) - - self._mujoco_step(action) - - self._step_callback() - - if self.render_mode == "human": - self.render() - obs = self._get_obs() - - info = { - "is_success": self._is_success(obs["achieved_goal"], self.goal), - } - - terminated = self.compute_terminated(obs["achieved_goal"], self.goal, info) - truncated = self.compute_truncated(obs["achieved_goal"], self.goal, info) - - reward = self.compute_reward(obs["achieved_goal"], self.goal, info) - - return obs, reward, terminated, truncated, info - - def reset( - self, - *, - seed: Optional[int] = None, - options: Optional[dict] = None, - ): - """Reset MuJoCo simulation to initial state. - - Note: Attempt to reset the simulator. Since we randomize initial conditions, it - is possible to get into a state with numerical issues (e.g. due to penetration or - Gimbel lock) or we may not achieve an initial condition (e.g. an object is within the hand). - In this case, we just keep randomizing until we eventually achieve a valid initial - configuration. - - Args: - seed (optional integer): The seed that is used to initialize the environment's PRNG (`np_random`). Defaults to None. - options (optional dictionary): Can be used when `reset` is override for additional information to specify how the environment is reset. - - Returns: - observation (dictionary) : Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`. - info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to - the ``info`` returned by :meth:`step`. 
- """ - super().reset(seed=seed) - did_reset_sim = False - while not did_reset_sim: - did_reset_sim = self._reset_sim() - self.goal = self._sample_goal().copy() - obs = self._get_obs() - if self.render_mode == "human": - self.render() - - return obs, {} - - # Extension methods - # ---------------------------- - def _mujoco_step(self, action): - """Advance the mujoco simulation. - - Override depending on the python binginds, either mujoco or mujoco_py - """ - raise NotImplementedError - - def _reset_sim(self): - """Resets a simulation and indicates whether or not it was successful. - - If a reset was unsuccessful (e.g. if a randomized state caused an error in the - simulation), this method should indicate such a failure by returning False. - In such a case, this method will be called again to attempt a the reset again. - """ - return True - - def _initialize_simulation(self): - """Initialize MuJoCo simulation data structures mjModel and mjData.""" - raise NotImplementedError - - def _get_obs(self): - """Returns the observation.""" - raise NotImplementedError() - - def _set_action(self, action): - """Applies the given action to the simulation.""" - raise NotImplementedError() - - def _is_success(self, achieved_goal, desired_goal): - """Indicates whether or not the achieved goal successfully achieved the desired goal.""" - raise NotImplementedError() - - def _sample_goal(self): - """Samples a new goal and returns it.""" - raise NotImplementedError() - - def _env_setup(self, initial_qpos): - """Initial configuration of the environment. - - Can be used to configure initial state and extract information from the simulation. - """ - pass - - def _render_callback(self): - """A custom callback that is called before rendering. - - Can be used to implement custom visualizations. - """ - pass - - def _step_callback(self): - """A custom callback that is called after stepping the simulation. - - Can be used to enforce additional constraints on the simulation state. - """ - pass - - -class MujocoRobotEnv(BaseRobotEnv): - """Robot base class for fetch and hand environment versions that depend on new mujoco bindings from Deepmind.""" - - def __init__(self, default_camera_config: Optional[dict] = None, **kwargs): - """Initialize mujoco environment. - - The Deepmind mujoco bindings are initialized alongside the respective mujoco_utils. - - Args: - default_camera_config (optional dictionary): dictionary of default mujoco camera parameters for human rendering. Defaults to None. - The keys for this dictionary can be found in the mujoco mjvCamera struct: - https://mujoco.readthedocs.io/en/latest/APIreference.html?highlight=azimuth#mjvcamera. 
- - - "type" (integer): camera type (mjtCamera) - - "fixedcamid" (integer): fixed camera id - - "trackbodyid": body id to track - - "lookat" (np.ndarray): cartesian (x, y, z) lookat point - - "distance" (float): distance to lookat point or tracked body - - "azimuth" (float): camera azimuth (deg) - - "elevation" (float): camera elevation (deg) - """ - - self._mujoco = mujoco - self._utils = mujoco_utils - - super().__init__(**kwargs) - - from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer - - self.observation_renderer = MujocoRenderer(self.model, self.data, default_camera_config) - - def _initialize_simulation(self): - self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) - self.data = self._mujoco.MjData(self.model) - self._model_names = self._utils.MujocoModelNames(self.model) - - self.model.vis.global_.offwidth = self.observation_width - self.model.vis.global_.offheight = self.observation_height - - self._env_setup(initial_qpos=self.initial_qpos) - self.initial_time = self.data.time - self.initial_qpos = np.copy(self.data.qpos) - self.initial_qvel = np.copy(self.data.qvel) - - def _reset_sim(self): - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - if self.model.na != 0: - self.data.act[:] = None - - mujoco.mj_forward(self.model, self.data) - return super()._reset_sim() - - def render(self): - """Render a frame of the MuJoCo simulation. - - Returns: - rgb image (np.ndarray): if render_mode is "rgb_array", return a 3D image array. - """ - self._render_callback() - return self.observation_renderer.render(self.render_mode) - - def close(self): - """Close contains the code necessary to "clean up" the environment. - - Terminates any existing WindowViewer instances in the Gymnaisum MujocoRenderer. 
- """ - if self.observation_renderer is not None: - self.observation_renderer.close() - - @property - def dt(self): - """Return the timestep of each Gymanisum step.""" - return self.model.opt.timestep * self.n_substeps - - def _mujoco_step(self, action): - self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 2a4a125..394e63d 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -19,10 +19,7 @@ class Base(gym.Env): """ metadata = { - "render_modes": [ - "human", - "rgb_array", - ], + "render_modes": [], "render_fps": 25, } n_substeps = 20 @@ -39,7 +36,6 @@ def __init__( observation_height=84, visualization_width=680, visualization_height=680, - render_mode=None, frame_stack=1, channel_last=True, ): @@ -60,7 +56,6 @@ def __init__( self.observation_height = observation_height self.visualization_width = visualization_width self.visualization_height = visualization_height - self.render_mode = render_mode self.frame_stack = frame_stack self._frames = deque([], maxlen=frame_stack) @@ -121,9 +116,9 @@ def _env_setup(self, initial_qpos): def _initialize_observation_space(self): image_shape = ( - (self.observation_width, self.observation_height, 3 * self.frame_stack) + (self.observation_height, self.observation_width, 3 * self.frame_stack) if self.channel_last - else (3 * self.frame_stack, self.observation_width, self.observation_height) + else (3 * self.frame_stack, self.observation_height, self.observation_width) ) obs = self.get_obs() if self.obs_type == "state": @@ -223,8 +218,6 @@ def reset( while not did_reset_sim: did_reset_sim = self._reset_sim() observation = self.get_obs() - if self.render_mode == "human": - self.render() info = {} return observation, info @@ -259,12 +252,13 @@ def step(self, action): self._mujoco.mj_step(self.model, self.data, nstep=2) self._step_callback() observation = self.get_obs() - # observation = self.get_obs() # observation = self._transform_obs(observation) reward = self.get_reward() - done = False - info = {"is_success": self.is_success(), "success": self.is_success()} - return observation, reward, done, info + terminated = is_success = self.is_success() + info = {"is_success": is_success} + + truncated = False + return observation, reward, terminated, truncated, info def _step_callback(self): self._mujoco.mj_forward(self.model, self.data) @@ -348,9 +342,9 @@ def render(self, mode="rgb_array"): render = self.observation_renderer.render("rgb_array", camera_name="camera0") if self.channel_last: - return render + return render.copy() else: - return render.transpose(2, 0, 1) + return render.transpose(2, 0, 1).copy() def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 5bb2293..06897cb 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -89,7 +89,6 @@ def _get_obs(self, agent_only=False): return np.concatenate( [ eef, - self.eef_velp, self.gripper_angle, ] ) From 79e18a320076e003e59c811267e5c0994097c5d9 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:07:51 +0200 Subject: [PATCH 06/16] Cleanup --- gym_xarm/tasks/__init__.py | 186 +++++++---------------------------- gym_xarm/tasks/base.py | 91 ++++++----------- gym_xarm/tasks/lift.py | 44 ++------- gym_xarm/tasks/peg_in_box.py | 2 + gym_xarm/tasks/push.py | 2 + gym_xarm/tasks/reach.py | 2 + tests/test_env.py | 15 +-- 7 files changed, 79 insertions(+), 263 deletions(-) diff --git 
a/gym_xarm/tasks/__init__.py b/gym_xarm/tasks/__init__.py index a20e29d..3fabb3e 100644 --- a/gym_xarm/tasks/__init__.py +++ b/gym_xarm/tasks/__init__.py @@ -1,44 +1,42 @@ -from collections import OrderedDict, deque - -import gymnasium as gym -import numpy as np -from gymnasium.wrappers import TimeLimit +from collections import OrderedDict from gym_xarm.tasks.base import Base as Base from gym_xarm.tasks.lift import Lift -from gym_xarm.tasks.peg_in_box import PegInBox -from gym_xarm.tasks.push import Push -from gym_xarm.tasks.reach import Reach + +# from gym_xarm.tasks.peg_in_box import PegInBox +# from gym_xarm.tasks.push import Push +# from gym_xarm.tasks.reach import Reach + TASKS = OrderedDict( ( - ( - "reach", - { - "env": Reach, - "action_space": "xyz", - "episode_length": 50, - "description": "Reach a target location with the end effector", - }, - ), - ( - "push", - { - "env": Push, - "action_space": "xyz", - "episode_length": 50, - "description": "Push a cube to a target location", - }, - ), - ( - "peg_in_box", - { - "env": PegInBox, - "action_space": "xyz", - "episode_length": 50, - "description": "Insert a peg into a box", - }, - ), + # ( + # "reach", + # { + # "env": Reach, + # "action_space": "xyz", + # "episode_length": 50, + # "description": "Reach a target location with the end effector", + # }, + # ), + # ( + # "push", + # { + # "env": Push, + # "action_space": "xyz", + # "episode_length": 50, + # "description": "Push a cube to a target location", + # }, + # ), + # ( + # "peg_in_box", + # { + # "env": PegInBox, + # "action_space": "xyz", + # "episode_length": 50, + # "description": "Insert a peg into a box", + # }, + # ), ( "lift", { @@ -50,121 +48,3 @@ ), ) ) - - -class SimXarmWrapper(gym.Wrapper): - """ - DEPRECATED: Use gym.make() - - A wrapper for the SimXarm environments. This wrapper is used to - convert the action and observation spaces to the correct format. - """ - - def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False): - super().__init__(env) - self._env = env - self.obs_mode = obs_mode - self.image_size = image_size - self.action_repeat = action_repeat - self.frame_stack = frame_stack - self._frames = deque([], maxlen=frame_stack) - self.channel_last = channel_last - self._max_episode_steps = task["episode_length"] // action_repeat - - image_shape = ( - (image_size, image_size, 3 * frame_stack) - if channel_last - else (3 * frame_stack, image_size, image_size) - ) - if obs_mode == "state": - self.observation_space = env.observation_space["observation"] - elif obs_mode == "rgb": - self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) - elif obs_mode == "all": - self.observation_space = gym.spaces.Dict( - state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), - rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), - ) - else: - raise ValueError(f"Unknown obs_mode {obs_mode}. 
Must be one of [rgb, all, state]") - self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),)) - self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32) - if "w" not in task["action_space"]: - self.action_padding[-1] = 1.0 - - def _render_obs(self): - obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size) - if not self.channel_last: - obs = obs.transpose(2, 0, 1) - return obs.copy() - - def _update_frames(self, reset=False): - pixels = self._render_obs() - self._frames.append(pixels) - if reset: - for _ in range(1, self.frame_stack): - self._frames.append(pixels) - assert len(self._frames) == self.frame_stack - - def transform_obs(self, obs, reset=False): - if self.obs_mode == "state": - return obs["observation"] - elif self.obs_mode == "rgb": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return rgb_obs - elif self.obs_mode == "all": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]") - - def reset(self): - return self.transform_obs(self._env.reset(), reset=True) - - def step(self, action): - action = np.concatenate([action, self.action_padding]) - reward = 0.0 - for _ in range(self.action_repeat): - obs, r, done, info = self._env.step(action) - reward += r - return self.transform_obs(obs), reward, done, info - - def render(self, mode="rgb_array", width=384, height=384, **kwargs): - return self._env.render(mode, width=width, height=height) - - @property - def state(self): - return self._env.robot_state - - -def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0): - """ - DEPRECATED: Use gym.make() - - Create a new environment. - Args: - task (str): The task to create an environment for. Must be one of: - - 'reach' - - 'push' - - 'peg-in-box' - - 'lift' - obs_mode (str): The observation mode to use. Must be one of: - - 'state': Only state observations - - 'rgb': RGB images - - 'all': RGB images and state observations - image_size (int): The size of the image observations - action_repeat (int): The number of times to repeat the action - seed (int): The random seed to use - Returns: - gym.Env: The environment - """ - if task not in TASKS: - raise ValueError(f"Unknown task {task}. 
Must be one of {list(TASKS.keys())}") - env = TASKS[task]["env"]() - env = TimeLimit(env, TASKS[task]["episode_length"]) - env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last) - env.seed(seed) - - return env diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 394e63d..5ae90ae 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -1,5 +1,4 @@ import os -from collections import deque import gymnasium as gym import mujoco @@ -36,10 +35,9 @@ def __init__( observation_height=84, visualization_width=680, visualization_height=680, - frame_stack=1, channel_last=True, ): - # Env setup + # Coordinates if gripper_rotation is None: gripper_rotation = [0, 1, 0, 0] self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32) @@ -56,8 +54,6 @@ def __init__( self.observation_height = observation_height self.visualization_width = visualization_width self.visualization_height = visualization_height - self.frame_stack = frame_stack - self._frames = deque([], maxlen=frame_stack) # Assets self.xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml") @@ -79,15 +75,6 @@ def __init__( if "w" not in self.metadata["action_space"]: self.action_padding[-1] = 1.0 - # super().__init__( - # xml_path = os.path.join(os.path.dirname(__file__), "assets", f"{task}.xml"), - # n_substeps=20, - # n_actions=4, - # initial_qpos={}, - # width=image_size, - # height=image_size, - # ) - def _initialize_simulation(self): """Initialize MuJoCo simulation data structures mjModel and mjData.""" self.model = self._mujoco.MjModel.from_xml_path(self.xml_path) @@ -116,9 +103,9 @@ def _env_setup(self, initial_qpos): def _initialize_observation_space(self): image_shape = ( - (self.observation_height, self.observation_width, 3 * self.frame_stack) + (self.observation_height, self.observation_width, 3) if self.channel_last - else (3 * self.frame_stack, self.observation_height, self.observation_width) + else (3, self.observation_height, self.observation_width) ) obs = self.get_obs() if self.obs_type == "state": @@ -145,7 +132,7 @@ def _initialize_renderer(self, type: str): if type == "observation": model = self.model elif type == "visualization": - # HACK: MujoCo doesn't allow for custom size rendering on-the-fly, so we + # HACK: gymnasium doesn't allow for custom size rendering on-the-fly, so we # initialize another renderer with appropriate size for visualization purposes # see https://gymnasium.farama.org/content/migration-guide/#environment-render from copy import deepcopy @@ -165,16 +152,35 @@ def dt(self): @property def eef(self): - return self._utils.get_site_xpos(self.model, self.data, "grasp") + return self._utils.get_site_xpos(self.model, self.data, "grasp") - self.center_of_table @property - def obj(self): - return self._utils.get_site_xpos(self.model, self.data, "object_site") + def eef_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt + + @property + def gripper_angle(self): + return self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") @property def robot_state(self): - gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") - return np.concatenate([self.eef, gripper_angle]) + return np.concatenate([self.eef - self.center_of_table, self.gripper_angle]) + + @property + def obj(self): + return self._utils.get_site_xpos(self.model, self.data, "object_site") - self.center_of_table + + @property + def obj_rot(self): + return 
self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] + + @property + def obj_velp(self): + return self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt + + @property + def obj_velr(self): + return self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt def is_success(self): """Indicates whether or not the achieved goal successfully achieved the desired goal.""" @@ -237,14 +243,6 @@ def _reset_sim(self): mujoco.mj_forward(self.model, self.data) return True - # def reset(self, seed=None, options=None): - # super().reset(seed=seed, options=options) - # self._reset_sim() - # observation = self._get_obs() - # observation = self._transform_obs(observation) - # info = {} - # return observation, info - def step(self, action): assert action.shape == (4,) assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" @@ -252,12 +250,11 @@ def step(self, action): self._mujoco.mj_step(self.model, self.data, nstep=2) self._step_callback() observation = self.get_obs() - # observation = self._transform_obs(observation) reward = self.get_reward() terminated = is_success = self.is_success() + truncated = False info = {"is_success": is_success} - truncated = False return observation, reward, terminated, truncated, info def _step_callback(self): @@ -307,40 +304,12 @@ def _set_gripper(self, gripper_pos, gripper_rotation): self.data.qpos[10] = 0.0 self.data.qpos[12] = 0.0 - # def _transform_obs(self, obs, reset=False): - # if self.obs_type == "state": - # return obs["observation"] - # elif self.obs_type == "rgb": - # self._update_frames(reset=reset) - # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - # return rgb_obs - # elif self.obs_type == "all": - # self._update_frames(reset=reset) - # rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - # return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - # else: - # raise ValueError(f"Unknown obs_type {self.obs_type}. 
Must be one of [rgb, all, state]") - - # def _update_frames(self, reset=False): - # pixels = self._render_obs() - # self._frames.append(pixels) - # if reset: - # for _ in range(1, self.frame_stack): - # self._frames.append(pixels) - # assert len(self._frames) == self.frame_stack - - # def _render_obs(self): - # obs = self.render(mode="rgb_array") - # if not self.channel_last: - # obs = obs.transpose(2, 0, 1) - # return obs.copy() - def render(self, mode="rgb_array"): self._render_callback() if mode == "visualize": return self.visualization_renderer.render("rgb_array", camera_name="camera0") - render = self.observation_renderer.render("rgb_array", camera_name="camera0") + render = self.observation_renderer.render(mode, camera_name="camera0") if self.channel_last: return render.copy() else: diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 06897cb..12cb564 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -19,26 +19,6 @@ def __init__(self, **kwargs): def z_target(self): return self._init_z + self._z_threshold - @property - def eef_velp(self): - return self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt - - @property - def obj_rot(self): - return self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] - - @property - def obj_velp(self): - return self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt - - @property - def obj_velr(self): - return self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - - @property - def gripper_angle(self): - return self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") - def is_success(self): return self.obj[2] >= self.z_target @@ -76,29 +56,19 @@ def get_obs(self): elif self.obs_type == "pixels_agent_pos": return { "pixels": pixels, - "agent_pos": self._get_obs(agent_only=True), + "agent_pos": self.robot_state, } else: raise ValueError( f"Unknown obs_type {self.obs_type}. 
Must be one of [pixels, state, pixels_agent_pos]" ) - def _get_obs(self, agent_only=False): - eef = self.eef - self.center_of_table - if agent_only: - return np.concatenate( - [ - eef, - self.gripper_angle, - ] - ) - - obj = self.obj - self.center_of_table + def _get_obs(self): return np.concatenate( [ - eef, + self.eef, self.eef_velp, - obj, + self.obj, self.obj_rot, self.obj_velp, self.obj_velr, @@ -106,10 +76,10 @@ def _get_obs(self, agent_only=False): np.array( [ np.linalg.norm(self.eef - self.obj), - np.linalg.norm(eef[:-1] - obj[:-1]), + np.linalg.norm(self.eef[:-1] - self.obj[:-1]), self.z_target, - self.z_target - obj[-1], - self.z_target - eef[-1], + self.z_target - self.obj[-1], + self.z_target - self.eef[-1], ] ), self.gripper_angle, diff --git a/gym_xarm/tasks/peg_in_box.py b/gym_xarm/tasks/peg_in_box.py index 4f21c48..1db5e42 100644 --- a/gym_xarm/tasks/peg_in_box.py +++ b/gym_xarm/tasks/peg_in_box.py @@ -4,6 +4,8 @@ class PegInBox(Base): + """DEPRECATED: use only Lift for now""" + def __init__(self): super().__init__("peg_in_box") diff --git a/gym_xarm/tasks/push.py b/gym_xarm/tasks/push.py index 8b145a6..daa6b60 100644 --- a/gym_xarm/tasks/push.py +++ b/gym_xarm/tasks/push.py @@ -4,6 +4,8 @@ class Push(Base): + """DEPRECATED: use only Lift for now""" + def __init__(self): super().__init__("push") diff --git a/gym_xarm/tasks/reach.py b/gym_xarm/tasks/reach.py index 94688e7..ee1a533 100644 --- a/gym_xarm/tasks/reach.py +++ b/gym_xarm/tasks/reach.py @@ -4,6 +4,8 @@ class Reach(Base): + """DEPRECATED: use only Lift for now""" + def __init__(self): super().__init__("reach") diff --git a/tests/test_env.py b/tests/test_env.py index 9d2e8ba..f08f951 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -2,29 +2,20 @@ import gymnasium as gym from gymnasium.utils.env_checker import check_env +import gym_xarm # noqa: F401 + @pytest.mark.parametrize( "env_task, obs_type", [ ("XarmLift-v0", "state"), ("XarmLift-v0", "pixels"), ("XarmLift-v0", "pixels_agent_pos"), - # TODO(aliberts): Add other tasks - # ("reach", False, False), - # ("reach", True, False), - # ("push", False, False), - # ("push", True, False), - # ("peg_in_box", False, False), - # ("peg_in_box", True, False), ], ) def test_env(env_task, obs_type): - import gym_xarm # noqa: F401 env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) - check_env(env.unwrapped, skip_render_check=True) - # env.reset() - # env.render() + check_env(env.unwrapped) if __name__ == "__main__": test_env("XarmLift-v0", "pixels_agent_pos") - # test_env("XarmLift-v0", "state") \ No newline at end of file From 0eb8d27d85e5500186451f9dc694c4a8ab461a44 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:17:12 +0200 Subject: [PATCH 07/16] Comment unused tasks --- gym_xarm/tasks/peg_in_box.py | 163 ++++++++++++++++++----------------- gym_xarm/tasks/push.py | 127 +++++++++++++-------------- gym_xarm/tasks/reach.py | 79 ++++++++--------- 3 files changed, 186 insertions(+), 183 deletions(-) diff --git a/gym_xarm/tasks/peg_in_box.py b/gym_xarm/tasks/peg_in_box.py index 1db5e42..d9425af 100644 --- a/gym_xarm/tasks/peg_in_box.py +++ b/gym_xarm/tasks/peg_in_box.py @@ -1,4 +1,4 @@ -import numpy as np +# import numpy as np from gym_xarm.tasks import Base @@ -6,83 +6,84 @@ class PegInBox(Base): """DEPRECATED: use only Lift for now""" - def __init__(self): - super().__init__("peg_in_box") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - for _ in range(10): - self._apply_action(np.array([0, 0, 0, 1], 
dtype=np.float32)) - self.sim.step() - - @property - def box(self): - return self.sim.data.get_site_xpos("box_site") - - def is_success(self): - return np.linalg.norm(self.obj - self.box) <= 0.05 - - def get_reward(self): - dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2]) - dist_xyz = np.linalg.norm(self.obj - self.box) - return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy - - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, box = self.eef - self.center_of_table, self.box - self.center_of_table - - obj = self.obj - self.center_of_table - obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] - obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt - obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt - - obs = np.concatenate( - [ - eef, - eef_velp, - box, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - box, - eef - obj, - obj - box, - np.array( - [ - np.linalg.norm(eef - box), - np.linalg.norm(eef - obj), - np.linalg.norm(obj - box), - gripper_angle, - ] - ), - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Object - object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3) - object_qpos = self.sim.data.get_joint_qpos("object_joint0") - object_qpos[:3] = object_pos - self.sim.data.set_joint_qpos("object_joint0", object_qpos) - - # Box - box_pos = np.array([1.61, 0.18, 0.58]) - box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2) - box_qpos = self.sim.data.get_joint_qpos("box_joint0") - box_qpos[:3] = box_pos - self.sim.data.set_joint_qpos("box_joint0", box_qpos) - - return self.box - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) + ... 
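Note on the pattern above: each deprecated task class (`PegInBox`, and `Push` and `Reach` below) keeps a bare `...` (Ellipsis) body so the module still imports and the class name still resolves, while the old implementation is preserved as comments. A minimal sketch of the resulting module shape, assuming nothing beyond the names already in this diff:

```python
from gym_xarm.tasks import Base


class PegInBox(Base):
    """DEPRECATED: use only Lift for now"""

    # `...` is a valid (empty) class body: the class can still be imported
    # and referenced, but it adds no behavior on top of Base.
    ...
```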
+ # def __init__(self): + # super().__init__("peg_in_box") + + # def _reset_sim(self): + # self._act_magnitude = 0 + # super()._reset_sim() + # for _ in range(10): + # self._apply_action(np.array([0, 0, 0, 1], dtype=np.float32)) + # self.sim.step() + + # @property + # def box(self): + # return self.sim.data.get_site_xpos("box_site") + + # def is_success(self): + # return np.linalg.norm(self.obj - self.box) <= 0.05 + + # def get_reward(self): + # dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2]) + # dist_xyz = np.linalg.norm(self.obj - self.box) + # return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy + + # def _get_obs(self): + # eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt + # gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") + # eef, box = self.eef - self.center_of_table, self.box - self.center_of_table + + # obj = self.obj - self.center_of_table + # obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] + # obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt + # obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt + + # obs = np.concatenate( + # [ + # eef, + # eef_velp, + # box, + # obj, + # obj_rot, + # obj_velp, + # obj_velr, + # eef - box, + # eef - obj, + # obj - box, + # np.array( + # [ + # np.linalg.norm(eef - box), + # np.linalg.norm(eef - obj), + # np.linalg.norm(obj - box), + # gripper_angle, + # ] + # ), + # ], + # axis=0, + # ) + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box} + + # def _sample_goal(self): + # # Gripper + # gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3) + # super()._set_gripper(gripper_pos, self.gripper_rotation) + + # # Object + # object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3) + # object_qpos = self.sim.data.get_joint_qpos("object_joint0") + # object_qpos[:3] = object_pos + # self.sim.data.set_joint_qpos("object_joint0", object_qpos) + + # # Box + # box_pos = np.array([1.61, 0.18, 0.58]) + # box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2) + # box_qpos = self.sim.data.get_joint_qpos("box_joint0") + # box_qpos[:3] = box_pos + # self.sim.data.set_joint_qpos("box_joint0", box_qpos) + + # return self.box + + # def step(self, action): + # self._act_magnitude = np.linalg.norm(action[:3]) + # return super().step(action) diff --git a/gym_xarm/tasks/push.py b/gym_xarm/tasks/push.py index daa6b60..0efc933 100644 --- a/gym_xarm/tasks/push.py +++ b/gym_xarm/tasks/push.py @@ -1,4 +1,4 @@ -import numpy as np +# import numpy as np from gym_xarm.tasks import Base @@ -6,75 +6,76 @@ class Push(Base): """DEPRECATED: use only Lift for now""" - def __init__(self): - super().__init__("push") + ... 
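For reference, the commented-out `Push.get_reward` below is a shaped dense reward: the negative object-to-goal distance, minus a quadratic penalty on the magnitude of the translational action. A self-contained sketch of the same computation (the function name and arguments are illustrative, not part of the package):

```python
import numpy as np


def push_reward(obj_pos, goal_pos, action_xyz, penalty_weight=0.15):
    # Closer to the goal pushes the reward toward 0 from below;
    # large actions are discouraged by a quadratic penalty.
    dist = np.linalg.norm(obj_pos - goal_pos)
    penalty = np.linalg.norm(action_xyz) ** 2
    return -(dist + penalty_weight * penalty)


# Object 5 cm from the goal with an action of magnitude 0.1:
# -(0.05 + 0.15 * 0.01) = -0.0515
reward = push_reward(
    np.array([1.65, 0.20, 0.55]),
    np.array([1.60, 0.20, 0.55]),
    np.array([0.1, 0.0, 0.0]),
)
```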
+ # def __init__(self): + # super().__init__("push") - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() + # def _reset_sim(self): + # self._act_magnitude = 0 + # super()._reset_sim() - def is_success(self): - return np.linalg.norm(self.obj - self.goal) <= 0.05 + # def is_success(self): + # return np.linalg.norm(self.obj - self.goal) <= 0.05 - def get_reward(self): - dist = np.linalg.norm(self.obj - self.goal) - penalty = self._act_magnitude**2 - return -(dist + 0.15 * penalty) + # def get_reward(self): + # dist = np.linalg.norm(self.obj - self.goal) + # penalty = self._act_magnitude**2 + # return -(dist + 0.15 * penalty) - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table + # def _get_obs(self): + # eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt + # gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") + # eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table - obj = self.obj - self.center_of_table - obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] - obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt - obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt + # obj = self.obj - self.center_of_table + # obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] + # obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt + # obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt - obs = np.concatenate( - [ - eef, - eef_velp, - goal, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - goal, - eef - obj, - obj - goal, - np.array( - [ - np.linalg.norm(eef - goal), - np.linalg.norm(eef - obj), - np.linalg.norm(obj - goal), - gripper_angle, - ] - ), - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} + # obs = np.concatenate( + # [ + # eef, + # eef_velp, + # goal, + # obj, + # obj_rot, + # obj_velp, + # obj_velr, + # eef - goal, + # eef - obj, + # obj - goal, + # np.array( + # [ + # np.linalg.norm(eef - goal), + # np.linalg.norm(eef - obj), + # np.linalg.norm(obj - goal), + # gripper_angle, + # ] + # ), + # ], + # axis=0, + # ) + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) + # def _sample_goal(self): + # # Gripper + # gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) + # super()._set_gripper(gripper_pos, self.gripper_rotation) - # Object - object_pos = self.center_of_table - np.array([0.25, 0, 0.07]) - object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1) - object_pos[1] += self.np_random.uniform(-0.08, 0.08, size=1) - object_qpos = self.sim.data.get_joint_qpos("object_joint0") - object_qpos[:3] = object_pos - self.sim.data.set_joint_qpos("object_joint0", object_qpos) + # # Object + # object_pos = self.center_of_table - np.array([0.25, 0, 0.07]) + # object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1) + # object_pos[1] += self.np_random.uniform(-0.08, 0.08, size=1) + # object_qpos = self.sim.data.get_joint_qpos("object_joint0") + # object_qpos[:3] = object_pos + # self.sim.data.set_joint_qpos("object_joint0", object_qpos) - # Goal - self.goal 
= np.array([1.600, 0.200, 0.545]) - self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2) - self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal - return self.goal + # # Goal + # self.goal = np.array([1.600, 0.200, 0.545]) + # self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2) + # self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal + # return self.goal - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) + # def step(self, action): + # self._act_magnitude = np.linalg.norm(action[:3]) + # return super().step(action) diff --git a/gym_xarm/tasks/reach.py b/gym_xarm/tasks/reach.py index ee1a533..828e989 100644 --- a/gym_xarm/tasks/reach.py +++ b/gym_xarm/tasks/reach.py @@ -1,4 +1,4 @@ -import numpy as np +# import numpy as np from gym_xarm.tasks import Base @@ -6,41 +6,42 @@ class Reach(Base): """DEPRECATED: use only Lift for now""" - def __init__(self): - super().__init__("reach") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - - def is_success(self): - return np.linalg.norm(self.eef - self.goal) <= 0.05 - - def get_reward(self): - dist = np.linalg.norm(self.eef - self.goal) - penalty = self._act_magnitude**2 - return -(dist + 0.15 * penalty) - - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table - obs = np.concatenate( - [eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0 - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Goal - self.goal = np.array([1.550, 0.287, 0.580]) - self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2) - self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal - return self.goal - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) + ... 
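The commented-out `Reach._sample_goal` below shows the goal-sampling pattern these tasks share: jitter a nominal target with the environment's seeded RNG, then write the result into the `target0` MuJoCo site. A standalone sketch of just the sampling step (the nominal position and jitter range are copied from the code below; the function name is illustrative):

```python
import numpy as np


def sample_reach_goal(rng: np.random.Generator) -> np.ndarray:
    # Nominal target above the table, jittered in x/y so each episode differs.
    goal = np.array([1.550, 0.287, 0.580])
    goal[:2] += rng.uniform(-0.125, 0.125, size=2)
    return goal


goal = sample_reach_goal(np.random.default_rng(seed=0))
```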
+ # def __init__(self): + # super().__init__("reach") + + # def _reset_sim(self): + # self._act_magnitude = 0 + # super()._reset_sim() + + # def is_success(self): + # return np.linalg.norm(self.eef - self.goal) <= 0.05 + + # def get_reward(self): + # dist = np.linalg.norm(self.eef - self.goal) + # penalty = self._act_magnitude**2 + # return -(dist + 0.15 * penalty) + + # def _get_obs(self): + # eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt + # gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") + # eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table + # obs = np.concatenate( + # [eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0 + # ) + # return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} + + # def _sample_goal(self): + # # Gripper + # gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) + # super()._set_gripper(gripper_pos, self.gripper_rotation) + + # # Goal + # self.goal = np.array([1.550, 0.287, 0.580]) + # self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2) + # self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal + # return self.goal + + # def step(self, action): + # self._act_magnitude = np.linalg.norm(action[:3]) + # return super().step(action) From 4c65f3ed8c7aeb938cc42a3c8af7b31a319c10ef Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:39:47 +0200 Subject: [PATCH 08/16] Set MUJOCO_GL on-the-fly --- .github/workflows/test.yml | 1 - gym_xarm/tasks/base.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2769dcd..dafaf9e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,6 @@ jobs: runs-on: ubuntu-latest env: POETRY_VERSION: 1.8.2 - MUJOCO_GL: egl steps: #---------------------------------------------- # check-out repo and set-up python diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 5ae90ae..5f7f893 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -143,6 +143,9 @@ def _initialize_renderer(self, type: str): else: raise ValueError(f"Unknown renderer type {type}. Must be one of [observation, visualization]") + if os.environ.get("MUJOCO_GL") not in ["glfw", "egl", "osmesa"]: + os.environ["MUJOCO_GL"] = "egl" + return MujocoRenderer(model, self.data) @property From d06072c47d4084b7f329e3e149f5f296badb5dce Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 11:49:12 +0200 Subject: [PATCH 09/16] Use _ALL_RENDERERS --- gym_xarm/tasks/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 5f7f893..4d8affa 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -3,7 +3,7 @@ import gymnasium as gym import mujoco import numpy as np -from gymnasium.envs.mujoco.mujoco_rendering import MujocoRenderer +from gymnasium.envs.mujoco.mujoco_rendering import _ALL_RENDERERS, MujocoRenderer from gymnasium_robotics.utils import mujoco_utils from gym_xarm.tasks import mocap @@ -143,7 +143,7 @@ def _initialize_renderer(self, type: str): else: raise ValueError(f"Unknown renderer type {type}. 
Must be one of [observation, visualization]") - if os.environ.get("MUJOCO_GL") not in ["glfw", "egl", "osmesa"]: + if os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: os.environ["MUJOCO_GL"] = "egl" return MujocoRenderer(model, self.data) From 3a72cf0a36c2a9f73ebf1020705ea26f20a37dc0 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 13:00:22 +0200 Subject: [PATCH 10/16] Cleanup --- .github/workflows/test.yml | 2 +- tests/test_env.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dafaf9e..6f6711d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,7 +83,7 @@ jobs: path: .venv key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }} - - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl) + - name: Install libegl1-mesa-dev run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev #---------------------------------------------- diff --git a/tests/test_env.py b/tests/test_env.py index f08f951..098f25e 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -15,7 +15,3 @@ def test_env(env_task, obs_type): env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) check_env(env.unwrapped) - - -if __name__ == "__main__": - test_env("XarmLift-v0", "pixels_agent_pos") From e54cdcc9ef40fa961e80239f8b6dd1640dac6e8e Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 14:55:11 +0200 Subject: [PATCH 11/16] Move get_obs, use render_mode --- gym_xarm/tasks/base.py | 64 +++++++++++++++++++++++++----------------- gym_xarm/tasks/lift.py | 16 ----------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 4d8affa..6f7e73a 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -18,7 +18,7 @@ class Base(gym.Env): """ metadata = { - "render_modes": [], + "render_modes": ["human", "rgb_array"], "render_fps": 25, } n_substeps = 20 @@ -30,12 +30,12 @@ def __init__( self, task, obs_type="state", + render_mode="rgb_array", gripper_rotation=None, observation_width=84, observation_height=84, visualization_width=680, visualization_height=680, - channel_last=True, ): # Coordinates if gripper_rotation is None: @@ -47,9 +47,9 @@ def __init__( # Observations self.obs_type = obs_type - self.channel_last = channel_last # Rendering + self.render_mode = render_mode self.observation_width = observation_width self.observation_height = observation_height self.visualization_width = visualization_width @@ -62,8 +62,8 @@ def __init__( # Initialize sim, spaces & renderers self._initialize_simulation() - self.observation_renderer = self._initialize_renderer(type="observation") - self.visualization_renderer = self._initialize_renderer(type="visualization") + self.observation_renderer = self._initialize_renderer(render_type="observation") + self.visualization_renderer = self._initialize_renderer(render_type="visualization") self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) @@ -102,11 +102,7 @@ def _env_setup(self, initial_qpos): mujoco.mj_forward(self.model, self.data) def _initialize_observation_space(self): - image_shape = ( - (self.observation_height, self.observation_width, 3) - if self.channel_last - else (3, self.observation_height, 
self.observation_width) - ) + image_shape = (self.observation_height, self.observation_width, 3) obs = self.get_obs() if self.obs_type == "state": observation_space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype=np.float64) @@ -128,10 +124,10 @@ def _initialize_observation_space(self): return observation_space - def _initialize_renderer(self, type: str): - if type == "observation": + def _initialize_renderer(self, render_type: str): + if render_type == "observation": model = self.model - elif type == "visualization": + elif render_type == "visualization": # HACK: gymnasium doesn't allow for custom size rendering on-the-fly, so we # initialize another renderer with appropriate size for visualization purposes # see https://gymnasium.farama.org/content/migration-guide/#environment-render @@ -141,7 +137,9 @@ def _initialize_renderer(self, type: str): model.vis.global_.offwidth = self.visualization_width model.vis.global_.offheight = self.visualization_height else: - raise ValueError(f"Unknown renderer type {type}. Must be one of [observation, visualization]") + raise ValueError( + f"Unknown render type {render_type}. Must be one of [observation, visualization]" + ) if os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: os.environ["MUJOCO_GL"] = "egl" @@ -196,9 +194,6 @@ def _sample_goal(self): """Samples a new goal and returns it.""" raise NotImplementedError() - def get_obs(self): - raise NotImplementedError() - def reset( self, *, @@ -246,6 +241,22 @@ def _reset_sim(self): mujoco.mj_forward(self.model, self.data) return True + def get_obs(self): + if self.obs_type == "state": + return self._get_obs() + pixels = self._render(render_type="observation") + if self.obs_type == "pixels": + return pixels + elif self.obs_type == "pixels_agent_pos": + return { + "pixels": pixels, + "agent_pos": self.robot_state, + } + else: + raise ValueError( + f"Unknown obs_type {self.obs_type}. 
Must be one of [pixels, state, pixels_agent_pos]" + ) + def step(self, action): assert action.shape == (4,) assert self.action_space.contains(action), f"{action!r} ({type(action)}) invalid" @@ -307,16 +318,19 @@ def _set_gripper(self, gripper_pos, gripper_rotation): self.data.qpos[10] = 0.0 self.data.qpos[12] = 0.0 - def render(self, mode="rgb_array"): - self._render_callback() - if mode == "visualize": - return self.visualization_renderer.render("rgb_array", camera_name="camera0") + def render(self): + return self._render(render_type="visualization") - render = self.observation_renderer.render(mode, camera_name="camera0") - if self.channel_last: - return render.copy() + def _render(self, render_type): + self._render_callback() + if render_type == "visualization": + render = self.visualization_renderer.render(self.render_mode, camera_name="camera0") + elif render_type == "observation": + render = self.observation_renderer.render(self.render_mode, camera_name="camera0") else: - return render.transpose(2, 0, 1).copy() + raise ValueError(render_type) + + return render.copy() def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) diff --git a/gym_xarm/tasks/lift.py b/gym_xarm/tasks/lift.py index 12cb564..4ecd0c2 100644 --- a/gym_xarm/tasks/lift.py +++ b/gym_xarm/tasks/lift.py @@ -47,22 +47,6 @@ def get_reward(self): return reach_reward / 100 + pick_reward - def get_obs(self): - if self.obs_type == "state": - return self._get_obs() - pixels = self.render() - if self.obs_type == "pixels": - return pixels - elif self.obs_type == "pixels_agent_pos": - return { - "pixels": pixels, - "agent_pos": self.robot_state, - } - else: - raise ValueError( - f"Unknown obs_type {self.obs_type}. Must be one of [pixels, state, pixels_agent_pos]" - ) - def _get_obs(self): return np.concatenate( [ From 4c3a5bdb0105ef09abe5402f718608e3741a5625 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 15:51:33 +0200 Subject: [PATCH 12/16] WIP --- gym_xarm/tasks/base.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 6f7e73a..cbbd46e 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -8,6 +8,12 @@ from gym_xarm.tasks import mocap +RENDER_MODES = ["rgb_array"] +if os.environ.get("MUJOCO_GL") == "glfw": + RENDER_MODES.append("human") +elif os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: + os.environ["MUJOCO_GL"] = "egl" + class Base(gym.Env): """ @@ -18,7 +24,7 @@ class Base(gym.Env): """ metadata = { - "render_modes": ["human", "rgb_array"], + "render_modes": RENDER_MODES, "render_fps": 25, } n_substeps = 20 @@ -141,9 +147,6 @@ def _initialize_renderer(self, render_type: str): f"Unknown render type {render_type}. 
Must be one of [observation, visualization]" ) - if os.environ.get("MUJOCO_GL") not in _ALL_RENDERERS: - os.environ["MUJOCO_GL"] = "egl" - return MujocoRenderer(model, self.data) @property @@ -325,12 +328,14 @@ def _render(self, render_type): self._render_callback() if render_type == "visualization": render = self.visualization_renderer.render(self.render_mode, camera_name="camera0") + if self.render_mode != "human": + render = render.copy() elif render_type == "observation": - render = self.observation_renderer.render(self.render_mode, camera_name="camera0") + render = self.observation_renderer.render("rgb_array", camera_name="camera0").copy() else: raise ValueError(render_type) - return render.copy() + return render def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) From 8c9b4274ab7436cdb3378d099acd509f35e77051 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sat, 6 Apr 2024 16:30:08 +0200 Subject: [PATCH 13/16] Pass renderer object on _render calls --- gym_xarm/tasks/base.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index cbbd46e..7e2ddc4 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -68,8 +68,8 @@ def __init__( # Initialize sim, spaces & renderers self._initialize_simulation() - self.observation_renderer = self._initialize_renderer(render_type="observation") - self.visualization_renderer = self._initialize_renderer(render_type="visualization") + self.observation_renderer = self._initialize_renderer(renderer_type="observation") + self.visualization_renderer = self._initialize_renderer(renderer_type="visualization") self.observation_space = self._initialize_observation_space() self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(self.metadata["action_space"]),)) self.action_padding = np.zeros(4 - len(self.metadata["action_space"]), dtype=np.float32) @@ -130,10 +130,10 @@ def _initialize_observation_space(self): return observation_space - def _initialize_renderer(self, render_type: str): - if render_type == "observation": + def _initialize_renderer(self, renderer_type: str): + if renderer_type == "observation": model = self.model - elif render_type == "visualization": + elif renderer_type == "visualization": # HACK: gymnasium doesn't allow for custom size rendering on-the-fly, so we # initialize another renderer with appropriate size for visualization purposes # see https://gymnasium.farama.org/content/migration-guide/#environment-render @@ -144,7 +144,7 @@ def _initialize_renderer(self, render_type: str): model.vis.global_.offheight = self.visualization_height else: raise ValueError( - f"Unknown render type {render_type}. Must be one of [observation, visualization]" + f"Unknown render type {renderer_type}. 
Must be one of [observation, visualization]" ) return MujocoRenderer(model, self.data) @@ -247,7 +247,7 @@ def _reset_sim(self): def get_obs(self): if self.obs_type == "state": return self._get_obs() - pixels = self._render(render_type="observation") + pixels = self._render(renderer=self.observation_renderer) if self.obs_type == "pixels": return pixels elif self.obs_type == "pixels_agent_pos": @@ -322,20 +322,12 @@ def _set_gripper(self, gripper_pos, gripper_rotation): self.data.qpos[12] = 0.0 def render(self): - return self._render(render_type="visualization") + return self._render(renderer=self.visualization_renderer) - def _render(self, render_type): + def _render(self, renderer: MujocoRenderer): self._render_callback() - if render_type == "visualization": - render = self.visualization_renderer.render(self.render_mode, camera_name="camera0") - if self.render_mode != "human": - render = render.copy() - elif render_type == "observation": - render = self.observation_renderer.render("rgb_array", camera_name="camera0").copy() - else: - raise ValueError(render_type) - - return render + render = renderer.render(self.render_mode, camera_name="camera0") + return render.copy() if render is not None else None def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) From 9a03c0a65302fad768f1dcf80e438a013fd60f5c Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Sun, 7 Apr 2024 14:22:41 +0200 Subject: [PATCH 14/16] Remove GoalEnv mention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com> --- gym_xarm/tasks/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gym_xarm/tasks/base.py b/gym_xarm/tasks/base.py index 7e2ddc4..4ab86d5 100644 --- a/gym_xarm/tasks/base.py +++ b/gym_xarm/tasks/base.py @@ -216,7 +216,7 @@ def reset( options (optional dictionary): Can be used when `reset` is override for additional information to specify how the environment is reset. Returns: - observation (dictionary) : Observation of the initial state. It should satisfy the `GoalEnv` :attr:`observation_space`. + observation (dictionary) : Observation of the initial state. info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to the ``info`` returned by :meth:`step`. """ From 14875d37f4125423510f848eea7537e6fb5c5bf9 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 7 Apr 2024 14:37:07 +0200 Subject: [PATCH 15/16] Add example, Update README --- .pre-commit-config.yaml | 2 +- README.md | 62 ++++++++++++++++++++++++++++++++++++----- example.py | 15 ++++++++++ 3 files changed, 71 insertions(+), 8 deletions(-) create mode 100644 example.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f23d75..d998dac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^(data/|tests/) +exclude: ^(data/|tests/|example.py) default_language_version: python: python3.10 repos: diff --git a/README.md b/README.md index e47ee1a..4312704 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,11 @@ A gym environment for xArm TDMPC policy on xArm env -## Acknowledgment - -gym-xarm is adapted from [FOWM](https://www.yunhaifeng.com/FOWM/) - - ## Installation Create a virtual environment with Python 3.10 and activate it, e.g. 
with [`miniconda`](https://docs.anaconda.com/free/miniconda/index.html): ```bash -conda create -y -n xarm python=3.10 -conda activate xarm +conda create -y -n xarm python=3.10 && conda activate xarm ``` Install gym-xarm: ``` pip install gym-xarm ``` +## Quickstart + +```python +# example.py +import gymnasium as gym +import gym_xarm + +env = gym.make("gym_xarm/XarmLift-v0", render_mode="human") +observation, info = env.reset() + +for _ in range(1000): + action = env.action_space.sample() + observation, reward, terminated, truncated, info = env.step(action) + image = env.render() + + if terminated or truncated: + observation, info = env.reset() + +env.close() +``` + +To use this [example](./example.py) with `render_mode="human"`, you should set the `MUJOCO_GL` environment variable (e.g. `export MUJOCO_GL=glfw`) or simply run +```bash +MUJOCO_GL=glfw python example.py +``` + +## Description of the `Lift` task + +The goal of the agent is to lift the block above a height threshold. The agent is an xArm robot arm and the block is a cube. + +### Action Space + +The action space is continuous and consists of four values [x, y, z, w]: +- [x, y, z] represent the position of the end effector +- [w] represents the gripper control + +### Observation Space + +The observation space depends on the value of `obs_type`: +- `"state"`: observations contain agent and object state vectors only (no rendering) +- `"pixels"`: observations contain the rendered image only (no state vectors) +- `"pixels_agent_pos"`: observations contain the rendered image and the agent state vector + + ## Contribute Instead of using `pip` directly, we use `poetry` for development purposes to easily track our dependencies. If you don't have it already, follow the [instructions](https://python-poetry.org/docs/#installation) to install it. Install the project with dev dependencies: ```bash poetry install --with dev ``` ### Follow our style ```bash # install pre-commit hooks pre-commit install # apply style and linter checks on staged files pre-commit ``` + + +## Acknowledgment + +gym-xarm is adapted from [FOWM](https://www.yunhaifeng.com/FOWM/) and is based on work by [Nicklas Hansen](https://nicklashansen.github.io/), [Yanjie Ze](https://yanjieze.com/), [Rishabh Jangir](https://jangirrishabh.github.io/), [Mohit Jain](https://natsu6767.github.io/), and [Sambaran Ghosal](https://github.com/SambaranRepo) as part of the following publications: +* [Self-Supervised Policy Adaptation During Deployment](https://arxiv.org/abs/2007.04309) +* [Generalization in Reinforcement Learning by Soft Data Augmentation](https://arxiv.org/abs/2011.13389) +* [Stabilizing Deep Q-Learning with ConvNets and Vision Transformers under Data Augmentation](https://arxiv.org/abs/2107.00644) +* [Look Closer: Bridging Egocentric and Third-Person Views with Transformers for Robotic Manipulation](https://arxiv.org/abs/2201.07779) +* [Visual Reinforcement Learning with Self-Supervised 3D Representations](https://arxiv.org/abs/2210.07241) diff --git a/example.py b/example.py new file mode 100644 index 0000000..40d565b --- /dev/null +++ b/example.py @@ -0,0 +1,15 @@ +import gymnasium as gym +import gym_xarm + +env = gym.make("gym_xarm/XarmLift-v0", render_mode="human") +observation, info = env.reset() + +for _ in range(1000): + action = env.action_space.sample() + observation, reward, terminated, truncated, info = env.step(action) + image = env.render() + + if terminated or truncated: + observation, info = env.reset() + +env.close() From 07a139ab2f898beacc2a59e52dc3cea1b5dca96a Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Sun, 7 Apr 2024 14:41:15 +0200 Subject: [PATCH 16/16] pre-commit run -a --- .pre-commit-config.yaml | 2 +- poetry.lock | 3 +-- tests/test_env.py | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) 
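A note on the `exclude` change in the diff below: pre-commit documents `exclude` as a Python regular expression searched against each file path (with `re.search`), so `^(example.py)` keeps the new top-level `example.py` out of the hooks while `tests/` and `data/` are now checked again. A quick sketch of the matching semantics (note the unescaped `.`, which would also match e.g. `examplexpy` — harmless here, but worth knowing):

```python
import re

# The `exclude` pattern as set in .pre-commit-config.yaml below.
pattern = re.compile(r"^(example.py)")

assert pattern.search("example.py") is not None     # excluded from hooks
assert pattern.search("tests/test_env.py") is None  # no longer excluded
```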
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d998dac..bb3fc90 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^(data/|tests/|example.py) +exclude: ^(example.py) default_language_version: python: python3.10 repos: diff --git a/poetry.lock b/poetry.lock index dc06fe1..a293551 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.0 and should not be changed by hand. [[package]] name = "absl-py" @@ -755,7 +755,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/tests/test_env.py b/tests/test_env.py index 098f25e..8e11f68 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,9 +1,10 @@ -import pytest import gymnasium as gym +import pytest from gymnasium.utils.env_checker import check_env import gym_xarm # noqa: F401 + @pytest.mark.parametrize( "env_task, obs_type", [