
Refactor #1

Merged
merged 16 commits on Apr 7, 2024
5 changes: 2 additions & 3 deletions .github/workflows/test.yml
@@ -13,7 +13,6 @@ jobs:
runs-on: ubuntu-latest
env:
POETRY_VERSION: 1.8.2
MUJOCO_GL: egl
steps:
#----------------------------------------------
# check-out repo and set-up python
@@ -84,8 +83,8 @@ jobs:
path: .venv
key: venv-${{ steps.setup-python.outputs.python-version }}-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}

# - name: Install libegl1-mesa-dev (to use MUJOCO_GL=egl)
# run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev
- name: Install libegl1-mesa-dev
run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev

#----------------------------------------------
# install project
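The workflow now installs `libegl1-mesa-dev` unconditionally. A quick hedged sanity check (not part of this PR) that offscreen EGL rendering actually works after that install could look like this:

```python
import os

# MUJOCO_GL must be set before mujoco is first imported,
# otherwise the default rendering backend is already locked in.
os.environ["MUJOCO_GL"] = "egl"

import gymnasium as gym
import gym_xarm

# rgb_array rendering exercises the offscreen (EGL) code path.
env = gym.make("gym_xarm/XarmLift-v0", render_mode="rgb_array")
env.reset(seed=0)
frame = env.render()
print(frame.shape)  # e.g. (height, width, 3) if EGL rendering works
env.close()
```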
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,4 +1,4 @@
exclude: ^(data/|tests/)
exclude: ^(example.py)
default_language_version:
python: python3.10
repos:
62 changes: 55 additions & 7 deletions README.md
@@ -5,17 +5,11 @@ A gym environment for xArm
<td><img src="http://remicadene.com/assets/gif/simxarm_tdmpc.gif" width="50%" alt="TDMPC policy on xArm env"/></td>


## Acknowledgment

gym-xarm is adapted from [FOWM](https://www.yunhaifeng.com/FOWM/)


## Installation

Create a virtual environment with Python 3.10 and activate it, e.g. with [`miniconda`](https://docs.anaconda.com/free/miniconda/index.html):
```bash
conda create -y -n xarm python=3.10
conda activate xarm
conda create -y -n xarm python=3.10 && conda activate xarm
```

Install gym-xarm:
@@ -24,6 +18,50 @@ pip install gym-xarm
```


## Quickstart

```python
# example.py
import gymnasium as gym
import gym_xarm

env = gym.make("gym_xarm/XarmLift-v0", render_mode="human")
observation, info = env.reset()

for _ in range(1000):
action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action)
image = env.render()

if terminated or truncated:
observation, info = env.reset()

env.close()
```

To use this [example](./example.py) with `render_mode="human"`, set the environment variable with `export MUJOCO_GL=glfw`, or simply run
```bash
MUJOCO_GL=glfw python example.py
```
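If you prefer not to rely on the shell, the backend can also be selected at the top of the script. A minimal sketch; the one hard requirement is that `MUJOCO_GL` is set before `mujoco` is first imported:

```python
import os

# Sketch: pick the windowed GLFW backend before any mujoco import happens;
# setdefault keeps an explicit shell override (e.g. MUJOCO_GL=egl) intact.
os.environ.setdefault("MUJOCO_GL", "glfw")

import gymnasium as gym
import gym_xarm

env = gym.make("gym_xarm/XarmLift-v0", render_mode="human")
env.reset()
env.close()
```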

## Description for `Lift` task

The agent is an xArm robot arm and the object to manipulate is a cube. The goal is to lift the cube above a height threshold.
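As a rough illustration, a success check could look like the following sketch (the function and threshold value are hypothetical, not taken from this repo):

```python
def lift_success(cube_z: float, table_z: float, height_threshold: float = 0.04) -> bool:
    """Hypothetical check: the cube counts as lifted once it sits more than
    `height_threshold` meters above the table surface."""
    return cube_z - table_z > height_threshold
```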

### Action Space

The action space is continuous and consists of four values `[x, y, z, w]` (a stepping sketch follows this list):
- `[x, y, z]` represents the position of the end effector
- `w` represents the gripper control
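A minimal stepping sketch under this layout (the action values are illustrative):

```python
import gymnasium as gym
import numpy as np

import gym_xarm

env = gym.make("gym_xarm/XarmLift-v0")
env.reset(seed=0)

# Illustrative action: [x, y, z] moves the end effector, w drives the gripper.
action = np.array([0.1, 0.0, -0.05, 1.0], dtype=np.float32)

# The action space is a Box, so clip to its bounds before stepping.
action = np.clip(action, env.action_space.low, env.action_space.high)
observation, reward, terminated, truncated, info = env.step(action)
env.close()
```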

### Observation Space

The observation space depends on the value of `obs_type` (a short sketch follows this list):
- `"state"`: observations contain agent and object state vectors only (no rendering)
- `"pixels"`: observations contain the rendered image only (no state vectors)
- `"pixels_agent_pos"`: observations contain both the rendered image and the agent state vector


## Contribute

Instead of using `pip` directly, we use `poetry` for development to easily track our dependencies.
@@ -50,3 +88,13 @@ pre-commit install
# apply style and linter checks on staged files
pre-commit
```


## Acknowledgment

gym-xarm is adapted from [FOWM](https://www.yunhaifeng.com/FOWM/) and is based on work by [Nicklas Hansen](https://nicklashansen.github.io/), [Yanjie Ze](https://yanjieze.com/), [Rishabh Jangir](https://jangirrishabh.github.io/), [Mohit Jain](https://natsu6767.github.io/), and [Sambaran Ghosal](https://github.com/SambaranRepo) as part of the following publications:
* [Self-Supervised Policy Adaptation During Deployment](https://arxiv.org/abs/2007.04309)
* [Generalization in Reinforcement Learning by Soft Data Augmentation](https://arxiv.org/abs/2011.13389)
* [Stabilizing Deep Q-Learning with ConvNets and Vision Transformers under Data Augmentation](https://arxiv.org/abs/2107.00644)
* [Look Closer: Bridging Egocentric and Third-Person Views with Transformers for Robotic Manipulation](https://arxiv.org/abs/2201.07779)
* [Visual Reinforcement Learning with Self-Supervised 3D Representations](https://arxiv.org/abs/2210.07241)
15 changes: 15 additions & 0 deletions example.py
@@ -0,0 +1,15 @@
import gymnasium as gym
import gym_xarm

env = gym.make("gym_xarm/XarmLift-v0", render_mode="human")
observation, info = env.reset()

for _ in range(1000):
action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action)
image = env.render()

if terminated or truncated:
observation, info = env.reset()

env.close()
2 changes: 1 addition & 1 deletion gym_xarm/__init__.py
@@ -4,5 +4,5 @@
id="gym_xarm/XarmLift-v0",
entry_point="gym_xarm.tasks:Lift",
max_episode_steps=300,
kwargs={"obs_mode": "state"},
kwargs={"obs_type": "state"},
)
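Since `register` forwards these `kwargs` into the `Lift` constructor, the renamed key has to match the constructor's parameter name. Callers can still override the registered default per call, which is standard gymnasium behavior (the value below is just an example):

```python
import gymnasium as gym
import gym_xarm

# Keyword arguments passed to gym.make override the registered defaults,
# so the default obs_type="state" can be swapped at call time.
env = gym.make("gym_xarm/XarmLift-v0", obs_type="pixels_agent_pos")
```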
186 changes: 33 additions & 153 deletions gym_xarm/tasks/__init__.py
@@ -1,44 +1,42 @@
from collections import OrderedDict, deque

import gymnasium as gym
import numpy as np
from gymnasium.wrappers import TimeLimit
from collections import OrderedDict

from gym_xarm.tasks.base import Base as Base
from gym_xarm.tasks.lift import Lift
from gym_xarm.tasks.peg_in_box import PegInBox
from gym_xarm.tasks.push import Push
from gym_xarm.tasks.reach import Reach

# from gym_xarm.tasks.peg_in_box import PegInBox
# from gym_xarm.tasks.push import Push
# from gym_xarm.tasks.reach import Reach


TASKS = OrderedDict(
(
(
"reach",
{
"env": Reach,
"action_space": "xyz",
"episode_length": 50,
"description": "Reach a target location with the end effector",
},
),
(
"push",
{
"env": Push,
"action_space": "xyz",
"episode_length": 50,
"description": "Push a cube to a target location",
},
),
(
"peg_in_box",
{
"env": PegInBox,
"action_space": "xyz",
"episode_length": 50,
"description": "Insert a peg into a box",
},
),
# (
# "reach",
# {
# "env": Reach,
# "action_space": "xyz",
# "episode_length": 50,
# "description": "Reach a target location with the end effector",
# },
# ),
# (
# "push",
# {
# "env": Push,
# "action_space": "xyz",
# "episode_length": 50,
# "description": "Push a cube to a target location",
# },
# ),
# (
# "peg_in_box",
# {
# "env": PegInBox,
# "action_space": "xyz",
# "episode_length": 50,
# "description": "Insert a peg into a box",
# },
# ),
(
"lift",
{
@@ -50,121 +48,3 @@
),
)
)
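A small sketch of consuming the surviving table (this assumes the `lift` entry, collapsed in the diff above, keeps the same fields as the commented-out ones):

```python
from gym_xarm.tasks import TASKS

# Only "lift" remains registered here after this refactor.
for name, spec in TASKS.items():
    print(f"{name}: {spec['description']} (episode length {spec['episode_length']})")
```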


class SimXarmWrapper(gym.Wrapper):
"""
DEPRECATED: Use gym.make()

A wrapper for the SimXarm environments. This wrapper is used to
convert the action and observation spaces to the correct format.
"""

def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False):
super().__init__(env)
self._env = env
self.obs_mode = obs_mode
self.image_size = image_size
self.action_repeat = action_repeat
self.frame_stack = frame_stack
self._frames = deque([], maxlen=frame_stack)
self.channel_last = channel_last
self._max_episode_steps = task["episode_length"] // action_repeat

image_shape = (
(image_size, image_size, 3 * frame_stack)
if channel_last
else (3 * frame_stack, image_size, image_size)
)
if obs_mode == "state":
self.observation_space = env.observation_space["observation"]
elif obs_mode == "rgb":
self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8)
elif obs_mode == "all":
self.observation_space = gym.spaces.Dict(
state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32),
rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
)
else:
raise ValueError(f"Unknown obs_mode {obs_mode}. Must be one of [rgb, all, state]")
self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),))
self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32)
if "w" not in task["action_space"]:
self.action_padding[-1] = 1.0

def _render_obs(self):
obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size)
if not self.channel_last:
obs = obs.transpose(2, 0, 1)
return obs.copy()

def _update_frames(self, reset=False):
pixels = self._render_obs()
self._frames.append(pixels)
if reset:
for _ in range(1, self.frame_stack):
self._frames.append(pixels)
assert len(self._frames) == self.frame_stack

def transform_obs(self, obs, reset=False):
if self.obs_mode == "state":
return obs["observation"]
elif self.obs_mode == "rgb":
self._update_frames(reset=reset)
rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
return rgb_obs
elif self.obs_mode == "all":
self._update_frames(reset=reset)
rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state)))
else:
raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]")

def reset(self):
return self.transform_obs(self._env.reset(), reset=True)

def step(self, action):
action = np.concatenate([action, self.action_padding])
reward = 0.0
for _ in range(self.action_repeat):
obs, r, done, info = self._env.step(action)
reward += r
return self.transform_obs(obs), reward, done, info

def render(self, mode="rgb_array", width=384, height=384, **kwargs):
return self._env.render(mode, width=width, height=height)

@property
def state(self):
return self._env.robot_state


def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0):
"""
DEPRECATED: Use gym.make()

Create a new environment.
Args:
task (str): The task to create an environment for. Must be one of:
- 'reach'
- 'push'
- 'peg-in-box'
- 'lift'
obs_mode (str): The observation mode to use. Must be one of:
- 'state': Only state observations
- 'rgb': RGB images
- 'all': RGB images and state observations
image_size (int): The size of the image observations
action_repeat (int): The number of times to repeat the action
seed (int): The random seed to use
Returns:
gym.Env: The environment
"""
if task not in TASKS:
raise ValueError(f"Unknown task {task}. Must be one of {list(TASKS.keys())}")
env = TASKS[task]["env"]()
env = TimeLimit(env, TASKS[task]["episode_length"])
env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last)
env.seed(seed)

return env
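Both `SimXarmWrapper` and this factory are marked deprecated in favor of `gym.make`, so a migration would look roughly like this sketch (the old call is reconstructed from the signature above):

```python
import gymnasium as gym
import gym_xarm

# Old, deprecated path (reconstructed from the factory above):
# from gym_xarm import tasks
# env = tasks.make("lift", obs_mode="state", image_size=84)

# New path: the registered id plus constructor kwargs.
env = gym.make("gym_xarm/XarmLift-v0", obs_type="state")
```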