From 4ea670491eaef66178a1ffe3d672c7d4344c51bf Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Mon, 30 May 2022 16:13:00 -0400 Subject: [PATCH] Improvements for Reinforcement Learning (#78) * Drop Python 3.6 support * Move limits to config, enable hand brake, normalize reward --- .github/workflows/ci.yml | 2 +- HISTORY.rst | 6 +++- gym_donkeycar/__init__.py | 4 +-- gym_donkeycar/core/fps.py | 2 +- gym_donkeycar/core/message.py | 2 +- gym_donkeycar/envs/donkey_env.py | 54 +++++++++++++++++++------------ gym_donkeycar/envs/donkey_proc.py | 2 +- gym_donkeycar/envs/donkey_sim.py | 36 +++++++++++++++++---- gym_donkeycar/version.txt | 2 +- setup.py | 10 +++--- 10 files changed, 79 insertions(+), 41 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b5490e6d..3cf4517ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 diff --git a/HISTORY.rst b/HISTORY.rst index e4a59aa5b..ee38330c9 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,8 +2,12 @@ History ======= -1.2.0 (WIP) +1.3.0 (WIP) ------------------ +* Dropped Python 3.6 support, pinned Gym to version 0.21 +* Move steer limits and throttle limits to config dict +* Normalized reward and use squared error for CTE +* Enabled hand brake in ``send_control()`` and at reset time * Added type hints to most core methods * Added ``send_lidar_config()`` method to configure LIDAR * Added car roll, pitch yaw angle diff --git a/gym_donkeycar/__init__.py b/gym_donkeycar/__init__.py index bace1796b..3c5e95239 100644 --- a/gym_donkeycar/__init__.py +++ b/gym_donkeycar/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Top-level package for OpenAI Gym Environments for Donkey Car.""" import os @@ -21,7 +19,7 @@ # Read version from file version_file = os.path.join(os.path.dirname(__file__), "version.txt") -with 
open(version_file, "r") as file_handler: +with open(version_file) as file_handler: __version__ = file_handler.read().strip() __author__ = """Tawn Kramer""" diff --git a/gym_donkeycar/core/fps.py b/gym_donkeycar/core/fps.py index 9f69922bb..e848e854d 100644 --- a/gym_donkeycar/core/fps.py +++ b/gym_donkeycar/core/fps.py @@ -1,7 +1,7 @@ import time -class FPSTimer(object): +class FPSTimer: """ Every N on_frame events, give the average iterations per interval. """ diff --git a/gym_donkeycar/core/message.py b/gym_donkeycar/core/message.py index a6003ddce..f7c5a0267 100755 --- a/gym_donkeycar/core/message.py +++ b/gym_donkeycar/core/message.py @@ -9,7 +9,7 @@ from gym_donkeycar.core.client import SDClient -class IMesgHandler(object): +class IMesgHandler: def on_connect(self, client: SDClient) -> None: pass diff --git a/gym_donkeycar/envs/donkey_env.py b/gym_donkeycar/envs/donkey_env.py index f461d34a2..09639fcde 100755 --- a/gym_donkeycar/envs/donkey_env.py +++ b/gym_donkeycar/envs/donkey_env.py @@ -19,14 +19,24 @@ def supply_defaults(conf: Dict[str, Any]) -> None: + """ + Update the config dictionary + with defaults when values are missing. + + :param conf: The user defined config dict, + passed to the environment constructor. 
+ """ defaults = [ ("start_delay", 5.0), - ("max_cte", 5.0), + ("max_cte", 8.0), ("frame_skip", 1), ("cam_resolution", (120, 160, 3)), ("log_level", logging.INFO), ("host", "localhost"), ("port", 9091), + ("steer_limit", 1.0), + ("throttle_min", 0.0), + ("throttle_max", 1.0), ] for key, val in defaults: @@ -46,10 +56,6 @@ class DonkeyEnv(gym.Env): metadata = {"render.modes": ["human", "rgb_array"]} ACTION_NAMES: List[str] = ["steer", "throttle"] - STEER_LIMIT_LEFT: float = -1.0 - STEER_LIMIT_RIGHT: float = 1.0 - THROTTLE_MIN: float = 0.0 - THROTTLE_MAX: float = 1.0 VAL_PER_PIXEL: int = 255 def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None): @@ -66,7 +72,7 @@ def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None): supply_defaults(conf) # set logging level - logging.basicConfig(level=conf["log_level"]) # pytype: disable=key-error + logging.basicConfig(level=conf["log_level"]) logger.debug("DEBUG ON") logger.debug(conf) @@ -84,10 +90,12 @@ def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None): # start simulation com self.viewer = DonkeyUnitySimContoller(conf=conf) + # Note: for some RL algorithms, it would be better to normalize the action space to [-1, 1] + # and then rescale to proper limits # steering and throttle self.action_space = spaces.Box( - low=np.array([self.STEER_LIMIT_LEFT, self.THROTTLE_MIN]), - high=np.array([self.STEER_LIMIT_RIGHT, self.THROTTLE_MAX]), + low=np.array([-float(conf["steer_limit"]), float(conf["throttle_min"])]), + high=np.array([float(conf["steer_limit"]), float(conf["throttle_max"])]), dtype=np.float32, ) @@ -98,7 +106,7 @@ def __init__(self, level: str, conf: Optional[Dict[str, Any]] = None): self.seed() # Frame Skipping - self.frame_skip = conf["frame_skip"] # pytype: disable=key-error + self.frame_skip = conf["frame_skip"] # wait until the car is loaded in the scene self.viewer.wait_until_loaded() @@ -129,9 +137,13 @@ def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, 
bool, Dict[str, A return observation, reward, done, info def reset(self) -> np.ndarray: + # Activate hand brake, so the car does not move + self.viewer.handler.send_control(0, 0, 1.0) + time.sleep(0.1) self.viewer.reset() + self.viewer.handler.send_control(0, 0, 1.0) + time.sleep(0.1) observation, reward, done, info = self.viewer.observe() - time.sleep(1) return observation def render(self, mode: str = "human", close: bool = False) -> Optional[np.ndarray]: @@ -149,54 +161,54 @@ def is_game_over(self) -> bool: class GeneratedRoadsEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(GeneratedRoadsEnv, self).__init__(level="generated_road", *args, **kwargs) + super().__init__(level="generated_road", *args, **kwargs) class WarehouseEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(WarehouseEnv, self).__init__(level="warehouse", *args, **kwargs) + super().__init__(level="warehouse", *args, **kwargs) class AvcSparkfunEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(AvcSparkfunEnv, self).__init__(level="sparkfun_avc", *args, **kwargs) + super().__init__(level="sparkfun_avc", *args, **kwargs) class GeneratedTrackEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(GeneratedTrackEnv, self).__init__(level="generated_track", *args, **kwargs) + super().__init__(level="generated_track", *args, **kwargs) class MountainTrackEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(MountainTrackEnv, self).__init__(level="mountain_track", *args, **kwargs) + super().__init__(level="mountain_track", *args, **kwargs) class RoboRacingLeagueTrackEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(RoboRacingLeagueTrackEnv, self).__init__(level="roboracingleague_1", *args, **kwargs) + super().__init__(level="roboracingleague_1", *args, **kwargs) class WaveshareEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(WaveshareEnv, self).__init__(level="waveshare", *args, **kwargs) + super().__init__(level="waveshare", *args, **kwargs) 
class MiniMonacoEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(MiniMonacoEnv, self).__init__(level="mini_monaco", *args, **kwargs) + super().__init__(level="mini_monaco", *args, **kwargs) class WarrenTrackEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(WarrenTrackEnv, self).__init__(level="warren", *args, **kwargs) + super().__init__(level="warren", *args, **kwargs) class ThunderhillTrackEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(ThunderhillTrackEnv, self).__init__(level="thunderhill", *args, **kwargs) + super().__init__(level="thunderhill", *args, **kwargs) class CircuitLaunchEnv(DonkeyEnv): def __init__(self, *args, **kwargs): - super(CircuitLaunchEnv, self).__init__(level="circuit_launch", *args, **kwargs) + super().__init__(level="circuit_launch", *args, **kwargs) diff --git a/gym_donkeycar/envs/donkey_proc.py b/gym_donkeycar/envs/donkey_proc.py index 68167e344..2f2a974ef 100644 --- a/gym_donkeycar/envs/donkey_proc.py +++ b/gym_donkeycar/envs/donkey_proc.py @@ -7,7 +7,7 @@ import subprocess -class DonkeyUnityProcess(object): +class DonkeyUnityProcess: def __init__(self): self.proc1 = None diff --git a/gym_donkeycar/envs/donkey_sim.py b/gym_donkeycar/envs/donkey_sim.py index d7b98c04e..8a3e98760 100755 --- a/gym_donkeycar/envs/donkey_sim.py +++ b/gym_donkeycar/envs/donkey_sim.py @@ -6,6 +6,7 @@ import base64 import logging import math +import os import time import types from io import BytesIO @@ -50,9 +51,11 @@ def set_episode_over_fn(self, ep_over_fn: Callable) -> None: self.handler.set_episode_over_fn(ep_over_fn) def wait_until_loaded(self) -> None: + time.sleep(0.1) while not self.handler.loaded: logger.warning("waiting for sim to start..") - time.sleep(3.0) + time.sleep(1.0) + logger.info("sim started!") def reset(self) -> None: self.handler.reset() @@ -440,17 +443,22 @@ def set_reward_fn(self, reward_fn: Callable[[], float]): logger.debug("custom reward fn set.") def calc_reward(self, done: bool) -> float: + # 
Normalization factor, real max speed is around 30 + # but only attained on a long straight line + max_speed = 10 + if done: return -1.0 if self.cte > self.max_cte: return -1.0 + # Collision if self.hit != "none": return -2.0 - # going fast close to the center of lane yeilds best reward - return (1.0 - (math.fabs(self.cte) / self.max_cte)) * self.speed + # going fast close to the center of lane yields best reward + return (1.0 - (self.cte / self.max_cte) ** 2) * (self.speed / max_speed) # ------ Socket interface ----------- # @@ -560,6 +568,10 @@ def determine_episode_over(self): logger.debug("disqualified") self.over = True + # Disable reset + if os.environ.get("RACE") == "True": + self.over = False + def on_scene_selection_ready(self, message: Dict[str, Any]) -> None: logger.debug("SceneSelectionReady") self.send_get_scene_names() @@ -567,6 +579,8 @@ def on_scene_selection_ready(self, message: Dict[str, Any]) -> None: def on_car_loaded(self, message: Dict[str, Any]) -> None: logger.debug("car loaded") self.loaded = True + # Enable hand brake, so the car doesn't move + self.send_control(0, 0, 1.0) self.on_need_car_config({}) def on_recv_scene_names(self, message: Dict[str, Any]) -> None: @@ -579,14 +593,22 @@ def on_recv_scene_names(self, message: Dict[str, Any]) -> None: else: raise ValueError(f"Scene name {self.SceneToLoad} not in scene list {names}") - def send_control(self, steer: float, throttle: float) -> None: + def send_control(self, steer: float, throttle: float, brake: float = 0.0) -> None: + """ + Send command to simulator. 
+ + :param steer: desired steering + :param throttle: desired throttle + :param brake: whether to activate or not hand brake + (can be a continuous value) + """ if not self.loaded: return msg = { "msg_type": "control", - "steering": steer.__str__(), - "throttle": throttle.__str__(), - "brake": "0.0", + "steering": str(steer), + "throttle": str(throttle), + "brake": str(brake), } self.queue_message(msg) diff --git a/gym_donkeycar/version.txt b/gym_donkeycar/version.txt index 26aaba0e8..f0bb29e76 100644 --- a/gym_donkeycar/version.txt +++ b/gym_donkeycar/version.txt @@ -1 +1 @@ -1.2.0 +1.3.0 diff --git a/setup.py b/setup.py index a955b589d..e2b277286 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,9 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- import os from setuptools import find_packages, setup -with open(os.path.join("gym_donkeycar", "version.txt"), "r") as file_handler: +with open(os.path.join("gym_donkeycar", "version.txt")) as file_handler: __version__ = file_handler.read().strip() description = "OpenAI Gym Environments for Donkey Car" @@ -16,20 +15,23 @@ with open("HISTORY.rst") as history_file: history = history_file.read() -requirements = ["gym", "numpy", "pillow"] +# gym 0.23 introduces breaking changes +requirements = ["gym==0.21", "numpy", "pillow"] setup( name="gym_donkeycar", author="Tawn Kramer", author_email="tawnkramer@gmail.com", + python_requires=">=3.7", classifiers=[ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], description=description, install_requires=requirements,