Drop python 3.8, add python 3.12 support #263

Merged: 2 commits, Nov 18, 2024
5 changes: 2 additions & 3 deletions .github/workflows/ci.yml
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
include:
# Default version
- gymnasium-version: "1.0.0"
@@ -51,6 +51,7 @@ jobs:
- name: Install specific version of gym
run: |
uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
uv pip install --system "numpy<2"
# Only run for python 3.10, downgrade gym to 0.29.1

- name: Lint with ruff
@@ -65,8 +66,6 @@ jobs:
- name: Type check
run: |
make type
- # Do not run for python 3.8 (mypy internal error)
- if: matrix.python-version != '3.8'
- name: Test with pytest
run: |
make pytest
2 changes: 1 addition & 1 deletion docs/conda_env.yml
@@ -13,7 +13,7 @@ dependencies:
- cloudpickle
- opencv-python-headless
- pandas
- - numpy>=1.20,<2.0
+ - numpy>=1.20,<3.0
- matplotlib
- sphinx>=5,<8
- sphinx_rtd_theme>=1.3.0
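The relaxed pin lets the docs environment resolve NumPy 2.x as well as 1.x. As a hedged compatibility note (illustrative only, not part of the diff), code that must run under both major versions should avoid aliases removed in NumPy 2.0, such as np.float_, and use the stable dtype names:

```python
import numpy as np

# np.float_ (and np.unicode_, np.infty, ...) were removed in NumPy 2.0;
# np.float64 spells the same dtype and works on both 1.x and 2.x.
rewards = np.asarray([1.0, 2.5, 3.0], dtype=np.float64)
print(f"NumPy {np.__version__}: mean reward = {rewards.mean():.2f}")
```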
3 changes: 1 addition & 2 deletions docs/conf.py
@@ -14,7 +14,6 @@
import datetime
import os
import sys
- from typing import Dict

# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
# PyEnchant.
@@ -151,7 +150,7 @@ def setup(app):

# -- Options for LaTeX output ------------------------------------------------

- latex_elements: Dict[str, str] = {
+ latex_elements: dict[str, str] = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
25 changes: 25 additions & 0 deletions docs/misc/changelog.rst
@@ -3,6 +3,31 @@
Changelog
==========

+ Release 2.5.0a0 (WIP)
+ --------------------------
+
+ Breaking Changes:
+ ^^^^^^^^^^^^^^^^^
+ - Upgraded to PyTorch 2.3.0
+ - Dropped Python 3.8 support
+
+ New Features:
+ ^^^^^^^^^^^^^
+ - Added Python 3.12 support
+ - Added Numpy v2.0 support
+
+ Bug Fixes:
+ ^^^^^^^^^^
+
+ Deprecations:
+ ^^^^^^^^^^^^^
+
+ Others:
+ ^^^^^^^
+
+ Documentation:
+ ^^^^^^^^^^^^^^
+
Release 2.4.0 (2024-11-18)
--------------------------

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,8 +1,8 @@
[tool.ruff]
# Same as Black.
line-length = 127
- # Assume Python 3.8
- target-version = "py38"
+ # Assume Python 3.9
+ target-version = "py39"

[tool.ruff.lint]
select = ["E", "F", "B", "UP", "C90", "RUF"]
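A note on the ruff bump: with target-version raised to py39 and the UP (pyupgrade) rules already in the select list, ruff should flag the legacy typing aliases (Dict, List, Tuple, Type) that the rest of this PR replaces with built-in generics per PEP 585. A minimal before/after sketch of that annotation style, using hypothetical helper names not taken from the repository:

```python
from typing import Optional

# Python 3.8 style (now dropped): containers imported from typing
# from typing import Dict, List
# def episode_stats(rewards: List[float]) -> Dict[str, float]: ...

# Python 3.9+ style (PEP 585): built-in generics, no typing import needed for containers
def episode_stats(rewards: list[float]) -> dict[str, float]:
    """Return mean and max of a list of episode rewards."""
    if not rewards:
        return {"mean": 0.0, "max": 0.0}
    return {"mean": sum(rewards) / len(rewards), "max": max(rewards)}


# Optional and Union still come from typing on Python 3.9 (the X | None syntax needs 3.10+),
# which is why those imports are kept throughout the diff.
def first_reward(rewards: list[float]) -> Optional[float]:
    return rewards[0] if rewards else None
```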
14 changes: 7 additions & 7 deletions sb3_contrib/ars/ars.py
@@ -3,7 +3,7 @@
import time
import warnings
from functools import partial
- from typing import Any, ClassVar, Dict, Optional, Type, TypeVar, Union
+ from typing import Any, ClassVar, Optional, TypeVar, Union

import numpy as np
import torch as th
@@ -50,14 +50,14 @@ class ARS(BaseAlgorithm):
:param _init_setup_model: Whether or not to build the network at the creation of the instance
"""

- policy_aliases: ClassVar[Dict[str, Type[BasePolicy]]] = {
+ policy_aliases: ClassVar[dict[str, type[BasePolicy]]] = {
"MlpPolicy": MlpPolicy,
"LinearPolicy": LinearPolicy,
}

def __init__(
self,
- policy: Union[str, Type[ARSPolicy]],
+ policy: Union[str, type[ARSPolicy]],
env: Union[GymEnv, str],
n_delta: int = 8,
n_top: Optional[int] = None,
@@ -66,7 +66,7 @@ def __init__(
zero_policy: bool = True,
alive_bonus_offset: float = 0,
n_eval_episodes: int = 1,
- policy_kwargs: Optional[Dict[str, Any]] = None,
+ policy_kwargs: Optional[dict[str, Any]] = None,
stats_window_size: int = 100,
tensorboard_log: Optional[str] = None,
seed: Optional[int] = None,
@@ -144,8 +144,8 @@ def _mimic_monitor_wrapper(self, episode_rewards: np.ndarray, episode_lengths: n

def _trigger_callback(
self,
- _locals: Dict[str, Any],
- _globals: Dict[str, Any],
+ _locals: dict[str, Any],
+ _globals: dict[str, Any],
callback: BaseCallback,
n_envs: int,
) -> None:
@@ -353,7 +353,7 @@ def learn(

def set_parameters(
self,
- load_path_or_dict: Union[str, Dict[str, Dict]],
+ load_path_or_dict: Union[str, dict[str, dict]],
exact_match: bool = True,
device: Union[th.device, str] = "auto",
) -> None:
8 changes: 4 additions & 4 deletions sb3_contrib/ars/policies.py
@@ -1,4 +1,4 @@
- from typing import Any, Dict, List, Optional, Type
+ from typing import Any, Optional

import torch as th
from gymnasium import spaces
@@ -26,8 +26,8 @@ def __init__(
self,
observation_space: spaces.Space,
action_space: spaces.Space,
- net_arch: Optional[List[int]] = None,
- activation_fn: Type[nn.Module] = nn.ReLU,
+ net_arch: Optional[list[int]] = None,
+ activation_fn: type[nn.Module] = nn.ReLU,
with_bias: bool = True,
squash_output: bool = True,
):
@@ -57,7 +57,7 @@ def __init__(

self.action_net = nn.Sequential(*actor_net)

- def _get_constructor_parameters(self) -> Dict[str, Any]:
+ def _get_constructor_parameters(self) -> dict[str, Any]:
# data = super()._get_constructor_parameters() this adds normalize_images, which we don't support...
data = dict(
observation_space=self.observation_space,
20 changes: 10 additions & 10 deletions sb3_contrib/common/envs/invalid_actions_env.py
@@ -1,4 +1,4 @@
- from typing import List, Optional
+ from typing import Optional

import numpy as np
from gymnasium import spaces
@@ -23,7 +23,7 @@ def __init__(
space = spaces.Discrete(dim)
self.n_invalid_actions = n_invalid_actions
self.possible_actions = np.arange(space.n)
- self.invalid_actions: List[int] = []
+ self.invalid_actions: list[int] = []
super().__init__(space=space, ep_length=ep_length)

def _choose_next_state(self) -> None:
@@ -32,7 +32,7 @@ def _choose_next_state(self) -> None:
potential_invalid_actions = [i for i in self.possible_actions if i != self.state]
self.invalid_actions = np.random.choice(potential_invalid_actions, self.n_invalid_actions, replace=False).tolist()

- def action_masks(self) -> List[bool]:
+ def action_masks(self) -> list[bool]:
return [action not in self.invalid_actions for action in self.possible_actions]


@@ -45,7 +45,7 @@ class InvalidActionEnvMultiDiscrete(IdentityEnv[np.ndarray]):

def __init__(
self,
- dims: Optional[List[int]] = None,
+ dims: Optional[list[int]] = None,
ep_length: int = 100,
n_invalid_actions: int = 0,
):
@@ -58,13 +58,13 @@ def __init__(
space = spaces.MultiDiscrete(dims)
self.n_invalid_actions = n_invalid_actions
self.possible_actions = np.arange(sum(dims))
- self.invalid_actions: List[int] = []
+ self.invalid_actions: list[int] = []
super().__init__(space=space, ep_length=ep_length)

def _choose_next_state(self) -> None:
self.state = self.action_space.sample()

- converted_state: List[int] = []
+ converted_state: list[int] = []
running_total = 0
for i in range(len(self.action_space.nvec)):
converted_state.append(running_total + self.state[i])
@@ -74,7 +74,7 @@ def _choose_next_state(self) -> None:
potential_invalid_actions = [i for i in self.possible_actions if i not in converted_state]
self.invalid_actions = np.random.choice(potential_invalid_actions, self.n_invalid_actions, replace=False).tolist()

- def action_masks(self) -> List[bool]:
+ def action_masks(self) -> list[bool]:
return [action not in self.invalid_actions for action in self.possible_actions]


@@ -99,13 +99,13 @@ def __init__(
self.n_dims = dims
self.n_invalid_actions = n_invalid_actions
self.possible_actions = np.arange(2 * dims)
- self.invalid_actions: List[int] = []
+ self.invalid_actions: list[int] = []
super().__init__(space=space, ep_length=ep_length)

def _choose_next_state(self) -> None:
self.state = self.action_space.sample()

- converted_state: List[int] = []
+ converted_state: list[int] = []
running_total = 0
for i in range(self.n_dims):
converted_state.append(running_total + self.state[i])
@@ -115,5 +115,5 @@ def _choose_next_state(self) -> None:
potential_invalid_actions = [i for i in self.possible_actions if i not in converted_state]
self.invalid_actions = np.random.choice(potential_invalid_actions, self.n_invalid_actions, replace=False).tolist()

- def action_masks(self) -> List[bool]:
+ def action_masks(self) -> list[bool]:
return [action not in self.invalid_actions for action in self.possible_actions]
3 changes: 2 additions & 1 deletion sb3_contrib/common/maskable/buffers.py
@@ -1,4 +1,5 @@
- from typing import Generator, NamedTuple, Optional, Union
+ from collections.abc import Generator
+ from typing import NamedTuple, Optional, Union

import numpy as np
import torch as th
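Background for the import change above: since Python 3.9 the generic ABCs such as Generator are subscriptable directly from collections.abc, and the typing counterparts are deprecated aliases. A small illustrative sketch, with a hypothetical helper that is not part of the PR:

```python
from collections.abc import Generator

import numpy as np


def minibatch_indices(n_samples: int, batch_size: int) -> Generator[np.ndarray, None, None]:
    """Yield shuffled index arrays covering n_samples in chunks of batch_size."""
    indices = np.random.permutation(n_samples)
    for start in range(0, n_samples, batch_size):
        yield indices[start : start + batch_size]


if __name__ == "__main__":
    for batch in minibatch_indices(10, 4):
        print(batch)
```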
10 changes: 5 additions & 5 deletions sb3_contrib/common/maskable/distributions.py
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
- from typing import List, Optional, Tuple, TypeVar, Union
+ from typing import Optional, TypeVar, Union

import numpy as np
import torch as th
@@ -157,7 +157,7 @@ def actions_from_params(self, action_logits: th.Tensor, deterministic: bool = Fa
self.proba_distribution(action_logits)
return self.get_actions(deterministic=deterministic)

- def log_prob_from_params(self, action_logits: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
+ def log_prob_from_params(self, action_logits: th.Tensor) -> tuple[th.Tensor, th.Tensor]:
actions = self.actions_from_params(action_logits)
log_prob = self.log_prob(actions)
return actions, log_prob
@@ -174,9 +174,9 @@ class MaskableMultiCategoricalDistribution(MaskableDistribution):
:param action_dims: List of sizes of discrete action spaces
"""

- def __init__(self, action_dims: List[int]):
+ def __init__(self, action_dims: list[int]):
super().__init__()
- self.distributions: List[MaskableCategorical] = []
+ self.distributions: list[MaskableCategorical] = []
self.action_dims = action_dims

def proba_distribution_net(self, latent_dim: int) -> nn.Module:
@@ -232,7 +232,7 @@ def actions_from_params(self, action_logits: th.Tensor, deterministic: bool = Fa
self.proba_distribution(action_logits)
return self.get_actions(deterministic=deterministic)

- def log_prob_from_params(self, action_logits: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
+ def log_prob_from_params(self, action_logits: th.Tensor) -> tuple[th.Tensor, th.Tensor]:
actions = self.actions_from_params(action_logits)
log_prob = self.log_prob(actions)
return actions, log_prob
6 changes: 3 additions & 3 deletions sb3_contrib/common/maskable/evaluation.py
@@ -1,5 +1,5 @@
import warnings
- from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+ from typing import Any, Callable, Optional, Union

import gymnasium as gym
import numpy as np
@@ -16,12 +16,12 @@ def evaluate_policy(
n_eval_episodes: int = 10,
deterministic: bool = True,
render: bool = False,
- callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None,
+ callback: Optional[Callable[[dict[str, Any], dict[str, Any]], None]] = None,
reward_threshold: Optional[float] = None,
return_episode_rewards: bool = False,
warn: bool = True,
use_masking: bool = True,
- ) -> Union[Tuple[float, float], Tuple[List[float], List[int]]]:
+ ) -> Union[tuple[float, float], tuple[list[float], list[int]]]:
"""
Runs policy for ``n_eval_episodes`` episodes and returns average reward.
If a vector env is passed in, this divides the episodes to evaluate onto the