|
198 | 198 | "from ray.rllib.algorithms.dqn import DQNConfig\n",
|
199 | 199 | "from ray import tune, train\n",
|
200 | 200 | "import os\n",
|
| 201 | + "import ray\n", |
201 | 202 | "\n",
|
202 | 203 | "# This is passed to each environment (SimpleRLPlayer) during training.\n",
|
203 | 204 | "# 'player_config' is passed as a kwarg to the super().__init__() of SimpleRLPlayer's Gen9EnvSinglePlayer superclass.\n",
|
|
208 | 209 | " 'start_challenging': True,\n",
|
209 | 210 | " },\n",
|
210 | 211 | " 'opponent_class': MaxBasePowerPlayer,\n",
|
211 | | - " 'opponent_username': 'tr_MaxBasePower',\n", |
| 212 | + " 'opponent_username': 'tr_MaxBP',\n", |
212 | 213 | " 'opponent_config': {\n",
|
213 | 214 | " 'battle_format': \"gen9randombattle\",\n",
|
214 | 215 | " },\n",
|
|
221 | 222 | " 'start_challenging': True,\n",
|
222 | 223 | " },\n",
|
223 | 224 | " 'opponent_class': MaxBasePowerPlayer,\n",
|
224 | | - " 'opponent_username': 'ev_MaxBasePower',\n", |
| 225 | + " 'opponent_username': 'ev_MaxBP',\n", |
225 | 226 | " 'opponent_config': {\n",
|
226 | 227 | " 'battle_format': \"gen9randombattle\",\n",
|
227 | 228 | " },\n",
|
228 | 229 | "}\n",
|
229 | 230 | "\n",
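For orientation, below is a minimal sketch of how SimpleRLPlayer might unpack these keys. The real class is defined in an earlier cell, so the constructor body, the AccountConfiguration usage, and the opponent wiring here are illustrative assumptions rather than the notebook's actual code.

# Hypothetical sketch only -- the actual SimpleRLPlayer lives in an earlier cell.
from poke_env import AccountConfiguration
from poke_env.player import Gen9EnvSinglePlayer

class SimpleRLPlayerSketch(Gen9EnvSinglePlayer):
    def __init__(self, config):
        # Instantiate the opponent from the keys in the config dictionaries above.
        opponent = config["opponent_class"](
            account_configuration=AccountConfiguration(config["opponent_username"], None),
            **config["opponent_config"],
        )
        # 'player_config' is forwarded as kwargs to the Gen9EnvSinglePlayer superclass.
        super().__init__(opponent=opponent, **config["player_config"])

RLlib passes env_config to each environment instance as a dict-like EnvContext, which is why the keys can be read directly in the constructor.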
|
230 | | - "# Guide to RLLib parameters: https://docs.ray.io/en/latest/rllib/rllib-training.html#common-parameters \n", |
| 231 | + "# Guide to RLLib parameters: https://docs.ray.io/en/latest/rllib/rllib-training.html#common-parameters\n", |
| 232 | + "\n", |
231 | 233 | "config = DQNConfig()\n",
|
232 | 234 | "config = config.environment(env = SimpleRLPlayer, env_config = train_env_config)\n",
|
233 | 235 | "# Set the framework to use: \"tf2\" for TensorFlow, \"torch\" for PyTorch. The dev container is set up for TensorFlow 2.13.\n",
|
234 | 236 | "config = config.framework(framework=\"tf2\")\n",
|
235 | 237 | "config = config.resources(\n",
|
236 | | - " num_cpus_for_main_process=2,\n", |
237 | | - " num_gpus=1,\n", |
| 238 | + " num_cpus_for_main_process=4,\n", |
| 239 | + " num_gpus=0,\n", |
| 240 | + ")\n", |
| 241 | + "config = config.learners(\n", |
| 242 | + " num_learners=0,\n", |
| 243 | + " # num_gpus_per_learner=0\n", |
238 | 244 | ")\n",
|
239 | 245 | "config = config.env_runners(\n",
|
| 246 | + " # Number of CPUs assigned to each env_runner. On its own, this does not improve sampling speed much.\n", |
240 | 247 | " num_cpus_per_env_runner=1,\n",
|
241 | 248 | " # Number of workers to run environments. 0 forces rollouts onto the local worker.\n",
|
242 | | - " num_env_runners=20,\n", |
243 | | - " num_envs_per_env_runner=1,\n", |
| 249 | + " num_env_runners=4,\n", |
| 250 | + " # Number of environments on each env_runner worker; a higher value drastically improves sampling speed.\n", |
| 251 | + " num_envs_per_env_runner=4,\n", |
244 | 252 | " # Don't cut off episodes before they finish when batching.\n",
|
245 | 253 | " # As a result, the batch size hyperparameter acts as a minimum and batches may vary in size.\n",
|
246 | 254 | " batch_mode=\"complete_episodes\",\n",
|
247 | 255 | " # Validation creates environments and does not close them, which causes problems.\n",
|
248 | 256 | " # validate_env_runners_after_construction=False,\n",
|
249 | | - " # rollout_fragment_length=300,\n", |
250 | | - " rollout_fragment_length=\"auto\",\n", |
| 257 | + " rollout_fragment_length=50,\n", |
| 258 | + " # rollout_fragment_length=\"auto\",\n", |
251 | 259 | " explore=True,\n",
|
252 | 260 | " exploration_config = {\n",
|
253 | 261 | " \"type\": \"EpsilonGreedy\",\n",
|
|
271 | 279 | " \"capacity\": 100000,\n",
|
272 | 280 | " },\n",
|
273 | 281 | " num_steps_sampled_before_learning_starts=1000,\n",
|
274 | | - " # v_min=-48, # minimum reward\n", |
275 | | - " # v_max=48, # maximum reward\n", |
| 282 | + " v_min=-48, # minimum reward\n", |
| 283 | + " v_max=48, # maximum reward\n", |
276 | 284 | " # n_step=1,\n",
|
277 | 285 | " double_q=False,\n",
|
278 | 286 | " # double_q=tune.grid_search([True, False]),\n",
|
|
281 | 289 | " # noisy=tune.grid_search([True, False]),\n",
|
282 | 290 | " dueling=False,\n",
|
283 | 291 | " # dueling=tune.grid_search([True, False]),\n",
|
284 | | - " train_batch_size=300\n", |
| 292 | + " train_batch_size=1200,\n", |
285 | 293 | ")\n",
|
286 | 294 | "config = config.evaluation(\n",
|
287 | 295 | " evaluation_interval=1,\n",
|
288 | | - " evaluation_num_env_runners=2,\n", |
| 296 | + " evaluation_num_env_runners=4,\n", |
289 | 297 | " # evaluation_parallel_to_training=True,\n",
|
290 | 298 | " evaluation_duration=30,\n",
|
291 | 299 | " evaluation_config={\n",
|
|
296 | 304 | " },\n",
|
297 | 305 | ")\n",
|
298 | 306 | "# This setting allows runs to continue after a worker fails for whatever reason.\n",
|
299 | | - "# config = config.fault_tolerance(recreate_failed_env_runners=True)\n", |
| 307 | + "config = config.fault_tolerance(recreate_failed_env_runners=True)" |
| 308 | + ] |
| 309 | + }, |
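
As a rough resource estimate under these settings (assuming the evaluation env runners also get one CPU each), a single trial requests about 4 + 4*1 + 4*1 = 12 CPUs for the main process, the training env runners, and the evaluation env runners, while sampling from 4 * 4 = 16 training environments in parallel.
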
| 310 | + { |
| 311 | + "cell_type": "code", |
| 312 | + "execution_count": null, |
| 313 | + "metadata": {}, |
| 314 | + "outputs": [], |
| 315 | + "source": [ |
| 316 | + "## Set stopping criteria for the trials\n", |
| 317 | + "from ray.tune.stopper import CombinedStopper, MaximumIterationStopper, TrialPlateauStopper\n", |
300 | 318 | "\n",
|
301 | | - "# This sets the stopping criteria for the run.\n", |
302 | | - "stop = {\n", |
303 | | - " # \"evaluation/env_runners/episode_reward_mean\": 30,\n", |
304 | | - " \"training_iteration\": 100,\n", |
305 | | - "}" |
| 319 | + "stopper = CombinedStopper(\n", |
| 320 | + " MaximumIterationStopper(max_iter=120),\n", |
| 321 | + " TrialPlateauStopper(metric=\"evaluation/env_runners/episode_reward_mean\"),\n", |
| 322 | + ")" |
306 | 323 | ]
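If the plateau criterion fires too early with its defaults, TrialPlateauStopper also accepts tolerance arguments; the values below are illustrative only, not tuned for this task.

# Illustrative only: widen the plateau window and tolerance before stopping a trial.
stopper = CombinedStopper(
    MaximumIterationStopper(max_iter=120),
    TrialPlateauStopper(
        metric="evaluation/env_runners/episode_reward_mean",
        num_results=8,    # compare the last 8 reported results
        std=0.5,          # treat a standard deviation below 0.5 as a plateau
        grace_period=10,  # never stop before 10 results have been reported
    ),
)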
|
307 | 324 | },
|
308 | 325 | {
|
|
329 | 346 | " # scheduler= NoneProvided, # When using concurrent trials, this ends or changes poorly performing trials early.\n",
|
330 | 347 | " ),\n",
|
331 | 348 | " run_config=train.RunConfig(\n",
|
332 | | - " name=\"DQN_SimpleRL_vs_MaxBP\",\n", |
| 349 | + " name=\"DQN_SimpleRL_v_MaxBP_1\",\n", |
333 | 350 | " storage_path=os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'results')),\n",
|
334 | | - " stop=stop,\n", |
| 351 | + " stop=stopper,\n", |
335 | 352 | " checkpoint_config=train.CheckpointConfig(\n",
|
336 | | - " checkpoint_frequency=10,\n", |
| 353 | + " checkpoint_frequency=1,\n", |
337 | 354 | " # checkpoint_score_attribute is the metric to use to determine which checkpoints to keep.\n",
|
338 | 355 | " checkpoint_score_attribute=\"evaluation/env_runners/episode_reward_mean\",\n",
|
339 | 356 | " # Only the best num_to_keep checkpoints are saved, using checkpoint_score_attribute as the metric to compare.\n",
|
340 | 357 | " num_to_keep=1,\n",
|
341 | 358 | " # checkpoint_score_order determines whether a higher (\"max\") or lower (\"min\") checkpoint_score_attribute is better.\n",
|
342 | 359 | " checkpoint_score_order=\"max\",\n",
|
343 | | - " checkpoint_at_end=True\n", |
| 360 | + " # checkpoint_at_end=True\n", |
344 | 361 | " ),\n",
|
345 | 362 | " ),\n",
|
346 | 363 | "\n",
|
|
376 | 393 | "# If manually loading a checkpoint from a path, you can skip every cell above this one that comes after the SimpleRLPlayer class definition.\n",
|
377 | 394 | "# The test_checkpoint path should end with the checkpoint_XXXXXX directory, where XXXXXX is the zero-padded checkpoint number.\n",
|
378 | 395 | "\n",
|
379 | | - "# test_checkpoint = \"../results/DQN_SimpleRL_vs_MaxBP/DQN_SimpleRLPlayer_4345e_00024_24_lr=0.0002,weight_decay=0.0194_2024-07-17_13-00-24/checkpoint_000000\"" |
| 396 | + "# test_checkpoint = \"../results/DQN_SimpleRL_v_MaxBP_1/DQN_SimpleRLPlayer_9fc91_00010_10_train_batch_size=900_2024-07-26_13-50-33/checkpoint_000019\"" |
380 | 397 | ]
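As a hedged sketch, a checkpoint directory like the one referenced above is typically restored with Algorithm.from_checkpoint; the helper name and placeholder path below are hypothetical, not the notebook's own code.

# Sketch: restore a trained algorithm from a saved checkpoint directory.
from ray.rllib.algorithms.algorithm import Algorithm

def load_algorithm(checkpoint_path: str):
    # checkpoint_path should point at the checkpoint_XXXXXX directory described above.
    return Algorithm.from_checkpoint(checkpoint_path)

# algo = load_algorithm(test_checkpoint)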
|
381 | 398 | },
|
382 | 399 | {
|
|
525 | 542 | "name": "python",
|
526 | 543 | "nbconvert_exporter": "python",
|
527 | 544 | "pygments_lexer": "ipython3",
|
528 | | - "version": "3.11.0" |
| 545 | + "version": "3.11.0rc1" |
529 | 546 | }
|
530 | 547 | },
|
531 | 548 | "nbformat": 4,
|
|