From b7f7c733ed678a5765be45b6188f8ba27eab1d22 Mon Sep 17 00:00:00 2001 From: Bronzila Date: Thu, 22 Jun 2023 12:45:19 +0200 Subject: [PATCH] Closes #23 and fixes spelling mistake in interfacing_DEHB example --- examples/00_interfacing_DEHB.ipynb | 2 +- ...1_Optimizing_RandomForest_using_DEHB.ipynb | 135 +++++++++--------- 2 files changed, 72 insertions(+), 65 deletions(-) diff --git a/examples/00_interfacing_DEHB.ipynb b/examples/00_interfacing_DEHB.ipynb index 00eb84a..71a76d0 100644 --- a/examples/00_interfacing_DEHB.ipynb +++ b/examples/00_interfacing_DEHB.ipynb @@ -322,7 +322,7 @@ } ], "source": [ - "# allows optimization to restart from the beginning by forgetting al observations\n", + "# allows optimization to restart from the beginning by forgetting all observations\n", "dehb.reset() \n", "\n", "_, _, _ = dehb.run(fevals=20, verbose=False, save_intermediate=True)\n", diff --git a/examples/01_Optimizing_RandomForest_using_DEHB.ipynb b/examples/01_Optimizing_RandomForest_using_DEHB.ipynb index 8d70812..c35427b 100644 --- a/examples/01_Optimizing_RandomForest_using_DEHB.ipynb +++ b/examples/01_Optimizing_RandomForest_using_DEHB.ipynb @@ -203,19 +203,20 @@ " dataset = np.random.choice(list(regression.keys()))\n", " _data = regression[dataset]()\n", "\n", - " train_X, test_X, train_y, test_y = train_test_split(\n", - " _data.get(\"data\"), \n", - " _data.get(\"target\"), \n", - " test_size=0.1, \n", - " shuffle=True, \n", - " random_state=seed\n", + " train_X, rest_X, train_y, rest_y = train_test_split(\n", + " _data.get(\"data\"), \n", + " _data.get(\"target\"), \n", + " train_size=0.7, \n", + " shuffle=True, \n", + " random_state=seed\n", " )\n", - " train_X, valid_X, train_y, valid_y = train_test_split(\n", - " _data.get(\"data\"), \n", - " _data.get(\"target\"), \n", - " test_size=0.3, \n", - " shuffle=True, \n", - " random_state=seed\n", + " \n", + " # 10% test and 20% validation data\n", + " valid_X, test_X, valid_y, test_y = train_test_split(\n", + " rest_X, rest_y,\n", + " test_size=0.3333, \n", + " shuffle=True, \n", + " random_state=seed\n", " )\n", " return train_X, train_y, valid_X, valid_y, test_X, test_y, dataset" ] @@ -231,7 +232,7 @@ "text": [ "wine\n", "Train size: (124, 13)\n", - "Valid size: (54, 13)\n", + "Valid size: (36, 13)\n", "Test size: (18, 13)\n" ] } @@ -375,16 +376,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "254 254 254\n", + "473 473 473\n", "\n", "Last evaluated configuration, \n", - "Configuration:\n", - " max_depth, Value: 4\n", - " max_features, Value: 0.7421389151242858\n", - " min_samples_leaf, Value: 3\n", - " min_samples_split, Value: 6\n", - "got a score of -1.0, was evaluated at a budget of 16.67 and took 0.021 seconds to run.\n", - "The additional info attached: {'test_score': 1.0, 'budget': 16.666666666666664}\n" + "Configuration(values={\n", + " 'max_depth': 7,\n", + " 'max_features': 0.669059250229961,\n", + " 'min_samples_leaf': 2,\n", + " 'min_samples_split': 3,\n", + "})\n", + "got a score of -1.0, was evaluated at a budget of 50.00 and took 0.048 seconds to run.\n", + "The additional info attached: {'test_score': 1.0, 'budget': 50.0}\n" ] } ], @@ -418,31 +420,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "2021-08-11 04:02:25.962 | INFO | dehb.optimizers.dehb:reset:107 - \n", - "\n", - "RESET at 08/11/21 04:02:25 CEST\n", - "\n", + "\u001b[32m2023-06-22 12:00:41.016\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdehb.optimizers.dehb\u001b[0m:\u001b[36mreset\u001b[0m:\u001b[36m107\u001b[0m - \u001b[1m\n", "\n", - "2021-08-11 04:02:36.081 | INFO | dehb.optimizers.dehb:reset:107 - \n", + "RESET at 06/22/23 12:00:40 CEST\n", "\n", - "RESET at 08/11/21 04:02:36 CEST\n", + "\u001b[0m\n", + "\u001b[32m2023-06-22 12:00:51.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdehb.optimizers.dehb\u001b[0m:\u001b[36mreset\u001b[0m:\u001b[36m107\u001b[0m - \u001b[1m\n", "\n", + "RESET at 06/22/23 12:00:51 CEST\n", "\n", - "2021-08-11 04:02:46.256 | INFO | dehb.optimizers.dehb:reset:107 - \n", + "\u001b[0m\n", + "\u001b[32m2023-06-22 12:01:01.182\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdehb.optimizers.dehb\u001b[0m:\u001b[36mreset\u001b[0m:\u001b[36m107\u001b[0m - \u001b[1m\n", "\n", - "RESET at 08/11/21 04:02:46 CEST\n", + "RESET at 06/22/23 12:01:01 CEST\n", "\n", + "\u001b[0m\n", + "\u001b[32m2023-06-22 12:01:11.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdehb.optimizers.dehb\u001b[0m:\u001b[36mreset\u001b[0m:\u001b[36m107\u001b[0m - \u001b[1m\n", "\n", - "2021-08-11 04:02:56.351 | INFO | dehb.optimizers.dehb:reset:107 - \n", + "RESET at 06/22/23 12:01:11 CEST\n", "\n", - "RESET at 08/11/21 04:02:56 CEST\n", + "\u001b[0m\n", + "\u001b[32m2023-06-22 12:01:21.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mdehb.optimizers.dehb\u001b[0m:\u001b[36mreset\u001b[0m:\u001b[36m107\u001b[0m - \u001b[1m\n", "\n", + "RESET at 06/22/23 12:01:21 CEST\n", "\n", - "2021-08-11 04:03:06.441 | INFO | dehb.optimizers.dehb:reset:107 - \n", - "\n", - "RESET at 08/11/21 04:03:06 CEST\n", - "\n", - "\n" + "\u001b[0m\n" ] } ], @@ -513,39 +515,44 @@ "name": "stdout", "output_type": "stream", "text": [ - "Configuration:\n", - " max_depth, Value: 12\n", - " max_features, Value: 0.6810033803746296\n", - " min_samples_leaf, Value: 1\n", - " min_samples_split, Value: 2\n", + "Configuration(values={\n", + " 'max_depth': 13,\n", + " 'max_features': 0.5412753369058052,\n", + " 'min_samples_leaf': 12,\n", + " 'min_samples_split': 14,\n", + "})\n", " got an accuracy of 1.0 on the test set.\n", "\n", - "Configuration:\n", - " max_depth, Value: 12\n", - " max_features, Value: 0.7019468557915487\n", - " min_samples_leaf, Value: 1\n", - " min_samples_split, Value: 2\n", + "Configuration(values={\n", + " 'max_depth': 6,\n", + " 'max_features': 0.6764411582074702,\n", + " 'min_samples_leaf': 1,\n", + " 'min_samples_split': 27,\n", + "})\n", " got an accuracy of 1.0 on the test set.\n", "\n", - "Configuration:\n", - " max_depth, Value: 14\n", - " max_features, Value: 0.557961004785893\n", - " min_samples_leaf, Value: 4\n", - " min_samples_split, Value: 21\n", + "Configuration(values={\n", + " 'max_depth': 5,\n", + " 'max_features': 0.5862915814751853,\n", + " 'min_samples_leaf': 2,\n", + " 'min_samples_split': 22,\n", + "})\n", " got an accuracy of 1.0 on the test set.\n", "\n", - "Configuration:\n", - " max_depth, Value: 10\n", - " max_features, Value: 0.7574798490327812\n", - " min_samples_leaf, Value: 12\n", - " min_samples_split, Value: 3\n", + "Configuration(values={\n", + " 'max_depth': 14,\n", + " 'max_features': 0.5346143393392929,\n", + " 'min_samples_leaf': 5,\n", + " 'min_samples_split': 9,\n", + "})\n", " got an accuracy of 1.0 on the test set.\n", "\n", - "Configuration:\n", - " max_depth, Value: 10\n", - " max_features, Value: 0.6303283393470247\n", - " min_samples_leaf, Value: 5\n", - " min_samples_split, Value: 25\n", + "Configuration(values={\n", + " 'max_depth': 4,\n", + " 'max_features': 0.5541455312635835,\n", + " 'min_samples_leaf': 4,\n", + " 'min_samples_split': 10,\n", + "})\n", " got an accuracy of 1.0 on the test set.\n", "\n" ] @@ -560,9 +567,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dask", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "dask" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -574,7 +581,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.9.16" } }, "nbformat": 4,