From 8cf4493727703bf729372f4d9edbf6e6caff4224 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20W=C3=BCrger?=
 <44372393+koerper@users.noreply.github.com>
Date: Wed, 27 Mar 2024 21:31:17 +0000
Subject: [PATCH] Implement campaign simulation

---
 src/tim_baybe-inhibitor.ipynb | 363 ++++++++++++++--------------------
 1 file changed, 150 insertions(+), 213 deletions(-)
diff --git a/src/tim_baybe-inhibitor.ipynb b/src/tim_baybe-inhibitor.ipynb
index 67fc765..4563a7d 100644
--- a/src/tim_baybe-inhibitor.ipynb
+++ b/src/tim_baybe-inhibitor.ipynb
@@ -37,7 +37,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPConnection object at 0x7fc39ecd08e0>: Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n",
+      "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\"<urllib3.connection.HTTPConnection object at 0x7f90d90e3490>: Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n",
       "  warnings.warn(\n",
       "/home/vscode/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n"
@@ -197,13 +197,6 @@
     "efficiency_min, efficiency_max = df_AA2024.Efficiency.min(), df_AA2024.Efficiency.max()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -213,34 +206,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "import sys\n",
-    "sys.path.append('../utils')\n",
-    "from subsampling import random_subsample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(50, 6)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "random_subsample(df_AA2024, 50).shape"
-   ]
+   "source": []
   },
   {
    "cell_type": "markdown",
@@ -251,169 +220,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>SMILES</th>\n",
-       "      <th>Time_h</th>\n",
-       "      <th>pH</th>\n",
-       "      <th>Inhib_Concentrat_M</th>\n",
-       "      <th>Salt_Concentrat_M</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>COCCOC(=O)OCSc1nc2c(s1)cccc2</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0010</td>\n",
-       "      <td>0.10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>COCCOC(=O)OCSc1nc2c(s1)cccc2</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>0.0010</td>\n",
-       "      <td>0.10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Cc1ccc(c(c1)n1nc2c(n1)cccc2)O</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0010</td>\n",
-       "      <td>0.10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Cc1ccc(c(c1)n1nc2c(n1)cccc2)O</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>0.0010</td>\n",
-       "      <td>0.10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0010</td>\n",
-       "      <td>0.10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>606</th>\n",
-       "      <td>S=c1sc2c([nH]1)cccc2</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0005</td>\n",
-       "      <td>0.05</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>607</th>\n",
-       "      <td>C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0005</td>\n",
-       "      <td>0.05</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>608</th>\n",
-       "      <td>C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0005</td>\n",
-       "      <td>0.05</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>609</th>\n",
-       "      <td>C(=O)(C(=O)[O-])[O-]</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0005</td>\n",
-       "      <td>0.05</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>610</th>\n",
-       "      <td>C(=O)(C(=O)[O-])[O-]</td>\n",
-       "      <td>24.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0005</td>\n",
-       "      <td>0.05</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>611 rows × 5 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                      SMILES  Time_h    pH  \\\n",
-       "0               COCCOC(=O)OCSc1nc2c(s1)cccc2    24.0   4.0   \n",
-       "1               COCCOC(=O)OCSc1nc2c(s1)cccc2    24.0  10.0   \n",
-       "2              Cc1ccc(c(c1)n1nc2c(n1)cccc2)O    24.0   4.0   \n",
-       "3              Cc1ccc(c(c1)n1nc2c(n1)cccc2)O    24.0  10.0   \n",
-       "4    Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O    24.0   4.0   \n",
-       "..                                       ...     ...   ...   \n",
-       "606                     S=c1sc2c([nH]1)cccc2    24.0   7.0   \n",
-       "607    C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O    24.0   7.0   \n",
-       "608    C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O    24.0   7.0   \n",
-       "609                     C(=O)(C(=O)[O-])[O-]    24.0   7.0   \n",
-       "610                     C(=O)(C(=O)[O-])[O-]    24.0   7.0   \n",
-       "\n",
-       "     Inhib_Concentrat_M  Salt_Concentrat_M  \n",
-       "0                0.0010               0.10  \n",
-       "1                0.0010               0.10  \n",
-       "2                0.0010               0.10  \n",
-       "3                0.0010               0.10  \n",
-       "4                0.0010               0.10  \n",
-       "..                  ...                ...  \n",
-       "606              0.0005               0.05  \n",
-       "607              0.0005               0.05  \n",
-       "608              0.0005               0.05  \n",
-       "609              0.0005               0.05  \n",
-       "610              0.0005               0.05  \n",
-       "\n",
-       "[611 rows x 5 columns]"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_AA2024[[\"SMILES\", \"Time_h\", \"pH\", \"Inhib_Concentrat_M\", \"Salt_Concentrat_M\"]]"
-   ]
+   "outputs": [],
+   "source": []
   },
   {
    "cell_type": "markdown",
@@ -424,7 +234,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -489,7 +299,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -602,7 +412,7 @@
        "[611 rows x 388 columns]), continuous=SubspaceContinuous(parameters=[], constraints_lin_eq=[], constraints_lin_ineq=[]))"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -613,21 +423,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
-     "ename": "IndexError",
-     "evalue": "boolean index did not match indexed array along dimension 0; dimension is 611 but corresponding boolean dimension is 921",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[13], line 8\u001b[0m\n\u001b[1;32m      5\u001b[0m N_DOE_ITERATIONS \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m5\u001b[39m\n\u001b[1;32m      6\u001b[0m N_MC_ITERATIONS \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m5\u001b[39m\n\u001b[0;32m----> 8\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43msimulate_experiment\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcampaign\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdf_AA2024\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mBATCH_SIZE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     12\u001b[0m \u001b[43m    \u001b[49m\u001b[43mn_doe_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mN_DOE_ITERATIONS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     13\u001b[0m \u001b[43m    \u001b[49m\u001b[43mimpute_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mignore\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     14\u001b[0m \u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/simulation.py:473\u001b[0m, in \u001b[0;36msimulate_experiment\u001b[0;34m(campaign, lookup, batch_size, n_doe_iterations, initial_data, random_seed, impute_mode, noise_percent)\u001b[0m\n\u001b[1;32m    471\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m impute_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    472\u001b[0m     searchspace \u001b[38;5;241m=\u001b[39m campaign\u001b[38;5;241m.\u001b[39msearchspace\u001b[38;5;241m.\u001b[39mdiscrete\u001b[38;5;241m.\u001b[39mexp_rep\n\u001b[0;32m--> 473\u001b[0m     missing_inds \u001b[38;5;241m=\u001b[39m \u001b[43msearchspace\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m    474\u001b[0m \u001b[43m        \u001b[49m\u001b[43msearchspace\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmerge\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlookup\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mleft\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindicator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m_merge\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m    475\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mleft_only\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m    476\u001b[0m \u001b[43m    \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m    477\u001b[0m     campaign\u001b[38;5;241m.\u001b[39msearchspace\u001b[38;5;241m.\u001b[39mdiscrete\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mloc[\n\u001b[1;32m    478\u001b[0m         missing_inds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdont_recommend\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    479\u001b[0m     ] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m    481\u001b[0m \u001b[38;5;66;03m# Run the DOE loop\u001b[39;00m\n",
-      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/indexes/range.py:1030\u001b[0m, in \u001b[0;36mRangeIndex.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_scalar(key):\n\u001b[1;32m   1024\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\n\u001b[1;32m   1025\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124monly integers, slices (`:`), \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1026\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mellipsis (`...`), numpy.newaxis (`None`) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1027\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mand integer or boolean \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1028\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marrays are valid indices\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1029\u001b[0m     )\n\u001b[0;32m-> 1030\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py:5416\u001b[0m, in \u001b[0;36mIndex.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   5407\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(key) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(key) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m   5408\u001b[0m             warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m   5409\u001b[0m                 \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing a boolean indexer with length 0 on an Index with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   5410\u001b[0m                 \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlength greater than 0 is deprecated and will raise in a \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   5413\u001b[0m                 stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[1;32m   5414\u001b[0m             )\n\u001b[0;32m-> 5416\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mgetitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   5417\u001b[0m \u001b[38;5;66;03m# Because we ruled out integer above, we always get an arraylike here\u001b[39;00m\n\u001b[1;32m   5418\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m result\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n",
-      "\u001b[0;31mIndexError\u001b[0m: boolean index did not match indexed array along dimension 0; dimension is 611 but corresponding boolean dimension is 921"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/vscode/.local/lib/python3.10/site-packages/botorch/models/transforms/outcome.py:304: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1760.)\n",
+      "  stdvs = Y.std(dim=-2, keepdim=True)\n",
+      "/home/vscode/.local/lib/python3.10/site-packages/botorch/models/utils/assorted.py:194: UserWarning: std(): degrees of freedom is <= 0. Correction should be strictly less than the reduction factor (input numel divided by output numel). (Triggered internally at ../aten/src/ATen/native/ReduceOps.cpp:1760.)\n",
+      "  Ymean, Ystd = torch.mean(Y, dim=-2), torch.std(Y, dim=-2)\n",
+      "The lookup rows with indexes [297 300 303 306] seem to be duplicates regarding parameter values. Choosing a random one.\n",
+      "The lookup rows with indexes [297 300 303 306] seem to be duplicates regarding parameter values. Choosing a random one.\n",
+      "The lookup rows with indexes [297 300 303 306] seem to be duplicates regarding parameter values. Choosing a random one.\n",
+      "The lookup rows with indexes [297 300 303 306] seem to be duplicates regarding parameter values. Choosing a random one.\n",
+      "Input row with index 297 has multiple matches with the search space. This could indicate that something went wrong. Matching only first occurrence.\n",
+      "Input row with index 300 has multiple matches with the search space. This could indicate that something went wrong. Matching only first occurrence.\n",
+      "Input row with index 303 has multiple matches with the search space. This could indicate that something went wrong. Matching only first occurrence.\n",
+      "Input row with index 306 has multiple matches with the search space. This could indicate that something went wrong. Matching only first occurrence.\n"
      ]
     }
    ],
@@ -644,8 +458,131 @@
     "    df_AA2024,\n",
     "    batch_size=BATCH_SIZE,\n",
     "    n_doe_iterations=N_DOE_ITERATIONS,\n",
-    "    impute_mode=\"ignore\",\n",
-    ")"
+    "    impute_mode=\"best\",\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Iteration</th>\n",
+       "      <th>Num_Experiments</th>\n",
+       "      <th>Efficiency_Measurements</th>\n",
+       "      <th>Efficiency_IterBest</th>\n",
+       "      <th>Efficiency_CumBest</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>[60.35]</td>\n",
+       "      <td>60.35</td>\n",
+       "      <td>60.35</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>[40.0, 73.0, 40.0, 43.0]</td>\n",
+       "      <td>73.00</td>\n",
+       "      <td>73.00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>[78.26]</td>\n",
+       "      <td>78.26</td>\n",
+       "      <td>78.26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>7</td>\n",
+       "      <td>[89.68]</td>\n",
+       "      <td>89.68</td>\n",
+       "      <td>89.68</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>8</td>\n",
+       "      <td>[50.0]</td>\n",
+       "      <td>50.00</td>\n",
+       "      <td>89.68</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Iteration  Num_Experiments   Efficiency_Measurements  Efficiency_IterBest  \\\n",
+       "0          0                1                   [60.35]                60.35   \n",
+       "1          1                5  [40.0, 73.0, 40.0, 43.0]                73.00   \n",
+       "2          2                6                   [78.26]                78.26   \n",
+       "3          3                7                   [89.68]                89.68   \n",
+       "4          4                8                    [50.0]                50.00   \n",
+       "\n",
+       "   Efficiency_CumBest  \n",
+       "0               60.35  \n",
+       "1               73.00  \n",
+       "2               78.26  \n",
+       "3               89.68  \n",
+       "4               89.68  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "100.0"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_AA2024.Efficiency.max()"
    ]
   },
   {

	SMILES	Time_h	pH	Inhib_Concentrat_M	Salt_Concentrat_M
0	COCCOC(=O)OCSc1nc2c(s1)cccc2	24.0	4.0	0.0010	0.10
1	COCCOC(=O)OCSc1nc2c(s1)cccc2	24.0	10.0	0.0010	0.10
2	Cc1ccc(c(c1)n1nc2c(n1)cccc2)O	24.0	4.0	0.0010	0.10
3	Cc1ccc(c(c1)n1nc2c(n1)cccc2)O	24.0	10.0	0.0010	0.10
4	Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O	24.0	4.0	0.0010	0.10
...	...	...	...	...	...
606	S=c1sc2c([nH]1)cccc2	24.0	7.0	0.0005	0.05
607	C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O	24.0	7.0	0.0005	0.05
608	C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O	24.0	7.0	0.0005	0.05
609	C(=O)(C(=O)[O-])[O-]	24.0	7.0	0.0005	0.05
610	C(=O)(C(=O)[O-])[O-]	24.0	7.0	0.0005	0.05
	Iteration	Num_Experiments	Efficiency_Measurements	Efficiency_IterBest	Efficiency_CumBest
0	0	1	[60.35]	60.35	60.35
1	1	5	[40.0, 73.0, 40.0, 43.0]	73.00	73.00
2	2	6	[78.26]	78.26	78.26
3	3	7	[89.68]	89.68	89.68
4	4	8	[50.0]	50.00	89.68