From 0ca557b389d2d4cd88cf0c8470f6210680ed9d72 Mon Sep 17 00:00:00 2001 From: Michael <165048583+lordmikerahl@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:52:42 +0000 Subject: [PATCH] added stuff --- baybe_hack.ipynb | 155 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 129 insertions(+), 26 deletions(-) diff --git a/baybe_hack.ipynb b/baybe_hack.ipynb index c9057c3..9cd8f9c 100644 --- a/baybe_hack.ipynb +++ b/baybe_hack.ipynb @@ -9,9 +9,121 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')\n", + "print(df_AA2024.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(df_AA2024.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.targets import NumericalTarget\n", + "from baybe.objective import Objective\n", + "\n", + "target = NumericalTarget(\n", + " name=\"Efficiency\",\n", + " mode=\"MAX\",\n", + ")\n", + "objective = Objective(mode=\"SINGLE\", targets=[target])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.parameters import (\n", + " NumericalContinuousParameter,\n", + ")\n", + "\n", + "parameters = [\n", + " NumericalContinuousParameter(\n", + " name=\"Time[h]\",\n", + " bounds=(0.500000, 672),\n", + " ),\n", + " NumericalContinuousParameter(\n", + " name=\"pH\",\n", + " bounds=(1, 14),\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.searchspace import SearchSpace\n", + "\n", + "searchspace = SearchSpace.from_product(parameters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe import Campaign\n", + "\n", + "campaign = Campaign(searchspace, objective)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = campaign.recommend(batch_size=3)\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], + "source": [ + "############################\n", + "############################\n", + "############################" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", + " warnings.warn(\n" + ] + } + ], "source": [ "import numpy as np\n", "import pandas as pd\n", @@ -55,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -64,7 +176,7 @@ "'\\noverpotential = NumericalTarget(\\n name=\"overpotential\", \\n mode=\"MAX\", \\n bounds=(-400, 0),\\n transformation=\"LINEAR\" # optional, will be applied if bounds are not None, LINEAR only one available for MAX/MIN\\n ) \\n\\noverpotential_slope = NumericalTarget(\\n name=\"overpotential_slope\", \\n mode=\"MAX\", \\n bounds=(-0.05, 0.05),\\n transformation=\"LINEAR\" # optional, will be applied if bounds are not None, LINEAR only one available for MAX/MIN\\n )\\n\\nobjective = Objective(\\n mode=\"DESIRABILITY\",\\n targets=[overpotential, overpotential_slope],\\n weights=[1.0, 1.0], # optional, by default all weights are equal\\n combine_func=\"GEOM_MEAN\", # optional, geometric mean is the default\\n)\\n'" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -103,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -140,27 +252,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { - "ename": "OptionalImportError", - "evalue": "The requested functionality requires the installation of optional chemistry dependencies. Please run \"pip install 'baybe[chem]'\".", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/utils/chemistry.py:17\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmordred\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Calculator, descriptors\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Chem, RDLogger\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mordred'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mOptionalImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mparameters\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SubstanceParameter\n\u001b[0;32m----> 3\u001b[0m \u001b[43mSubstanceParameter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mWater\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1-Octanol\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCCCCCCCCO\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mToluene\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCC1=CC=CC=C1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMORDRED\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# optional\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecorrelate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.7\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# optional\u001b[39;49;00m\n\u001b[1;32m 12\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m:10\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, name, data, decorrelate, encoding)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _config\u001b[38;5;241m.\u001b[39m_run_validators \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 9\u001b[0m __attr_validator_name(\u001b[38;5;28mself\u001b[39m, __attr_name, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname)\n\u001b[0;32m---> 10\u001b[0m \u001b[43m__attr_validator_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m__attr_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m __attr_validator_decorrelate(\u001b[38;5;28mself\u001b[39m, __attr_decorrelate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorrelate)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/attr/_make.py:3059\u001b[0m, in \u001b[0;36m_AndValidator.__call__\u001b[0;34m(self, inst, attr, value)\u001b[0m\n\u001b[1;32m 3057\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, inst, attr, value):\n\u001b[1;32m 3058\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validators:\n\u001b[0;32m-> 3059\u001b[0m \u001b[43mv\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/parameters/substance.py:77\u001b[0m, in \u001b[0;36mSubstanceParameter._validate_substance_data\u001b[0;34m(self, _, data)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;129m@data\u001b[39m\u001b[38;5;241m.\u001b[39mvalidator\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_validate_substance_data\u001b[39m( \u001b[38;5;66;03m# noqa: DOC101, DOC103\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28mself\u001b[39m, _: Any, data: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Smiles]\n\u001b[1;32m 70\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 71\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Validate that the substance data, provided as SMILES, is valid.\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \n\u001b[1;32m 73\u001b[0m \u001b[38;5;124;03m Raises:\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;124;03m ValueError: If one or more of the SMILES are invalid.\u001b[39;00m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m ValueError: If the several entries represent the same substance.\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 77\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m chemistry\n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# Check for invalid SMILES\u001b[39;00m\n\u001b[1;32m 80\u001b[0m canonical_smiles \u001b[38;5;241m=\u001b[39m {}\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/utils/chemistry.py:21\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mChem\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrdMolDescriptors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GetMorganFingerprintAsBitVect\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m OptionalImportError(\n\u001b[1;32m 22\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"The requested functionality requires the installation of optional \"\"\"\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"chemistry dependencies. Please run \"pip install 'baybe[chem]'\".\"\"\"\u001b[39;00m\n\u001b[1;32m 24\u001b[0m )\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnumerical\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DTypeFloatNumpy\n\u001b[1;32m 28\u001b[0m _mordred_calculator \u001b[38;5;241m=\u001b[39m Calculator(descriptors)\n", - "\u001b[0;31mOptionalImportError\u001b[0m: The requested functionality requires the installation of optional chemistry dependencies. Please run \"pip install 'baybe[chem]'\"." - ] + "data": { + "text/plain": [ + "SubstanceParameter(name='Solvent', data={'Water': 'O', '1-Octanol': 'CCCCCCCCO', 'Toluene': 'CC1=CC=CC=C1'}, decorrelate=0.7, encoding=)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -208,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -220,7 +323,7 @@ "Polymer C -39 241, decorrelate=True, encoding=)" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -250,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -284,7 +387,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [