From 0571abd7474b2c3330f7d09523807eb6734ae707 Mon Sep 17 00:00:00 2001 From: Alex6022 <7952557+Alex6022@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:21:51 +0000 Subject: [PATCH 1/3] Implemented first notebook --- BayBE_basic.ipynb | 146 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 BayBE_basic.ipynb diff --git a/BayBE_basic.ipynb b/BayBE_basic.ipynb new file mode 100644 index 0000000..70ab7b4 --- /dev/null +++ b/BayBE_basic.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import filtered data set" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " SMILES Time_h pH Inhib_Concentrat_M \\\n", + "0 COCCOC(=O)OCSc1nc2c(s1)cccc2 24.0 4.0 0.001 \n", + "1 COCCOC(=O)OCSc1nc2c(s1)cccc2 24.0 10.0 0.001 \n", + "2 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O 24.0 4.0 0.001 \n", + "3 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O 24.0 10.0 0.001 \n", + "4 Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O 24.0 4.0 0.001 \n", + "\n", + " Efficiency \n", + "0 0.0 \n", + "1 0.0 \n", + "2 30.0 \n", + "3 30.0 \n", + "4 30.0 \n" + ] + } + ], + "source": [ + "print(df_AA2024.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set objectives and parameters in BayBE" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.targets import NumericalTarget\n", + "from baybe.objective import Objective\n", + "\n", + "target = NumericalTarget(\n", + " name=\"Efficiency\",\n", + " mode=\"MAX\",\n", + ")\n", + "objective = Objective(mode=\"SINGLE\", targets=[target])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "OptionalImportError", + "evalue": "The requested functionality requires the installation of optional chemistry dependencies. Please run \"pip install 'baybe[chem]'\".", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/utils/chemistry.py:17\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmordred\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Calculator, descriptors\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Chem, RDLogger\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mordred'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mOptionalImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 18\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mparameters\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 2\u001b[0m CategoricalParameter,\n\u001b[1;32m 3\u001b[0m NumericalDiscreteParameter,\n\u001b[1;32m 4\u001b[0m SubstanceParameter,\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 8\u001b[0m CategoricalParameter(\n\u001b[1;32m 9\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGranularity\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 10\u001b[0m values\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoarse\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmedium\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfine\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 11\u001b[0m encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOHE\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;66;03m# one-hot encoding of categories\u001b[39;00m\n\u001b[1;32m 12\u001b[0m ),\n\u001b[1;32m 13\u001b[0m NumericalDiscreteParameter(\n\u001b[1;32m 14\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPressure[bar]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 15\u001b[0m values\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m5\u001b[39m, \u001b[38;5;241m10\u001b[39m],\n\u001b[1;32m 16\u001b[0m tolerance\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.2\u001b[39m, \u001b[38;5;66;03m# allows experimental inaccuracies up to 0.2 when reading values\u001b[39;00m\n\u001b[1;32m 17\u001b[0m ),\n\u001b[0;32m---> 18\u001b[0m \u001b[43mSubstanceParameter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent A\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCOC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent B\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCCC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# label-SMILES pairs\u001b[39;49;00m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent C\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent D\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCS(=O)C\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMORDRED\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# chemical encoding via mordred package\u001b[39;49;00m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 28\u001b[0m ]\n", + "File \u001b[0;32m:10\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, name, data, decorrelate, encoding)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _config\u001b[38;5;241m.\u001b[39m_run_validators \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 9\u001b[0m __attr_validator_name(\u001b[38;5;28mself\u001b[39m, __attr_name, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname)\n\u001b[0;32m---> 10\u001b[0m \u001b[43m__attr_validator_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m__attr_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m __attr_validator_decorrelate(\u001b[38;5;28mself\u001b[39m, __attr_decorrelate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorrelate)\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/attr/_make.py:3059\u001b[0m, in \u001b[0;36m_AndValidator.__call__\u001b[0;34m(self, inst, attr, value)\u001b[0m\n\u001b[1;32m 3057\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, inst, attr, value):\n\u001b[1;32m 3058\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validators:\n\u001b[0;32m-> 3059\u001b[0m \u001b[43mv\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/parameters/substance.py:77\u001b[0m, in \u001b[0;36mSubstanceParameter._validate_substance_data\u001b[0;34m(self, _, data)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;129m@data\u001b[39m\u001b[38;5;241m.\u001b[39mvalidator\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_validate_substance_data\u001b[39m( \u001b[38;5;66;03m# noqa: DOC101, DOC103\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28mself\u001b[39m, _: Any, data: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Smiles]\n\u001b[1;32m 70\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 71\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Validate that the substance data, provided as SMILES, is valid.\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \n\u001b[1;32m 73\u001b[0m \u001b[38;5;124;03m Raises:\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;124;03m ValueError: If one or more of the SMILES are invalid.\u001b[39;00m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m ValueError: If the several entries represent the same substance.\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 77\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m chemistry\n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# Check for invalid SMILES\u001b[39;00m\n\u001b[1;32m 80\u001b[0m canonical_smiles \u001b[38;5;241m=\u001b[39m {}\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/utils/chemistry.py:21\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mChem\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrdMolDescriptors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GetMorganFingerprintAsBitVect\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m OptionalImportError(\n\u001b[1;32m 22\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"The requested functionality requires the installation of optional \"\"\"\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"chemistry dependencies. Please run \"pip install 'baybe[chem]'\".\"\"\"\u001b[39;00m\n\u001b[1;32m 24\u001b[0m )\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnumerical\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DTypeFloatNumpy\n\u001b[1;32m 28\u001b[0m _mordred_calculator \u001b[38;5;241m=\u001b[39m Calculator(descriptors)\n", + "\u001b[0;31mOptionalImportError\u001b[0m: The requested functionality requires the installation of optional chemistry dependencies. Please run \"pip install 'baybe[chem]'\"." + ] + } + ], + "source": [ + "from baybe.parameters import (\n", + " CategoricalParameter,\n", + " NumericalDiscreteParameter,\n", + " SubstanceParameter,\n", + ")\n", + "\n", + "parameters = [\n", + " NumericalDiscreteParameter(\n", + " name=\"Time[h]\",\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.searchspace import SearchSpace\n", + "\n", + "searchspace = SearchSpace.from_product(parameters)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 98f43ee84f8f06584b296533fb0701cb64490ffd Mon Sep 17 00:00:00 2001 From: Alex6022 <7952557+Alex6022@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:37:29 +0000 Subject: [PATCH 2/3] Implemented basic workflow --- BayBE_basic.ipynb | 109 +++++++++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 31 deletions(-) diff --git a/BayBE_basic.ipynb b/BayBE_basic.ipynb index 70ab7b4..be1cf97 100644 --- a/BayBE_basic.ipynb +++ b/BayBE_basic.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -21,7 +21,32 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Time_h pH Inhib_Concentrat_M Efficiency\n", + "count 611.000000 611.000000 611.000000 611.000000\n", + "mean 135.801964 6.342062 0.006808 26.736841\n", + "std 201.683867 2.529080 0.014059 288.788317\n", + "min 0.500000 0.000000 0.000010 -4834.000000\n", + "25% 24.000000 4.000000 0.000500 30.000000\n", + "50% 24.000000 7.000000 0.001000 58.000000\n", + "75% 144.000000 7.000000 0.003000 87.950000\n", + "max 672.000000 10.000000 0.100000 100.000000\n" + ] + } + ], + "source": [ + "print(df_AA2024.describe())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -52,12 +77,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Set objectives and parameters in BayBE" + "## Set objectives and parameters in BayBE to define the search space" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -73,46 +98,29 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 31, "metadata": {}, - "outputs": [ - { - "ename": "OptionalImportError", - "evalue": "The requested functionality requires the installation of optional chemistry dependencies. Please run \"pip install 'baybe[chem]'\".", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/utils/chemistry.py:17\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmordred\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Calculator, descriptors\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Chem, RDLogger\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'mordred'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mOptionalImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 18\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mparameters\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 2\u001b[0m CategoricalParameter,\n\u001b[1;32m 3\u001b[0m NumericalDiscreteParameter,\n\u001b[1;32m 4\u001b[0m SubstanceParameter,\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 8\u001b[0m CategoricalParameter(\n\u001b[1;32m 9\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGranularity\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 10\u001b[0m values\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoarse\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmedium\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfine\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 11\u001b[0m encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOHE\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;66;03m# one-hot encoding of categories\u001b[39;00m\n\u001b[1;32m 12\u001b[0m ),\n\u001b[1;32m 13\u001b[0m NumericalDiscreteParameter(\n\u001b[1;32m 14\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPressure[bar]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 15\u001b[0m values\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m5\u001b[39m, \u001b[38;5;241m10\u001b[39m],\n\u001b[1;32m 16\u001b[0m tolerance\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.2\u001b[39m, \u001b[38;5;66;03m# allows experimental inaccuracies up to 0.2 when reading values\u001b[39;00m\n\u001b[1;32m 17\u001b[0m ),\n\u001b[0;32m---> 18\u001b[0m \u001b[43mSubstanceParameter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent A\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCOC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent B\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCCC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# label-SMILES pairs\u001b[39;49;00m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent C\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSolvent D\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mCS(=O)C\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMORDRED\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# chemical encoding via mordred package\u001b[39;49;00m\n\u001b[1;32m 27\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 28\u001b[0m ]\n", - "File \u001b[0;32m:10\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, name, data, decorrelate, encoding)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _config\u001b[38;5;241m.\u001b[39m_run_validators \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 9\u001b[0m __attr_validator_name(\u001b[38;5;28mself\u001b[39m, __attr_name, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname)\n\u001b[0;32m---> 10\u001b[0m \u001b[43m__attr_validator_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m__attr_data\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m __attr_validator_decorrelate(\u001b[38;5;28mself\u001b[39m, __attr_decorrelate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecorrelate)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/attr/_make.py:3059\u001b[0m, in \u001b[0;36m_AndValidator.__call__\u001b[0;34m(self, inst, attr, value)\u001b[0m\n\u001b[1;32m 3057\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, inst, attr, value):\n\u001b[1;32m 3058\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validators:\n\u001b[0;32m-> 3059\u001b[0m \u001b[43mv\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/parameters/substance.py:77\u001b[0m, in \u001b[0;36mSubstanceParameter._validate_substance_data\u001b[0;34m(self, _, data)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;129m@data\u001b[39m\u001b[38;5;241m.\u001b[39mvalidator\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_validate_substance_data\u001b[39m( \u001b[38;5;66;03m# noqa: DOC101, DOC103\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;28mself\u001b[39m, _: Any, data: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Smiles]\n\u001b[1;32m 70\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 71\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Validate that the substance data, provided as SMILES, is valid.\u001b[39;00m\n\u001b[1;32m 72\u001b[0m \n\u001b[1;32m 73\u001b[0m \u001b[38;5;124;03m Raises:\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;124;03m ValueError: If one or more of the SMILES are invalid.\u001b[39;00m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m ValueError: If the several entries represent the same substance.\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 77\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m chemistry\n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# Check for invalid SMILES\u001b[39;00m\n\u001b[1;32m 80\u001b[0m canonical_smiles \u001b[38;5;241m=\u001b[39m {}\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/utils/chemistry.py:21\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrdkit\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mChem\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrdMolDescriptors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GetMorganFingerprintAsBitVect\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m OptionalImportError(\n\u001b[1;32m 22\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"The requested functionality requires the installation of optional \"\"\"\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"chemistry dependencies. Please run \"pip install 'baybe[chem]'\".\"\"\"\u001b[39;00m\n\u001b[1;32m 24\u001b[0m )\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbaybe\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnumerical\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DTypeFloatNumpy\n\u001b[1;32m 28\u001b[0m _mordred_calculator \u001b[38;5;241m=\u001b[39m Calculator(descriptors)\n", - "\u001b[0;31mOptionalImportError\u001b[0m: The requested functionality requires the installation of optional chemistry dependencies. Please run \"pip install 'baybe[chem]'\"." - ] - } - ], + "outputs": [], "source": [ "from baybe.parameters import (\n", - " CategoricalParameter,\n", - " NumericalDiscreteParameter,\n", - " SubstanceParameter,\n", + " NumericalContinuousParameter,\n", ")\n", "\n", "parameters = [\n", - " NumericalDiscreteParameter(\n", + " NumericalContinuousParameter(\n", " name=\"Time[h]\",\n", + " bounds=(0.500000, 672),\n", + " ),\n", + " NumericalContinuousParameter(\n", + " name=\"pH\",\n", + " bounds=(1, 14),\n", " ),\n", "]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -120,6 +128,45 @@ "\n", "searchspace = SearchSpace.from_product(parameters)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the campaign" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe import Campaign\n", + "\n", + "campaign = Campaign(searchspace, objective)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Time[h] pH\n", + "0 638.310058 2.638211\n", + "1 475.753030 2.599470\n", + "2 452.512074 4.018597\n" + ] + } + ], + "source": [ + "df = campaign.recommend(batch_size=3)\n", + "print(df)" + ] } ], "metadata": { From 1a536657ceef852b16f9dbc8760b1d4a5545d5ad Mon Sep 17 00:00:00 2001 From: Alex6022 <7952557+Alex6022@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:45:05 +0000 Subject: [PATCH 3/3] Added data import to baybe_inhibitor notebook --- baybe-inhibitor.ipynb | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/baybe-inhibitor.ipynb b/baybe-inhibitor.ipynb index 23329e4..2a932e6 100644 --- a/baybe-inhibitor.ipynb +++ b/baybe-inhibitor.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -11,29 +11,44 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", - " warnings.warn(\n", - "/home/vscode/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" + " Time_h pH Inhib_Concentrat_M Efficiency\n", + "count 611.000000 611.000000 611.000000 611.000000\n", + "mean 135.801964 6.342062 0.006808 26.736841\n", + "std 201.683867 2.529080 0.014059 288.788317\n", + "min 0.500000 0.000000 0.000010 -4834.000000\n", + "25% 24.000000 4.000000 0.000500 30.000000\n", + "50% 24.000000 7.000000 0.001000 58.000000\n", + "75% 144.000000 7.000000 0.003000 87.950000\n", + "max 672.000000 10.000000 0.100000 100.000000\n" ] } ], "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from baybe import Campaign" + "print(df_AA2024.describe())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [