From 421de83750a86e94f4e20a196545e2ee3f43be11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20W=C3=BCrger?=
<44372393+koerper@users.noreply.github.com>
Date: Wed, 27 Mar 2024 18:31:29 +0000
Subject: [PATCH] Add data preview and target/objective cells to inhibitor notebook
---
src/tim_baybe-inhibitor.ipynb | 209 ++++++++++++++++++++++++++++++++--
1 file changed, 199 insertions(+), 10 deletions(-)
diff --git a/src/tim_baybe-inhibitor.ipynb b/src/tim_baybe-inhibitor.ipynb
index c584dc7..20f4168 100644
--- a/src/tim_baybe-inhibitor.ipynb
+++ b/src/tim_baybe-inhibitor.ipynb
@@ -18,7 +18,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Initizalization"
+ "# Initialization"
]
},
{
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -38,17 +38,197 @@
"import numpy as np\n",
"from baybe import Campaign\n",
"\n",
- "df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')\n",
- "df_AA1000 = pd.read_excel('data/filtered_AA1000.xlsx')\n",
- "df_Al = pd.read_excel('data/filtered_Al.xlsx')"
+ "df_AA2024 = pd.read_excel('../data/filtered_AA2024.xlsx')\n",
+ "df_AA1000 = pd.read_excel('../data/filtered_AA1000.xlsx')\n",
+ "df_Al = pd.read_excel('../data/filtered_Al.xlsx')"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " SMILES | \n",
+ " Time_h | \n",
+ " pH | \n",
+ " Inhib_Concentrat_M | \n",
+ " Salt_Concentrat_M | \n",
+ " Efficiency | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " COCCOC(=O)OCSc1nc2c(s1)cccc2 | \n",
+ " 24.0 | \n",
+ " 4.0 | \n",
+ " 0.001 | \n",
+ " 0.1 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " COCCOC(=O)OCSc1nc2c(s1)cccc2 | \n",
+ " 24.0 | \n",
+ " 10.0 | \n",
+ " 0.001 | \n",
+ " 0.1 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Cc1ccc(c(c1)n1nc2c(n1)cccc2)O | \n",
+ " 24.0 | \n",
+ " 4.0 | \n",
+ " 0.001 | \n",
+ " 0.1 | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Cc1ccc(c(c1)n1nc2c(n1)cccc2)O | \n",
+ " 24.0 | \n",
+ " 10.0 | \n",
+ " 0.001 | \n",
+ " 0.1 | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O | \n",
+ " 24.0 | \n",
+ " 4.0 | \n",
+ " 0.001 | \n",
+ " 0.1 | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " SMILES Time_h pH Inhib_Concentrat_M \\\n",
+ "0 COCCOC(=O)OCSc1nc2c(s1)cccc2 24.0 4.0 0.001 \n",
+ "1 COCCOC(=O)OCSc1nc2c(s1)cccc2 24.0 10.0 0.001 \n",
+ "2 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O 24.0 4.0 0.001 \n",
+ "3 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O 24.0 10.0 0.001 \n",
+ "4 Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O 24.0 4.0 0.001 \n",
+ "\n",
+ " Salt_Concentrat_M Efficiency \n",
+ "0 0.1 0.0 \n",
+ "1 0.1 0.0 \n",
+ "2 0.1 30.0 \n",
+ "3 0.1 30.0 \n",
+ "4 0.1 30.0 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_AA2024.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['COCCOC(=O)OCSc1nc2c(s1)cccc2', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O',\n",
+ " 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'On1nnc2c1cccc2',\n",
+ " 'c1ncn[nH]1', 'Sc1n[nH]cn1', 'S[C]1NC2=C[CH]C=NC2=N1',\n",
+ " 'S=c1[nH]c2c([nH]1)nccn2', 'Sc1ncc[nH]1',\n",
+ " 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CCSc1nnc(s1)N', 'CSc1nnc(s1)N',\n",
+ " 'Cc1ccc2c(c1)nc([nH]2)S', 'OC(=O)CS', 'Sc1nc2c([nH]1)cccc2',\n",
+ " 'OC(=O)c1ccccc1S', 'S=c1sc2c([nH]1)cccc2', 'OC(=O)c1cccnc1S',\n",
+ " 'Sc1ncccn1', 'c1ccc(nc1)c1ccccn1', 'Sc1nnc(s1)S',\n",
+ " 'Nc1cc(S)nc(n1)N', 'Nc1nc([nH]n1)C(=O)O', 'Nc1n[nH]cn1',\n",
+ " 'OC(=O)c1n[nH]c(n1)N', 'Nc1n[nH]c(n1)S', 'CS[C]1N[N]C(=N1)N',\n",
+ " 'C1=CC(=CC(=C1)S)C(=O)O', 'OC(=O)CCS', 'Oc1ccccc1c1nnc([nH]1)S',\n",
+ " 'Nn1cnnc1', 'Nc1ccnc(n1)S', 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1',\n",
+ " 'Sc1nc(N)c2c(n1)[nH]nc2', 'Oc1ccc(cc1)C(=O)O', 'OC(=O)c1ccc(cc1)S',\n",
+ " 'Cn1cnnc1S', 'Sc1nc(N)c(c(n1)S)N', 'Nc1ncncc1N', 'Nc1cc(N)nc(n1)S',\n",
+ " 'Cc1cc(C)nc(n1)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1',\n",
+ " 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1',\n",
+ " 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'S=c1[nH]nc([nH]1)c1ccco1',\n",
+ " 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccncc1',\n",
+ " 'Nc1n[nH]c(=S)s1', 'Cc1nsc(c1)N', 'Clc1ccc2c(c1)[nH]c(n2)S',\n",
+ " 'CCOc1ccc2c(c1)nc([nH]2)S', 'Cn1nnnc1S', 'OC(=O)Cn1nnnc1S',\n",
+ " 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'Cc1n[nH]c(=S)s1',\n",
+ " 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl',\n",
+ " 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'Nc1ccc2c(c1)sc(=S)[nH]2',\n",
+ " 'OC(=O)c1ccc(=S)[nH]c1', 'Oc1cccc2c1nccc2',\n",
+ " 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2',\n",
+ " 'CC(=O)O', 'OC(=O)CCCCC(=O)O', 'OC(=O)c1ccccc1',\n",
+ " 'c1ccc2c(c1)[nH]nn2', 'OC(=O)c1ccc(cc1)c1ccccc1',\n",
+ " 'OC(=O)/C=C/c1ccccc1', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O',\n",
+ " 'O[C@H]1C(=O)OCC1(C)C', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O',\n",
+ " 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'CC(=O)SSC(=O)C',\n",
+ " 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC', 'O/N=C(/C(=N/O)/C)\\\\C',\n",
+ " 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCCCCCOS(=O)(=O)O',\n",
+ " 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O',\n",
+ " 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1',\n",
+ " 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OCC(CO)O',\n",
+ " 'NCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'C1N2CN3CN1CN(C2)C3',\n",
+ " 'NO', 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'OC(=O)c1ccncc1',\n",
+ " 'C1COCCN1CCCS(=O)(=O)O', 'OC(=O)c1cccnc1',\n",
+ " 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'C(=O)(C(=O)[O-])[O-]',\n",
+ " 'OC(=O)c1ccc(cc1)N', 'Oc1ccc(cc1)S([O])([O])O', 'OC(=O)c1ccccn1',\n",
+ " 'OC(=O)c1ccccc1O', 'CCCCCCCCCCCCCCCCCC(=O)O', 'SC#N',\n",
+ " 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]',\n",
+ " '[O-]S(=O)[O-].[Na+].[Na+]', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]',\n",
+ " 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]',\n",
+ " 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]',\n",
+ " 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C',\n",
+ " 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',\n",
+ " 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]',\n",
+ " '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]',\n",
+ " '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]',\n",
+ " '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]',\n",
+ " '[Cl-].[Cl-].[Cl-].[Ce+3]', 'CNCC(C1=CC(=CC=C1)O)O',\n",
+ " 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]',\n",
+ " 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]',\n",
+ " 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N',\n",
+ " 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O',\n",
+ " 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O',\n",
+ " 'C1=CC2=NNN=C2C=C1Cl', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]',\n",
+ " 'COC(=O)n1nnc2ccccc12'], dtype=object)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_AA2024.SMILES.unique()"
+ ]
},
{
"cell_type": "markdown",
@@ -110,7 +290,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Objective"
+ "## Target & Objective"
]
},
{
@@ -118,7 +298,16 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "from baybe.targets import NumericalTarget\n",
+ "from baybe.objective import Objective\n",
+ "\n",
+ "target = NumericalTarget(\n",
+ " name=\"Efficiency\",\n",
+ " mode=\"MAX\",\n",
+ ")\n",
+ "objective = Objective(mode=\"SINGLE\", targets=[target])"
+ ]
},
{
"cell_type": "markdown",