From e905b0879a0254c2294491dfe68c3d92638389d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Can=20=C3=96zkan?= <128815525+canozkan42@users.noreply.github.com> Date: Wed, 27 Mar 2024 20:56:35 +0000 Subject: [PATCH] working non-simulation now --- can_baybe-inhibitor.ipynb | 517 ++++++++++++++++++++++++++++---------- 1 file changed, 382 insertions(+), 135 deletions(-) diff --git a/can_baybe-inhibitor.ipynb b/can_baybe-inhibitor.ipynb index 381d3d0..26e7ce6 100644 --- a/can_baybe-inhibitor.ipynb +++ b/can_baybe-inhibitor.ipynb @@ -30,14 +30,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", + "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", " warnings.warn(\n", "/home/vscode/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" @@ -68,7 +68,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -437,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -454,80 +454,214 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[NumericalDiscreteParameter(name='Time_h', encoding=None, _values=[0.5, 1.0, 2.0, 3.0, 6.0, 24.0, 48.0, 72.0, 96.0, 120.0, 144.0, 168.0, 192.0, 240.0, 288.0, 336.0, 360.0, 384.0, 432.0, 480.0, 528.0, 576.0, 600.0, 624.0, 672.0], tolerance=0.0),\n", + " NumericalDiscreteParameter(name='pH', encoding=None, _values=[0.0, 3.3, 4.0, 4.4, 5.4, 5.5, 5.6, 7.0, 10.0], tolerance=0.0),\n", + " NumericalDiscreteParameter(name='Inhib_Concentrat_M', encoding=None, _values=[1e-05, 5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0008, 0.001, 0.0012, 0.0018, 0.0024, 0.003, 0.005, 0.01, 0.011, 0.021, 0.022, 0.031, 0.033, 0.042, 0.044, 0.05, 0.1], tolerance=0.0),\n", + " NumericalDiscreteParameter(name='Salt_Concentrat_M', encoding=None, _values=[0.0, 0.01, 0.05, 0.1, 0.5, 0.6], tolerance=0.0),\n", + " SubstanceParameter(name='SMILES', data={'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'OC(=O)CS': 'OC(=O)CS', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'Sc1ncccn1': 'Sc1ncccn1', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'Nn1cnnc1': 'Nn1cnnc1', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'CCOc1ccc2c(c1)nc([nH]2)S': 'CCOc1ccc2c(c1)nc([nH]2)S', 'Cn1nnnc1S': 'Cn1nnnc1S', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'CC(=O)O': 'CC(=O)O', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OCC(CO)O': 'OCC(CO)O', 'NCC(=O)O': 'NCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'NO': 'NO', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O': 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'SC#N': 'SC#N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12'}, decorrelate=0.7, encoding=)]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# parameters\n", - "from baybe.parameters import NumericalDiscreteParameter, SubstanceParameter, NumericalContinuousParameter\n", - "from baybe.searchspace import SearchSpace\n", - "\n", - "# chemical space dictionary\n", - "unique_SMILES = df_active.SMILES.unique()\n", - "\n", - "def list_to_dict(input_list):\n", - " return {item: item for item in input_list}\n", - "\n", - "smiles_dict =list_to_dict(unique_SMILES)\n", - "\n", - "\n", "# parameters\n", "parameters = [\n", "NumericalDiscreteParameter(\n", - " name=\"Time (h)\",\n", - " values=np.arange(1, 25, 1)\n", + " name=\"Time_h\",\n", + " values=df_active['Time_h'].unique(),\n", " # tolerance = 0.004, assume certain experimental noise for each parameter measurement?\n", "),\n", "NumericalDiscreteParameter(\n", " name=\"pH\",\n", - " values=np.arange(-1, 15.1, 0.1)\n", + " values=df_active['pH'].unique(),\n", " # tolerance = 0.004\n", " ), \n", "NumericalDiscreteParameter( # Set this as continuous, the values seem quite small?\n", - " name=\"Inhibitor Concentration (M)\",\n", - " values=np.arange(0, 0.1, 0.01), # Remove data outliers like 0.1?\n", + " name=\"Inhib_Concentrat_M\",\n", + " values= df_active['Inhib_Concentrat_M'].unique(),\n", " # tolerance = 0.004\n", " ),\n", "NumericalDiscreteParameter(\n", - " name=\"Salt Concentration (M)\",\n", - " values=np.arange(0, 2.01, 0.01),\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_active['Salt_Concentrat_M'].unique(),\n", " # tolerance = 0.004\n", " ),\n", "SubstanceParameter(\n", - " name=\"Inhibitor\",\n", + " name=\"SMILES\",\n", " data=smiles_dict,\n", " encoding=\"MORDRED\", # optional\n", " decorrelate=0.7, # optional\n", - " )\n", - " ]" + " ) \n", + " ]\n", + "parameters" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "metadata": {}, "outputs": [ { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", - "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", - "\u001b[1;31mClick here for more info. \n", - "\u001b[1;31mView Jupyter log for further details." - ] + "data": { + "text/plain": [ + "SearchSpace(discrete=SubspaceDiscrete(parameters=[NumericalDiscreteParameter(name='Time_h', encoding=None, _values=[0.5, 1.0, 2.0, 3.0, 6.0, 24.0, 48.0, 72.0, 96.0, 120.0, 144.0, 168.0, 192.0, 240.0, 288.0, 336.0, 360.0, 384.0, 432.0, 480.0, 528.0, 576.0, 600.0, 624.0, 672.0], tolerance=0.0), NumericalDiscreteParameter(name='pH', encoding=None, _values=[0.0, 3.3, 4.0, 4.4, 5.4, 5.5, 5.6, 7.0, 10.0], tolerance=0.0), NumericalDiscreteParameter(name='Inhib_Concentrat_M', encoding=None, _values=[1e-05, 5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0008, 0.001, 0.0012, 0.0018, 0.0024, 0.003, 0.005, 0.01, 0.011, 0.021, 0.022, 0.031, 0.033, 0.042, 0.044, 0.05, 0.1], tolerance=0.0), NumericalDiscreteParameter(name='Salt_Concentrat_M', encoding=None, _values=[0.0, 0.01, 0.05, 0.1, 0.5, 0.6], tolerance=0.0), SubstanceParameter(name='SMILES', data={'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'OC(=O)CS': 'OC(=O)CS', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'Sc1ncccn1': 'Sc1ncccn1', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'Nn1cnnc1': 'Nn1cnnc1', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'CCOc1ccc2c(c1)nc([nH]2)S': 'CCOc1ccc2c(c1)nc([nH]2)S', 'Cn1nnnc1S': 'Cn1nnnc1S', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'CC(=O)O': 'CC(=O)O', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OCC(CO)O': 'OCC(CO)O', 'NCC(=O)O': 'NCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'NO': 'NO', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O': 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'SC#N': 'SC#N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12'}, decorrelate=0.7, encoding=)], exp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "0 24.0 4.0 0.0010 0.10 \n", + "1 24.0 10.0 0.0010 0.10 \n", + "2 24.0 4.0 0.0010 0.10 \n", + "3 24.0 10.0 0.0010 0.10 \n", + "4 24.0 4.0 0.0010 0.10 \n", + ".. ... ... ... ... \n", + "606 24.0 7.0 0.0005 0.05 \n", + "607 24.0 7.0 0.0005 0.05 \n", + "608 24.0 7.0 0.0005 0.05 \n", + "609 24.0 7.0 0.0005 0.05 \n", + "610 24.0 7.0 0.0005 0.05 \n", + "\n", + " SMILES \n", + "0 COCCOC(=O)OCSc1nc2c(s1)cccc2 \n", + "1 COCCOC(=O)OCSc1nc2c(s1)cccc2 \n", + "2 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O \n", + "3 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O \n", + "4 Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O \n", + ".. ... \n", + "606 S=c1sc2c([nH]1)cccc2 \n", + "607 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "608 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "609 C(=O)(C(=O)[O-])[O-] \n", + "610 C(=O)(C(=O)[O-])[O-] \n", + "\n", + "[611 rows x 5 columns], metadata= was_recommended was_measured dont_recommend\n", + "0 False False False\n", + "1 False False False\n", + "2 False False False\n", + "3 False False False\n", + "4 False False False\n", + ".. ... ... ...\n", + "606 False False False\n", + "607 False False False\n", + "608 False False False\n", + "609 False False False\n", + "610 False False False\n", + "\n", + "[611 rows x 3 columns], empty_encoding=False, constraints=[], comp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M SMILES_MORDRED_ABC \\\n", + "0 24.0 4.0 0.0010 0.10 14.211085 \n", + "1 24.0 10.0 0.0010 0.10 14.211085 \n", + "2 24.0 4.0 0.0010 0.10 13.532488 \n", + "3 24.0 10.0 0.0010 0.10 13.532488 \n", + "4 24.0 4.0 0.0010 0.10 16.206679 \n", + ".. ... ... ... ... ... \n", + "606 24.0 7.0 0.0005 0.05 7.847124 \n", + "607 24.0 7.0 0.0005 0.05 9.238929 \n", + "608 24.0 7.0 0.0005 0.05 9.238929 \n", + "609 24.0 7.0 0.0005 0.05 3.932653 \n", + "610 24.0 7.0 0.0005 0.05 3.932653 \n", + "\n", + " SMILES_MORDRED_nAcid SMILES_MORDRED_nBase SMILES_MORDRED_nAromAtom \\\n", + "0 0.0 0.0 9.0 \n", + "1 0.0 0.0 9.0 \n", + "2 0.0 0.0 15.0 \n", + "3 0.0 0.0 15.0 \n", + "4 0.0 0.0 11.0 \n", + ".. ... ... ... \n", + "606 0.0 0.0 9.0 \n", + "607 3.0 0.0 0.0 \n", + "608 3.0 0.0 0.0 \n", + "609 2.0 0.0 0.0 \n", + "610 2.0 0.0 0.0 \n", + "\n", + " SMILES_MORDRED_nBridgehead SMILES_MORDRED_nHetero ... \\\n", + "0 0.0 7.0 ... \n", + "1 0.0 7.0 ... \n", + "2 0.0 4.0 ... \n", + "3 0.0 4.0 ... \n", + "4 0.0 5.0 ... \n", + ".. ... ... ... \n", + "606 0.0 3.0 ... \n", + "607 0.0 7.0 ... \n", + "608 0.0 7.0 ... \n", + "609 0.0 4.0 ... \n", + "610 0.0 4.0 ... \n", + "\n", + " SMILES_MORDRED_JGI2 SMILES_MORDRED_JGI3 SMILES_MORDRED_JGI4 \\\n", + "0 0.053333 0.047348 0.025679 \n", + "1 0.053333 0.047348 0.025679 \n", + "2 0.074074 0.049167 0.050028 \n", + "3 0.074074 0.049167 0.050028 \n", + "4 0.104167 0.046456 0.055718 \n", + ".. ... ... ... \n", + "606 0.059259 0.071970 0.042870 \n", + "607 0.117647 0.085938 0.047059 \n", + "608 0.117647 0.085938 0.047059 \n", + "609 0.148148 0.000000 0.000000 \n", + "610 0.148148 0.000000 0.000000 \n", + "\n", + " SMILES_MORDRED_JGI5 SMILES_MORDRED_JGI6 SMILES_MORDRED_JGI7 \\\n", + "0 0.021778 0.007407 0.014227 \n", + "1 0.021778 0.007407 0.014227 \n", + "2 0.026569 0.016799 0.012762 \n", + "3 0.026569 0.016799 0.012762 \n", + "4 0.031875 0.020352 0.014901 \n", + ".. ... ... ... \n", + "606 0.040000 0.000000 0.000000 \n", + "607 0.018519 0.000000 0.000000 \n", + "608 0.018519 0.000000 0.000000 \n", + "609 0.000000 0.000000 0.000000 \n", + "610 0.000000 0.000000 0.000000 \n", + "\n", + " SMILES_MORDRED_JGI8 SMILES_MORDRED_JGI9 SMILES_MORDRED_TopoShapeIndex \\\n", + "0 0.008230 0.006734 0.857143 \n", + "1 0.008230 0.006734 0.857143 \n", + "2 0.010204 0.000000 1.000000 \n", + "3 0.010204 0.000000 1.000000 \n", + "4 0.011255 0.006063 0.833333 \n", + ".. ... ... ... \n", + "606 0.000000 0.000000 0.666667 \n", + "607 0.000000 0.000000 1.000000 \n", + "608 0.000000 0.000000 1.000000 \n", + "609 0.000000 0.000000 0.500000 \n", + "610 0.000000 0.000000 0.500000 \n", + "\n", + " SMILES_MORDRED_MWC06 \n", + "0 7.787797 \n", + "1 7.787797 \n", + "2 8.042056 \n", + "3 8.042056 \n", + "4 8.108623 \n", + ".. ... \n", + "606 7.372118 \n", + "607 7.377134 \n", + "608 7.377134 \n", + "609 5.837730 \n", + "610 5.837730 \n", + "\n", + "[611 rows x 147 columns]), continuous=SubspaceContinuous(parameters=[], constraints_lin_eq=[], constraints_lin_ineq=[]))" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# define search space\n", - "searchspace = SearchSpace.from_product(parameters)\n", + "df_no_target = lookup.drop('Efficiency', axis=1)\n", + "\n", + "searchspace = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters)\n", "searchspace" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -550,7 +684,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -570,28 +704,29 @@ " \u001b[1mDiscrete Search Space\u001b[0m\n", " \n", " \u001b[1mDiscrete Parameters\u001b[0m\n", - " Name Type Num_Values Encoding\n", - " 0 Time (h) NumericalDiscreteParameter 24 None\n", - " 1 pH NumericalDiscreteParameter 161 None\n", - " 2 Inhibitor Concentration (M) NumericalDiscreteParameter 10 None\n", - " 3 Salt Concentration (M) NumericalDiscreteParameter 201 None\n", + " Name Type Num_Values Encoding\n", + " 0 Time_h NumericalDiscreteParameter 25 None\n", + " 1 pH NumericalDiscreteParameter 9 None\n", + " 2 Inhib_Concentrat_M NumericalDiscreteParameter 25 None\n", + " 3 Salt_Concentrat_M NumericalDiscreteParameter 6 None\n", + " 4 SMILES SubstanceParameter 123 SubstanceEncoding.MORDRED\n", " \n", " \u001b[1mExperimental Representation\u001b[0m\n", - " Time (h) pH Inhibitor Concentration (M) Salt Concentration (M)\n", - " 0 1.0 -1.0 0.00 0.00\n", - " 1 1.0 -1.0 0.00 0.01\n", - " 2 1.0 -1.0 0.00 0.02\n", - " ... ... ... ... ...\n", - " 7766637 24.0 15.0 0.09 1.98\n", - " 7766638 24.0 15.0 0.09 1.99\n", - " 7766639 24.0 15.0 0.09 2.00\n", + " Time_h pH ... Salt_Concentrat_M SMILES\n", + " 0 24.0 4.0 ... 0.10 COCCOC(=O)OCSc1nc2c(s1)cccc2\n", + " 1 24.0 10.0 ... 0.10 COCCOC(=O)OCSc1nc2c(s1)cccc2\n", + " 2 24.0 4.0 ... 0.10 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O\n", + " .. ... ... ... ... ...\n", + " 608 24.0 7.0 ... 0.05 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O\n", + " 609 24.0 7.0 ... 0.05 C(=O)(C(=O)[O-])[O-]\n", + " 610 24.0 7.0 ... 0.05 C(=O)(C(=O)[O-])[O-]\n", " \n", - " [7766640 rows x 4 columns]\n", + " [611 rows x 5 columns]\n", " \n", " \u001b[1mMetadata:\u001b[0m\n", - " was_recommended: 0/7766640\n", - " was_measured: 0/7766640\n", - " dont_recommend: 0/7766640\n", + " was_recommended: 0/611\n", + " was_measured: 0/611\n", + " dont_recommend: 0/611\n", " \n", " \u001b[1mConstraints\u001b[0m\n", " Empty DataFrame\n", @@ -599,16 +734,16 @@ " Index: []\n", " \n", " \u001b[1mComputational Representation\u001b[0m\n", - " Time (h) pH Inhibitor Concentration (M) Salt Concentration (M)\n", - " 0 1.0 -1.0 0.00 0.00\n", - " 1 1.0 -1.0 0.00 0.01\n", - " 2 1.0 -1.0 0.00 0.02\n", - " ... ... ... ... ...\n", - " 7766637 24.0 15.0 0.09 1.98\n", - " 7766638 24.0 15.0 0.09 1.99\n", - " 7766639 24.0 15.0 0.09 2.00\n", + " Time_h pH ... SMILES_MORDRED_TopoShapeIndex SMILES_MORDRED_MWC06\n", + " 0 24.0 4.0 ... 0.857143 7.787797\n", + " 1 24.0 10.0 ... 0.857143 7.787797\n", + " 2 24.0 4.0 ... 1.000000 8.042056\n", + " .. ... ... ... ... ...\n", + " 608 24.0 7.0 ... 1.000000 7.377134\n", + " 609 24.0 7.0 ... 0.500000 5.837730\n", + " 610 24.0 7.0 ... 0.500000 5.837730\n", " \n", - " [7766640 rows x 4 columns]\n", + " [611 rows x 147 columns]\n", " \n", " \u001b[1mObjective\u001b[0m\n", " \n", @@ -655,7 +790,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -665,11 +800,11 @@ "\n", "\n", "Recommended experiments: \n", - "| | Time (h) | pH | Inhibitor Concentration (M) | Salt Concentration (M) |\n", - "|--------:|-----------:|-----:|------------------------------:|-------------------------:|\n", - "| 5968590 | 19 | 6.1 | 0.04 | 0.96 |\n", - "| 3119057 | 10 | 9.2 | 0.07 | 1.4 |\n", - "| 4576889 | 15 | 1.3 | 0 | 1.19 |\n" + "| | Time_h | pH | Inhib_Concentrat_M | Salt_Concentrat_M | SMILES |\n", + "|----:|---------:|-----:|---------------------:|--------------------:|:---------------------------------------------------------------------|\n", + "| 484 | 480 | 7 | 0.031 | 0.05 | C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2] |\n", + "| 227 | 0.5 | 7 | 0.01 | 0.6 | C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-] |\n", + "| 394 | 144 | 7 | 1e-05 | 0.01 | [N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3] |\n" ] } ], @@ -682,9 +817,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Input row with index 227 has multiple matches with the search space. This could indicate that something went wrong. Matching only first occurrence.\n" + ] + } + ], "source": [ "new_rec[\"Efficiency\"] = [79.8, 54.1, 59.4]\n", "campaign.add_measurements(new_rec)" @@ -692,65 +835,161 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Campaign(searchspace=SearchSpace(discrete=SubspaceDiscrete(parameters=[NumericalDiscreteParameter(name='Time (h)', encoding=None, _values=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0], tolerance=0.0), NumericalDiscreteParameter(name='pH', encoding=None, _values=[-1.0, -0.9, -0.8, -0.7000000000000001, -0.6000000000000001, -0.5000000000000001, -0.40000000000000013, -0.30000000000000016, -0.20000000000000018, -0.1000000000000002, -2.220446049250313e-16, 0.09999999999999964, 0.19999999999999973, 0.2999999999999998, 0.3999999999999997, 0.49999999999999956, 0.5999999999999996, 0.6999999999999997, 0.7999999999999996, 0.8999999999999995, 0.9999999999999996, 1.0999999999999996, 1.1999999999999993, 1.2999999999999994, 1.3999999999999995, 1.4999999999999996, 1.5999999999999996, 1.6999999999999993, 1.7999999999999994, 1.8999999999999995, 1.9999999999999991, 2.099999999999999, 2.1999999999999993, 2.2999999999999994, 2.3999999999999995, 2.499999999999999, 2.599999999999999, 2.6999999999999993, 2.799999999999999, 2.899999999999999, 2.999999999999999, 3.0999999999999988, 3.1999999999999993, 3.299999999999999, 3.3999999999999986, 3.499999999999999, 3.5999999999999988, 3.6999999999999993, 3.799999999999999, 3.8999999999999986, 3.999999999999999, 4.099999999999999, 4.199999999999999, 4.299999999999999, 4.399999999999999, 4.499999999999999, 4.599999999999999, 4.699999999999998, 4.799999999999999, 4.899999999999999, 4.999999999999998, 5.099999999999999, 5.199999999999998, 5.299999999999999, 5.399999999999999, 5.499999999999998, 5.599999999999999, 5.699999999999998, 5.799999999999999, 5.899999999999999, 5.999999999999998, 6.099999999999999, 6.199999999999998, 6.299999999999998, 6.399999999999999, 6.499999999999998, 6.599999999999998, 6.699999999999998, 6.799999999999998, 6.899999999999999, 6.999999999999998, 7.099999999999998, 7.1999999999999975, 7.299999999999999, 7.399999999999999, 7.499999999999998, 7.599999999999998, 7.6999999999999975, 7.799999999999997, 7.899999999999999, 7.999999999999998, 8.099999999999998, 8.199999999999998, 8.299999999999997, 8.399999999999999, 8.499999999999998, 8.599999999999998, 8.699999999999998, 8.799999999999997, 8.899999999999999, 8.999999999999998, 9.099999999999998, 9.199999999999998, 9.299999999999997, 9.399999999999999, 9.499999999999998, 9.599999999999998, 9.699999999999998, 9.799999999999997, 9.899999999999997, 9.999999999999998, 10.099999999999998, 10.199999999999998, 10.299999999999997, 10.399999999999997, 10.499999999999998, 10.599999999999998, 10.699999999999998, 10.799999999999997, 10.899999999999997, 10.999999999999996, 11.099999999999998, 11.199999999999998, 11.299999999999997, 11.399999999999997, 11.499999999999996, 11.599999999999998, 11.699999999999998, 11.799999999999997, 11.899999999999997, 11.999999999999996, 12.099999999999998, 12.199999999999998, 12.299999999999997, 12.399999999999997, 12.499999999999996, 12.599999999999998, 12.699999999999998, 12.799999999999997, 12.899999999999997, 12.999999999999996, 13.099999999999996, 13.199999999999998, 13.299999999999997, 13.399999999999997, 13.499999999999996, 13.599999999999996, 13.699999999999998, 13.799999999999997, 13.899999999999997, 13.999999999999996, 14.099999999999996, 14.199999999999996, 14.299999999999997, 14.399999999999997, 14.499999999999996, 14.599999999999996, 14.699999999999996, 14.799999999999997, 14.899999999999997, 14.999999999999996], tolerance=0.0), NumericalDiscreteParameter(name='Inhibitor Concentration (M)', encoding=None, _values=[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09], tolerance=0.0), NumericalDiscreteParameter(name='Salt Concentration (M)', encoding=None, _values=[0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35000000000000003, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41000000000000003, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47000000000000003, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.5700000000000001, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.6900000000000001, 0.7000000000000001, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.8200000000000001, 0.8300000000000001, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.9400000000000001, 0.9500000000000001, 0.96, 0.97, 0.98, 0.99, 1.0, 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09, 1.1, 1.11, 1.12, 1.1300000000000001, 1.1400000000000001, 1.1500000000000001, 1.16, 1.17, 1.18, 1.19, 1.2, 1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29, 1.3, 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.3800000000000001, 1.3900000000000001, 1.4000000000000001, 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49, 1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59, 1.6, 1.61, 1.62, 1.6300000000000001, 1.6400000000000001, 1.6500000000000001, 1.6600000000000001, 1.67, 1.68, 1.69, 1.7, 1.71, 1.72, 1.73, 1.74, 1.75, 1.76, 1.77, 1.78, 1.79, 1.8, 1.81, 1.82, 1.83, 1.84, 1.85, 1.86, 1.87, 1.8800000000000001, 1.8900000000000001, 1.9000000000000001, 1.9100000000000001, 1.92, 1.93, 1.94, 1.95, 1.96, 1.97, 1.98, 1.99, 2.0], tolerance=0.0)], exp_rep= Time (h) pH Inhibitor Concentration (M) Salt Concentration (M)\n", - "0 1.0 -1.0 0.00 0.00\n", - "1 1.0 -1.0 0.00 0.01\n", - "2 1.0 -1.0 0.00 0.02\n", - "3 1.0 -1.0 0.00 0.03\n", - "4 1.0 -1.0 0.00 0.04\n", - "... ... ... ... ...\n", - "7766635 24.0 15.0 0.09 1.96\n", - "7766636 24.0 15.0 0.09 1.97\n", - "7766637 24.0 15.0 0.09 1.98\n", - "7766638 24.0 15.0 0.09 1.99\n", - "7766639 24.0 15.0 0.09 2.00\n", - "\n", - "[7766640 rows x 4 columns], metadata= was_recommended was_measured dont_recommend\n", - "0 False False False\n", - "1 False False False\n", - "2 False False False\n", - "3 False False False\n", - "4 False False False\n", - "... ... ... ...\n", - "7766635 False False False\n", - "7766636 False False False\n", - "7766637 False False False\n", - "7766638 False False False\n", - "7766639 False False False\n", - "\n", - "[7766640 rows x 3 columns], empty_encoding=False, constraints=[], comp_rep= Time (h) pH Inhibitor Concentration (M) Salt Concentration (M)\n", - "0 1.0 -1.0 0.00 0.00\n", - "1 1.0 -1.0 0.00 0.01\n", - "2 1.0 -1.0 0.00 0.02\n", - "3 1.0 -1.0 0.00 0.03\n", - "4 1.0 -1.0 0.00 0.04\n", - "... ... ... ... ...\n", - "7766635 24.0 15.0 0.09 1.96\n", - "7766636 24.0 15.0 0.09 1.97\n", - "7766637 24.0 15.0 0.09 1.98\n", - "7766638 24.0 15.0 0.09 1.99\n", - "7766639 24.0 15.0 0.09 2.00\n", - "\n", - "[7766640 rows x 4 columns]), continuous=SubspaceContinuous(parameters=[], constraints_lin_eq=[], constraints_lin_ineq=[])), objective=Objective(mode='SINGLE', targets=[NumericalTarget(name='Efficiency', mode=, bounds=Interval(lower=-inf, upper=inf), transformation=None)], weights=[100.0], combine_func='GEOM_MEAN'), recommender=TwoPhaseMetaRecommender(allow_repeated_recommendations=None, allow_recommending_already_measured=None, initial_recommender=RandomRecommender(allow_repeated_recommendations=False, allow_recommending_already_measured=True), recommender=SequentialGreedyRecommender(allow_repeated_recommendations=False, allow_recommending_already_measured=False, surrogate_model=GaussianProcessSurrogate(model_params={}, _model=None), acquisition_function_cls='qEI', _acquisition_function=None, hybrid_sampler='Farthest', sampling_percentage=0.3), switch_after=1), n_batches_done=1, n_fits_done=0, _measurements_exp= Time (h) pH Inhibitor Concentration (M) Salt Concentration (M) \\\n", - "0 19.0 6.1 0.04 0.96 \n", - "1 10.0 9.2 0.07 1.40 \n", - "2 15.0 1.3 0.00 1.19 \n", - "\n", - " Efficiency BatchNr FitNr \n", - "0 79.8 1 NaN \n", - "1 54.1 1 NaN \n", - "2 59.4 1 NaN , _cached_recommendation=Empty DataFrame\n", + "Campaign(searchspace=SearchSpace(discrete=SubspaceDiscrete(parameters=[NumericalDiscreteParameter(name='Time_h', encoding=None, _values=[0.5, 1.0, 2.0, 3.0, 6.0, 24.0, 48.0, 72.0, 96.0, 120.0, 144.0, 168.0, 192.0, 240.0, 288.0, 336.0, 360.0, 384.0, 432.0, 480.0, 528.0, 576.0, 600.0, 624.0, 672.0], tolerance=0.0), NumericalDiscreteParameter(name='pH', encoding=None, _values=[0.0, 3.3, 4.0, 4.4, 5.4, 5.5, 5.6, 7.0, 10.0], tolerance=0.0), NumericalDiscreteParameter(name='Inhib_Concentrat_M', encoding=None, _values=[1e-05, 5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0008, 0.001, 0.0012, 0.0018, 0.0024, 0.003, 0.005, 0.01, 0.011, 0.021, 0.022, 0.031, 0.033, 0.042, 0.044, 0.05, 0.1], tolerance=0.0), NumericalDiscreteParameter(name='Salt_Concentrat_M', encoding=None, _values=[0.0, 0.01, 0.05, 0.1, 0.5, 0.6], tolerance=0.0), SubstanceParameter(name='SMILES', data={'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'OC(=O)CS': 'OC(=O)CS', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'Sc1ncccn1': 'Sc1ncccn1', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'Nn1cnnc1': 'Nn1cnnc1', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'CCOc1ccc2c(c1)nc([nH]2)S': 'CCOc1ccc2c(c1)nc([nH]2)S', 'Cn1nnnc1S': 'Cn1nnnc1S', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'CC(=O)O': 'CC(=O)O', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OCC(CO)O': 'OCC(CO)O', 'NCC(=O)O': 'NCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'NO': 'NO', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O': 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'SC#N': 'SC#N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12'}, decorrelate=0.7, encoding=)], exp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "0 24.0 4.0 0.0010 0.10 \n", + "1 24.0 10.0 0.0010 0.10 \n", + "2 24.0 4.0 0.0010 0.10 \n", + "3 24.0 10.0 0.0010 0.10 \n", + "4 24.0 4.0 0.0010 0.10 \n", + ".. ... ... ... ... \n", + "606 24.0 7.0 0.0005 0.05 \n", + "607 24.0 7.0 0.0005 0.05 \n", + "608 24.0 7.0 0.0005 0.05 \n", + "609 24.0 7.0 0.0005 0.05 \n", + "610 24.0 7.0 0.0005 0.05 \n", + "\n", + " SMILES \n", + "0 COCCOC(=O)OCSc1nc2c(s1)cccc2 \n", + "1 COCCOC(=O)OCSc1nc2c(s1)cccc2 \n", + "2 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O \n", + "3 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O \n", + "4 Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O \n", + ".. ... \n", + "606 S=c1sc2c([nH]1)cccc2 \n", + "607 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "608 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "609 C(=O)(C(=O)[O-])[O-] \n", + "610 C(=O)(C(=O)[O-])[O-] \n", + "\n", + "[611 rows x 5 columns], metadata= was_recommended was_measured dont_recommend\n", + "0 False False False\n", + "1 False False False\n", + "2 False False False\n", + "3 False False False\n", + "4 False False False\n", + ".. ... ... ...\n", + "606 False False False\n", + "607 False False False\n", + "608 False False False\n", + "609 False False False\n", + "610 False False False\n", + "\n", + "[611 rows x 3 columns], empty_encoding=False, constraints=[], comp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M SMILES_MORDRED_ABC \\\n", + "0 24.0 4.0 0.0010 0.10 14.211085 \n", + "1 24.0 10.0 0.0010 0.10 14.211085 \n", + "2 24.0 4.0 0.0010 0.10 13.532488 \n", + "3 24.0 10.0 0.0010 0.10 13.532488 \n", + "4 24.0 4.0 0.0010 0.10 16.206679 \n", + ".. ... ... ... ... ... \n", + "606 24.0 7.0 0.0005 0.05 7.847124 \n", + "607 24.0 7.0 0.0005 0.05 9.238929 \n", + "608 24.0 7.0 0.0005 0.05 9.238929 \n", + "609 24.0 7.0 0.0005 0.05 3.932653 \n", + "610 24.0 7.0 0.0005 0.05 3.932653 \n", + "\n", + " SMILES_MORDRED_nAcid SMILES_MORDRED_nBase SMILES_MORDRED_nAromAtom \\\n", + "0 0.0 0.0 9.0 \n", + "1 0.0 0.0 9.0 \n", + "2 0.0 0.0 15.0 \n", + "3 0.0 0.0 15.0 \n", + "4 0.0 0.0 11.0 \n", + ".. ... ... ... \n", + "606 0.0 0.0 9.0 \n", + "607 3.0 0.0 0.0 \n", + "608 3.0 0.0 0.0 \n", + "609 2.0 0.0 0.0 \n", + "610 2.0 0.0 0.0 \n", + "\n", + " SMILES_MORDRED_nBridgehead SMILES_MORDRED_nHetero ... \\\n", + "0 0.0 7.0 ... \n", + "1 0.0 7.0 ... \n", + "2 0.0 4.0 ... \n", + "3 0.0 4.0 ... \n", + "4 0.0 5.0 ... \n", + ".. ... ... ... \n", + "606 0.0 3.0 ... \n", + "607 0.0 7.0 ... \n", + "608 0.0 7.0 ... \n", + "609 0.0 4.0 ... \n", + "610 0.0 4.0 ... \n", + "\n", + " SMILES_MORDRED_JGI2 SMILES_MORDRED_JGI3 SMILES_MORDRED_JGI4 \\\n", + "0 0.053333 0.047348 0.025679 \n", + "1 0.053333 0.047348 0.025679 \n", + "2 0.074074 0.049167 0.050028 \n", + "3 0.074074 0.049167 0.050028 \n", + "4 0.104167 0.046456 0.055718 \n", + ".. ... ... ... \n", + "606 0.059259 0.071970 0.042870 \n", + "607 0.117647 0.085938 0.047059 \n", + "608 0.117647 0.085938 0.047059 \n", + "609 0.148148 0.000000 0.000000 \n", + "610 0.148148 0.000000 0.000000 \n", + "\n", + " SMILES_MORDRED_JGI5 SMILES_MORDRED_JGI6 SMILES_MORDRED_JGI7 \\\n", + "0 0.021778 0.007407 0.014227 \n", + "1 0.021778 0.007407 0.014227 \n", + "2 0.026569 0.016799 0.012762 \n", + "3 0.026569 0.016799 0.012762 \n", + "4 0.031875 0.020352 0.014901 \n", + ".. ... ... ... \n", + "606 0.040000 0.000000 0.000000 \n", + "607 0.018519 0.000000 0.000000 \n", + "608 0.018519 0.000000 0.000000 \n", + "609 0.000000 0.000000 0.000000 \n", + "610 0.000000 0.000000 0.000000 \n", + "\n", + " SMILES_MORDRED_JGI8 SMILES_MORDRED_JGI9 SMILES_MORDRED_TopoShapeIndex \\\n", + "0 0.008230 0.006734 0.857143 \n", + "1 0.008230 0.006734 0.857143 \n", + "2 0.010204 0.000000 1.000000 \n", + "3 0.010204 0.000000 1.000000 \n", + "4 0.011255 0.006063 0.833333 \n", + ".. ... ... ... \n", + "606 0.000000 0.000000 0.666667 \n", + "607 0.000000 0.000000 1.000000 \n", + "608 0.000000 0.000000 1.000000 \n", + "609 0.000000 0.000000 0.500000 \n", + "610 0.000000 0.000000 0.500000 \n", + "\n", + " SMILES_MORDRED_MWC06 \n", + "0 7.787797 \n", + "1 7.787797 \n", + "2 8.042056 \n", + "3 8.042056 \n", + "4 8.108623 \n", + ".. ... \n", + "606 7.372118 \n", + "607 7.377134 \n", + "608 7.377134 \n", + "609 5.837730 \n", + "610 5.837730 \n", + "\n", + "[611 rows x 147 columns]), continuous=SubspaceContinuous(parameters=[], constraints_lin_eq=[], constraints_lin_ineq=[])), objective=Objective(mode='SINGLE', targets=[NumericalTarget(name='Efficiency', mode=, bounds=Interval(lower=-inf, upper=inf), transformation=None)], weights=[100.0], combine_func='GEOM_MEAN'), recommender=TwoPhaseMetaRecommender(allow_repeated_recommendations=None, allow_recommending_already_measured=None, initial_recommender=RandomRecommender(allow_repeated_recommendations=False, allow_recommending_already_measured=True), recommender=SequentialGreedyRecommender(allow_repeated_recommendations=False, allow_recommending_already_measured=False, surrogate_model=GaussianProcessSurrogate(model_params={}, _model=None), acquisition_function_cls='qEI', _acquisition_function=None, hybrid_sampler='Farthest', sampling_percentage=0.3), switch_after=1), n_batches_done=1, n_fits_done=0, _measurements_exp= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "0 480.0 7.0 0.03100 0.05 \n", + "1 0.5 7.0 0.01000 0.60 \n", + "2 144.0 7.0 0.00001 0.01 \n", + "\n", + " SMILES Efficiency BatchNr \\\n", + "0 C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[... 79.8 1 \n", + "1 C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O... 54.1 1 \n", + "2 [N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)... 59.4 1 \n", + "\n", + " FitNr \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN , _cached_recommendation=Empty DataFrame\n", "Columns: []\n", "Index: [], numerical_measurements_must_be_within_tolerance=None, strategy=None)" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -761,18 +1000,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [ { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", - "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", - "\u001b[1;31mClick here for more info. \n", - "\u001b[1;31mView Jupyter log for further details." + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Recommended experiments: \n", + "| index | Time_h | pH | Inhib_Concentrat_M | Salt_Concentrat_M | SMILES |\n", + "|--------:|---------:|-----:|---------------------:|--------------------:|:-------------------------------------------------------|\n", + "| 194 | 24 | 10 | 0.001 | 0.1 | C1N2CN3CN1CN(C2)C3 |\n", + "| 297 | 24 | 0 | 0.0004 | 0 | CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C |\n", + "| 300 | 24 | 0 | 0.0004 | 0 | CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C |\n", + "| 303 | 24 | 0 | 0.0004 | 0 | CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C |\n", + "| 306 | 24 | 0 | 0.0004 | 0 | CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C |\n", + "| 586 | 3 | 0 | 0.0008 | 0 | CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O |\n", + "| 591 | 3 | 0 | 0.0008 | 0 | CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O |\n", + "| 596 | 3 | 0 | 0.0008 | 0 | CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O |\n" ] } ],