diff --git a/AA1000_simulation_10MC_50exp_1batch.png b/AA1000_simulation_10MC_50exp_1batch.png new file mode 100644 index 0000000..4e7e429 Binary files /dev/null and b/AA1000_simulation_10MC_50exp_1batch.png differ diff --git a/can_baybe-inhibitor.ipynb b/can_baybe-inhibitor.ipynb index 26e7ce6..271be05 100644 --- a/can_baybe-inhibitor.ipynb +++ b/can_baybe-inhibitor.ipynb @@ -30,18 +30,176 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 114, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", - " warnings.warn(\n", - "/home/vscode/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SMILESTime_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
0C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O0.02.05.000000e-07253.85
1C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O0.02.01.000000e-06258.55
2C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O0.02.05.000000e-06267.40
3C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O0.02.01.000000e-05286.65
4C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O0.02.05.000000e-05273.90
.....................
303S=c1sc2c([nH]1)cccc2384.0-0.64.200000e-03070.60
304S=c1sc2c([nH]1)cccc2384.0-0.65.300000e-03079.77
305S=c1sc2c([nH]1)cccc2384.0-0.66.500000e-03076.72
306S=c1sc2c([nH]1)cccc2384.0-0.67.500000e-03075.44
307S=c1sc2c([nH]1)cccc2384.0-0.68.500000e-03077.22
\n", + "

308 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " SMILES Time_h pH Inhib_Concentrat_M \\\n", + "0 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 0.0 2.0 5.000000e-07 \n", + "1 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 0.0 2.0 1.000000e-06 \n", + "2 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 0.0 2.0 5.000000e-06 \n", + "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 0.0 2.0 1.000000e-05 \n", + "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 0.0 2.0 5.000000e-05 \n", + ".. ... ... ... ... \n", + "303 S=c1sc2c([nH]1)cccc2 384.0 -0.6 4.200000e-03 \n", + "304 S=c1sc2c([nH]1)cccc2 384.0 -0.6 5.300000e-03 \n", + "305 S=c1sc2c([nH]1)cccc2 384.0 -0.6 6.500000e-03 \n", + "306 S=c1sc2c([nH]1)cccc2 384.0 -0.6 7.500000e-03 \n", + "307 S=c1sc2c([nH]1)cccc2 384.0 -0.6 8.500000e-03 \n", + "\n", + " Salt_Concentrat_M Efficiency \n", + "0 2 53.85 \n", + "1 2 58.55 \n", + "2 2 67.40 \n", + "3 2 86.65 \n", + "4 2 73.90 \n", + ".. ... ... \n", + "303 0 70.60 \n", + "304 0 79.77 \n", + "305 0 76.72 \n", + "306 0 75.44 \n", + "307 0 77.22 \n", + "\n", + "[308 rows x 6 columns]" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -59,29 +217,30 @@ "from baybe.simulation import simulate_scenarios\n", "from baybe.targets import NumericalTarget\n", "\n", - "df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')\n", - "df_AA1000 = pd.read_excel('data/filtered_AA1000.xlsx')\n", - "df_Al = pd.read_excel('data/filtered_Al.xlsx')\n", + "df_AA2024 = pd.read_excel('data/averaged_filtered_AA2024.xlsx')\n", + "df_AA1000 = pd.read_excel('data/averaged_filtered_AA1000.xlsx')\n", + "df_Al = pd.read_excel('data/averaged_filtered_Al.xlsx')\n", "\n", - "df_active = df_AA2024" + "df_active = df_AA1000\n", + "df_active" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ - "SMOKE_TEST = \"SMOKE_TEST\" in os.environ\n", + "# SMOKE_TEST = \"SMOKE_TEST\" in os.environ\n", "\n", - "N_MC_ITERATIONS = 2 if SMOKE_TEST else 5\n", - "N_DOE_ITERATIONS = 2 if SMOKE_TEST else 5\n", - "BATCH_SIZE = 1 if SMOKE_TEST else 3" + "# N_MC_ITERATIONS = 2 if SMOKE_TEST else 5\n", + "# N_DOE_ITERATIONS = 2 if SMOKE_TEST else 5\n", + "# BATCH_SIZE = 1 if SMOKE_TEST else 3" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -105,27 +264,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 118, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[NumericalDiscreteParameter(name='Time_h', encoding=None, _values=[0.5, 1.0, 2.0, 3.0, 6.0, 24.0, 48.0, 72.0, 96.0, 120.0, 144.0, 168.0, 192.0, 240.0, 288.0, 336.0, 360.0, 384.0, 432.0, 480.0, 528.0, 576.0, 600.0, 624.0, 672.0], tolerance=0.0),\n", - " NumericalDiscreteParameter(name='pH', encoding=None, _values=[0.0, 3.3, 4.0, 4.4, 5.4, 5.5, 5.6, 7.0, 10.0], tolerance=0.0),\n", - " NumericalDiscreteParameter(name='Inhib_Concentrat_M', encoding=None, _values=[1e-05, 5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0008, 0.001, 0.0012, 0.0018, 0.0024, 0.003, 0.005, 0.01, 0.011, 0.021, 0.022, 0.031, 0.033, 0.042, 0.044, 0.05, 0.1], tolerance=0.0),\n", - " NumericalDiscreteParameter(name='Salt_Concentrat_M', encoding=None, _values=[0.0, 0.01, 0.05, 0.1, 0.5, 0.6], tolerance=0.0),\n", - " SubstanceParameter(name='SMILES', data={'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'OC(=O)CS': 'OC(=O)CS', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'Sc1ncccn1': 'Sc1ncccn1', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'Nn1cnnc1': 'Nn1cnnc1', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'CCOc1ccc2c(c1)nc([nH]2)S': 'CCOc1ccc2c(c1)nc([nH]2)S', 'Cn1nnnc1S': 'Cn1nnnc1S', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'CC(=O)O': 'CC(=O)O', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OCC(CO)O': 'OCC(CO)O', 'NCC(=O)O': 'NCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'NO': 'NO', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O': 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'SC#N': 'SC#N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12'}, decorrelate=0.7, encoding=)]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# parameters\n", - "parameters = [\n", + "\n", + "# mordred\n", + "parameters_mordred = [\n", "NumericalDiscreteParameter(\n", " name=\"Time_h\",\n", " values=df_active['Time_h'].unique(),\n", @@ -153,208 +299,295 @@ " decorrelate=0.7, # optional\n", " ) \n", " ]\n", - "parameters" + "\n", + "# morgan fingerprints\n", + "parameters_morgan_fp = [\n", + "NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_active['Time_h'].unique(),\n", + " # tolerance = 0.004, assume certain experimental noise for each parameter measurement?\n", + "),\n", + "NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_active['pH'].unique(),\n", + " # tolerance = 0.004\n", + " ), \n", + "NumericalDiscreteParameter( # Set this as continuous, the values seem quite small?\n", + " name=\"Inhib_Concentrat_M\",\n", + " values= df_active['Inhib_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_active['Salt_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=smiles_dict,\n", + " encoding=\"MORGAN_FP\", # optional\n", + " decorrelate=0.7, # optional\n", + " ) \n", + " ]\n", + "\n", + "# rdkit\n", + "parameters_rdkit = [\n", + "NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_active['Time_h'].unique(),\n", + " # tolerance = 0.004, assume certain experimental noise for each parameter measurement?\n", + "),\n", + "NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_active['pH'].unique(),\n", + " # tolerance = 0.004\n", + " ), \n", + "NumericalDiscreteParameter( # Set this as continuous, the values seem quite small?\n", + " name=\"Inhib_Concentrat_M\",\n", + " values= df_active['Inhib_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_active['Salt_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=smiles_dict,\n", + " encoding=\"RDKIT\", # optional\n", + " decorrelate=0.7, # optional\n", + " ) \n", + " ]\n" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 119, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Print test 1\n" - ] - } - ], + "outputs": [], "source": [ "df_no_target = lookup.drop('Efficiency', axis=1)\n", "\n", - "searchspace = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters)\n", - "print('Print test 1')\n", + "# searchspace = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters)\n", + "# print('Print test 1')\n", + "# objective = Objective(\n", + "# mode=\"SINGLE\", targets=[NumericalTarget(name=\"Efficiency\", mode=\"MAX\")]\n", + "# )\n", + "\n", + "\n", + "searchspace_mordred = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters_mordred)\n", + "\n", + "\n", + "searchspace_morgan = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters_morgan_fp)\n", + "\n", + "\n", + "searchspace_rdkit = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters_rdkit)\n", + "\n", + "\n", "objective = Objective(\n", " mode=\"SINGLE\", targets=[NumericalTarget(name=\"Efficiency\", mode=\"MAX\")]\n", - ")" + ")\n" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Print test 2\n" - ] - } - ], - "source": [ - "print('Print test 2')" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": 120, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "SearchSpace(discrete=SubspaceDiscrete(parameters=[NumericalDiscreteParameter(name='Time_h', encoding=None, _values=[0.5, 1.0, 2.0, 3.0, 6.0, 24.0, 48.0, 72.0, 96.0, 120.0, 144.0, 168.0, 192.0, 240.0, 288.0, 336.0, 360.0, 384.0, 432.0, 480.0, 528.0, 576.0, 600.0, 624.0, 672.0], tolerance=0.0), NumericalDiscreteParameter(name='pH', encoding=None, _values=[0.0, 3.3, 4.0, 4.4, 5.4, 5.5, 5.6, 7.0, 10.0], tolerance=0.0), NumericalDiscreteParameter(name='Inhib_Concentrat_M', encoding=None, _values=[1e-05, 5e-05, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0008, 0.001, 0.0012, 0.0018, 0.0024, 0.003, 0.005, 0.01, 0.011, 0.021, 0.022, 0.031, 0.033, 0.042, 0.044, 0.05, 0.1], tolerance=0.0), NumericalDiscreteParameter(name='Salt_Concentrat_M', encoding=None, _values=[0.0, 0.01, 0.05, 0.1, 0.5, 0.6], tolerance=0.0), SubstanceParameter(name='SMILES', data={'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'OC(=O)CS': 'OC(=O)CS', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'Sc1ncccn1': 'Sc1ncccn1', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'Nn1cnnc1': 'Nn1cnnc1', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'CCOc1ccc2c(c1)nc([nH]2)S': 'CCOc1ccc2c(c1)nc([nH]2)S', 'Cn1nnnc1S': 'Cn1nnnc1S', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'CC(=O)O': 'CC(=O)O', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OCC(CO)O': 'OCC(CO)O', 'NCC(=O)O': 'NCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'NO': 'NO', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O': 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'SC#N': 'SC#N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12'}, decorrelate=0.7, encoding=)], exp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", - "0 24.0 4.0 0.0010 0.10 \n", - "1 24.0 10.0 0.0010 0.10 \n", - "2 24.0 4.0 0.0010 0.10 \n", - "3 24.0 10.0 0.0010 0.10 \n", - "4 24.0 4.0 0.0010 0.10 \n", - ".. ... ... ... ... \n", - "606 24.0 7.0 0.0005 0.05 \n", - "607 24.0 7.0 0.0005 0.05 \n", - "608 24.0 7.0 0.0005 0.05 \n", - "609 24.0 7.0 0.0005 0.05 \n", - "610 24.0 7.0 0.0005 0.05 \n", + "SearchSpace(discrete=SubspaceDiscrete(parameters=[NumericalDiscreteParameter(name='Time_h', encoding=None, _values=[0.0, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 24.0, 48.0, 72.0, 96.0, 120.0, 144.0, 168.0, 192.0, 240.0, 288.0, 336.0, 384.0], tolerance=0.0), NumericalDiscreteParameter(name='pH', encoding=None, _values=[-0.6, -0.4771212547196624, -0.3979400086720376, -0.3010299956639812, -0.1760912590556812, 0.0, 0.3, 0.45, 1.0, 2.0, 7.0, 13.0], tolerance=0.0), NumericalDiscreteParameter(name='Inhib_Concentrat_M', encoding=None, _values=[1e-07, 5e-07, 1e-06, 2e-06, 4e-06, 5e-06, 6e-06, 8e-06, 1e-05, 1.2e-05, 2e-05, 4e-05, 5e-05, 6e-05, 8e-05, 0.0001, 0.0001958863858961802, 0.0002, 0.0003, 0.0003566333808844508, 0.0003917727717923605, 0.0004, 0.0005, 0.0005876591576885406, 0.0006, 0.0007132667617689017, 0.0007835455435847209, 0.0008, 0.0009794319294809011, 0.001, 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.001783166904422254, 0.0018, 0.0019, 0.002, 0.002139800285306705, 0.0025, 0.0026, 0.003, 0.0032, 0.003566333808844508, 0.0039, 0.004, 0.0042, 0.00427960057061341, 0.005, 0.0053, 0.005706134094151214, 0.0065, 0.0075, 0.0085, 0.01, 0.015, 0.02, 0.04, 0.06, 0.08, 0.1], tolerance=0.0), NumericalDiscreteParameter(name='Salt_Concentrat_M', encoding=None, _values=[0.0, 1.0, 2.0], tolerance=0.0), SubstanceParameter(name='SMILES', data={'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'C(C(CO)([N+](=O)[O-])Br)O': 'C(C(CO)([N+](=O)[O-])Br)O', 'C(CC=O)CC=O': 'C(CC=O)CC=O', 'C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O': 'C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O', 'C1=CC(=CN=C1)C=NNC(=S)N': 'C1=CC(=CN=C1)C=NNC(=S)N', 'C1=CC(=NC(=C1)N)N': 'C1=CC(=NC(=C1)N)N', 'C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4': 'C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4', 'C1=CC=NC(=C1)C=NNC(=S)N': 'C1=CC=NC(=C1)C=NNC(=S)N', 'C1=CN=C(C=N1)C(=O)N': 'C1=CN=C(C=N1)C(=O)N', 'C1=CN=C(N=C1)N': 'C1=CN=C(N=C1)N', 'C1=CN=CC=C1C=NNC(=S)N': 'C1=CN=CC=C1C=NNC(=S)N', 'C1CCC(=NO)CC1': 'C1CCC(=NO)CC1', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O': 'C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O', 'CC(=NO)C': 'CC(=NO)C', 'CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O': 'CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O', 'CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O': 'CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O', 'CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O': 'CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O', 'CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O': 'CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O', 'CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N': 'CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N', 'CC1=CC=C(C=C1)[N]2N=NC=C2O': 'CC1=CC=C(C=C1)[N]2N=NC=C2O', 'CC1=CN=C(C=N1)C(=O)N': 'CC1=CN=C(C=N1)C(=O)N', 'CC1=NC(=CC=C1)C': 'CC1=NC(=CC=C1)C', 'CCC(=NO)C': 'CCC(=NO)C', 'CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl': 'CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl', 'CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O': 'CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O', 'COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O': 'COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O', 'COC1=NC=C(N=C1)C(=O)N': 'COC1=NC=C(N=C1)C(=O)N', 'NC(N)=S': 'NC(N)=S', 'NC1=CCNC(=S)N1': 'NC1=CCNC(=S)N1', 'OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O': 'OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O', 'OC1=CN=N[N]1C2=CC=C(C=C2)Cl': 'OC1=CN=N[N]1C2=CC=C(C=C2)Cl', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2'}, decorrelate=0.7, encoding=)], exp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "0 0.0 2.0 5.000000e-07 2 \n", + "1 0.0 2.0 1.000000e-06 2 \n", + "2 0.0 2.0 5.000000e-06 2 \n", + "3 0.0 2.0 1.000000e-05 2 \n", + "4 0.0 2.0 5.000000e-05 2 \n", + ".. ... ... ... ... \n", + "303 384.0 -0.6 4.200000e-03 0 \n", + "304 384.0 -0.6 5.300000e-03 0 \n", + "305 384.0 -0.6 6.500000e-03 0 \n", + "306 384.0 -0.6 7.500000e-03 0 \n", + "307 384.0 -0.6 8.500000e-03 0 \n", "\n", - " SMILES \n", - "0 COCCOC(=O)OCSc1nc2c(s1)cccc2 \n", - "1 COCCOC(=O)OCSc1nc2c(s1)cccc2 \n", - "2 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O \n", - "3 Cc1ccc(c(c1)n1nc2c(n1)cccc2)O \n", - "4 Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O \n", - ".. ... \n", - "606 S=c1sc2c([nH]1)cccc2 \n", - "607 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", - "608 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", - "609 C(=O)(C(=O)[O-])[O-] \n", - "610 C(=O)(C(=O)[O-])[O-] \n", + " SMILES \n", + "0 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "1 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "2 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O \n", + ".. ... \n", + "303 S=c1sc2c([nH]1)cccc2 \n", + "304 S=c1sc2c([nH]1)cccc2 \n", + "305 S=c1sc2c([nH]1)cccc2 \n", + "306 S=c1sc2c([nH]1)cccc2 \n", + "307 S=c1sc2c([nH]1)cccc2 \n", "\n", - "[611 rows x 5 columns], metadata= was_recommended was_measured dont_recommend\n", + "[308 rows x 5 columns], metadata= was_recommended was_measured dont_recommend\n", "0 False False False\n", "1 False False False\n", "2 False False False\n", "3 False False False\n", "4 False False False\n", ".. ... ... ...\n", - "606 False False False\n", - "607 False False False\n", - "608 False False False\n", - "609 False False False\n", - "610 False False False\n", + "303 False False False\n", + "304 False False False\n", + "305 False False False\n", + "306 False False False\n", + "307 False False False\n", "\n", - "[611 rows x 3 columns], empty_encoding=False, constraints=[], comp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M SMILES_MORDRED_ABC \\\n", - "0 24.0 4.0 0.0010 0.10 14.211085 \n", - "1 24.0 10.0 0.0010 0.10 14.211085 \n", - "2 24.0 4.0 0.0010 0.10 13.532488 \n", - "3 24.0 10.0 0.0010 0.10 13.532488 \n", - "4 24.0 4.0 0.0010 0.10 16.206679 \n", - ".. ... ... ... ... ... \n", - "606 24.0 7.0 0.0005 0.05 7.847124 \n", - "607 24.0 7.0 0.0005 0.05 9.238929 \n", - "608 24.0 7.0 0.0005 0.05 9.238929 \n", - "609 24.0 7.0 0.0005 0.05 3.932653 \n", - "610 24.0 7.0 0.0005 0.05 3.932653 \n", + "[308 rows x 3 columns], empty_encoding=False, constraints=[], comp_rep= Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "0 0.0 2.0 5.000000e-07 2 \n", + "1 0.0 2.0 1.000000e-06 2 \n", + "2 0.0 2.0 5.000000e-06 2 \n", + "3 0.0 2.0 1.000000e-05 2 \n", + "4 0.0 2.0 5.000000e-05 2 \n", + ".. ... ... ... ... \n", + "303 384.0 -0.6 4.200000e-03 0 \n", + "304 384.0 -0.6 5.300000e-03 0 \n", + "305 384.0 -0.6 6.500000e-03 0 \n", + "306 384.0 -0.6 7.500000e-03 0 \n", + "307 384.0 -0.6 8.500000e-03 0 \n", "\n", - " SMILES_MORDRED_nAcid SMILES_MORDRED_nBase SMILES_MORDRED_nAromAtom \\\n", - "0 0.0 0.0 9.0 \n", - "1 0.0 0.0 9.0 \n", - "2 0.0 0.0 15.0 \n", - "3 0.0 0.0 15.0 \n", - "4 0.0 0.0 11.0 \n", - ".. ... ... ... \n", - "606 0.0 0.0 9.0 \n", - "607 3.0 0.0 0.0 \n", - "608 3.0 0.0 0.0 \n", - "609 2.0 0.0 0.0 \n", - "610 2.0 0.0 0.0 \n", + " SMILES_RDKIT_MaxAbsEStateIndex SMILES_RDKIT_MinAbsEStateIndex \\\n", + "0 10.148889 1.357824 \n", + "1 10.148889 1.357824 \n", + "2 10.148889 1.357824 \n", + "3 10.148889 1.357824 \n", + "4 10.148889 1.357824 \n", + ".. ... ... \n", + "303 4.975926 0.848333 \n", + "304 4.975926 0.848333 \n", + "305 4.975926 0.848333 \n", + "306 4.975926 0.848333 \n", + "307 4.975926 0.848333 \n", "\n", - " SMILES_MORDRED_nBridgehead SMILES_MORDRED_nHetero ... \\\n", - "0 0.0 7.0 ... \n", - "1 0.0 7.0 ... \n", - "2 0.0 4.0 ... \n", - "3 0.0 4.0 ... \n", - "4 0.0 5.0 ... \n", - ".. ... ... ... \n", - "606 0.0 3.0 ... \n", - "607 0.0 7.0 ... \n", - "608 0.0 7.0 ... \n", - "609 0.0 4.0 ... \n", - "610 0.0 4.0 ... \n", + " SMILES_RDKIT_MinEStateIndex SMILES_RDKIT_qed SMILES_RDKIT_SPS \\\n", + "0 -2.974537 0.454904 10.846154 \n", + "1 -2.974537 0.454904 10.846154 \n", + "2 -2.974537 0.454904 10.846154 \n", + "3 -2.974537 0.454904 10.846154 \n", + "4 -2.974537 0.454904 10.846154 \n", + ".. ... ... ... \n", + "303 0.848333 0.596343 10.400000 \n", + "304 0.848333 0.596343 10.400000 \n", + "305 0.848333 0.596343 10.400000 \n", + "306 0.848333 0.596343 10.400000 \n", + "307 0.848333 0.596343 10.400000 \n", "\n", - " SMILES_MORDRED_JGI2 SMILES_MORDRED_JGI3 SMILES_MORDRED_JGI4 \\\n", - "0 0.053333 0.047348 0.025679 \n", - "1 0.053333 0.047348 0.025679 \n", - "2 0.074074 0.049167 0.050028 \n", - "3 0.074074 0.049167 0.050028 \n", - "4 0.104167 0.046456 0.055718 \n", - ".. ... ... ... \n", - "606 0.059259 0.071970 0.042870 \n", - "607 0.117647 0.085938 0.047059 \n", - "608 0.117647 0.085938 0.047059 \n", - "609 0.148148 0.000000 0.000000 \n", - "610 0.148148 0.000000 0.000000 \n", + " SMILES_RDKIT_MolWt ... SMILES_RDKIT_fr_allylic_oxid \\\n", + "0 189.099 ... 0 \n", + "1 189.099 ... 0 \n", + "2 189.099 ... 0 \n", + "3 189.099 ... 0 \n", + "4 189.099 ... 0 \n", + ".. ... ... ... \n", + "303 167.258 ... 0 \n", + "304 167.258 ... 0 \n", + "305 167.258 ... 0 \n", + "306 167.258 ... 0 \n", + "307 167.258 ... 0 \n", "\n", - " SMILES_MORDRED_JGI5 SMILES_MORDRED_JGI6 SMILES_MORDRED_JGI7 \\\n", - "0 0.021778 0.007407 0.014227 \n", - "1 0.021778 0.007407 0.014227 \n", - "2 0.026569 0.016799 0.012762 \n", - "3 0.026569 0.016799 0.012762 \n", - "4 0.031875 0.020352 0.014901 \n", - ".. ... ... ... \n", - "606 0.040000 0.000000 0.000000 \n", - "607 0.018519 0.000000 0.000000 \n", - "608 0.018519 0.000000 0.000000 \n", - "609 0.000000 0.000000 0.000000 \n", - "610 0.000000 0.000000 0.000000 \n", + " SMILES_RDKIT_fr_aryl_methyl SMILES_RDKIT_fr_bicyclic \\\n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + ".. ... ... \n", + "303 0 1 \n", + "304 0 1 \n", + "305 0 1 \n", + "306 0 1 \n", + "307 0 1 \n", "\n", - " SMILES_MORDRED_JGI8 SMILES_MORDRED_JGI9 SMILES_MORDRED_TopoShapeIndex \\\n", - "0 0.008230 0.006734 0.857143 \n", - "1 0.008230 0.006734 0.857143 \n", - "2 0.010204 0.000000 1.000000 \n", - "3 0.010204 0.000000 1.000000 \n", - "4 0.011255 0.006063 0.833333 \n", - ".. ... ... ... \n", - "606 0.000000 0.000000 0.666667 \n", - "607 0.000000 0.000000 1.000000 \n", - "608 0.000000 0.000000 1.000000 \n", - "609 0.000000 0.000000 0.500000 \n", - "610 0.000000 0.000000 0.500000 \n", + " SMILES_RDKIT_fr_ether SMILES_RDKIT_fr_halogen \\\n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + ".. ... ... \n", + "303 0 0 \n", + "304 0 0 \n", + "305 0 0 \n", + "306 0 0 \n", + "307 0 0 \n", "\n", - " SMILES_MORDRED_MWC06 \n", - "0 7.787797 \n", - "1 7.787797 \n", - "2 8.042056 \n", - "3 8.042056 \n", - "4 8.108623 \n", - ".. ... \n", - "606 7.372118 \n", - "607 7.377134 \n", - "608 7.377134 \n", - "609 5.837730 \n", - "610 5.837730 \n", + " SMILES_RDKIT_fr_morpholine SMILES_RDKIT_fr_oxime \\\n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + ".. ... ... \n", + "303 0 0 \n", + "304 0 0 \n", + "305 0 0 \n", + "306 0 0 \n", + "307 0 0 \n", "\n", - "[611 rows x 147 columns]), continuous=SubspaceContinuous(parameters=[], constraints_lin_eq=[], constraints_lin_ineq=[]))" + " SMILES_RDKIT_fr_priamide SMILES_RDKIT_fr_pyridine \\\n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + ".. ... ... \n", + "303 0 0 \n", + "304 0 0 \n", + "305 0 0 \n", + "306 0 0 \n", + "307 0 0 \n", + "\n", + " SMILES_RDKIT_fr_thiazole \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + ".. ... \n", + "303 1 \n", + "304 1 \n", + "305 1 \n", + "306 1 \n", + "307 1 \n", + "\n", + "[308 rows x 79 columns]), continuous=SubspaceContinuous(parameters=[], constraints_lin_eq=[], constraints_lin_ineq=[]))" ] }, - "execution_count": 9, + "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "searchspace" + "searchspace_rdkit" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 121, "metadata": {}, "outputs": [], "source": [ - "campaign = Campaign(searchspace=searchspace, objective=objective)\n", - "campaign_rand = Campaign(\n", - " searchspace=searchspace,\n", + "campaign_mordred = Campaign(searchspace=searchspace_mordred, objective=objective)\n", + "campaign_morgan = Campaign(searchspace=searchspace_morgan, objective=objective)\n", + "campaign_rdkit = Campaign(searchspace=searchspace_rdkit, objective=objective)\n", + "\n", + "campaign_rand_mordred = Campaign(\n", + " searchspace=searchspace_mordred,\n", + " recommender=TwoPhaseMetaRecommender(recommender=RandomRecommender()),\n", + " objective=objective,\n", + ")\n", + "campaign_rand_morgan = Campaign(\n", + " searchspace=searchspace_morgan,\n", + " recommender=TwoPhaseMetaRecommender(recommender=RandomRecommender()),\n", + " objective=objective,\n", + ")\n", + "campaign_rand_rdkit = Campaign(\n", + " searchspace=searchspace_rdkit,\n", " recommender=TwoPhaseMetaRecommender(recommender=RandomRecommender()),\n", " objective=objective,\n", ")" @@ -362,71 +595,384 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ - "scenarios = {\"Test_Scenario\": campaign, \"Random\": campaign_rand}" + "scenarios = {\"Mordred\": campaign_mordred, #\"Random\": campaign_rand_mordred,\n", + " \"Morgan\": campaign_morgan, #\"Morgan Random\": campaign_rand_morgan,\n", + " \"RDKIT\": campaign_rdkit, \"Random\": campaign_rand_rdkit\n", + " }" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 128, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/10 [00:00 1\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43msimulate_scenarios\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mscenarios\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mlookup\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mBATCH_SIZE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_doe_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mN_DOE_ITERATIONS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_mc_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mN_MC_ITERATIONS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mimpute_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mignore\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/simulation.py:261\u001b[0m, in \u001b[0;36msimulate_scenarios\u001b[0;34m(scenarios, lookup, batch_size, n_doe_iterations, initial_data, groupby, n_mc_iterations, impute_mode, noise_percent)\u001b[0m\n\u001b[1;32m 258\u001b[0m combos[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInitial_Data\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(initial_data))\n\u001b[1;32m 260\u001b[0m \u001b[38;5;66;03m# Simulate and unpack\u001b[39;00m\n\u001b[0;32m--> 261\u001b[0m da_results \u001b[38;5;241m=\u001b[39m \u001b[43msimulate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_combos\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcombos\u001b[49m\u001b[43m)\u001b[49m[_RESULT_VARIABLE]\n\u001b[1;32m 262\u001b[0m df_results \u001b[38;5;241m=\u001b[39m unpack_simulation_results(da_results)\n\u001b[1;32m 264\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df_results\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/xyzpy/gen/farming.py:189\u001b[0m, in \u001b[0;36mRunner.run_combos\u001b[0;34m(self, combos, constants, **runner_settings)\u001b[0m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Run combos using the function map and save to dataset.\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \n\u001b[1;32m 177\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;124;03m Keyword arguments supplied to :func:`~xyzpy.combo_runner`.\u001b[39;00m\n\u001b[1;32m 187\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 188\u001b[0m combos \u001b[38;5;241m=\u001b[39m parse_combos(combos)\n\u001b[0;32m--> 189\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_last_ds \u001b[38;5;241m=\u001b[39m \u001b[43mcombo_runner_to_ds\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43mfn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[43mvar_names\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_var_names\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43mvar_dims\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_var_dims\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[43m \u001b[49m\u001b[43mvar_coords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_var_coords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[43m \u001b[49m\u001b[43mconstants\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_constants\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mdict\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mconstants\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 196\u001b[0m \u001b[43m \u001b[49m\u001b[43mresources\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_resources\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 198\u001b[0m \u001b[43m \u001b[49m\u001b[43mparse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 199\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault_runner_settings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrunner_settings\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_last_ds\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/xyzpy/gen/combo_runner.py:628\u001b[0m, in \u001b[0;36mcombo_runner_to_ds\u001b[0;34m(fn, combos, var_names, var_dims, var_coords, cases, constants, resources, attrs, shuffle, parse, to_df, parallel, num_workers, executor, verbosity)\u001b[0m\n\u001b[1;32m 625\u001b[0m info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 627\u001b[0m \u001b[38;5;66;03m# Generate data for all combos\u001b[39;00m\n\u001b[0;32m--> 628\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mcombo_runner_core\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 629\u001b[0m \u001b[43m \u001b[49m\u001b[43mfn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 630\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[43m \u001b[49m\u001b[43mcases\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcases\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[43m \u001b[49m\u001b[43mconstants\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresources\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconstants\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 633\u001b[0m \u001b[43m \u001b[49m\u001b[43mparallel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparallel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 634\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_workers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 635\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecutor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 636\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbosity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbosity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 637\u001b[0m \u001b[43m \u001b[49m\u001b[43minfo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minfo\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mto_df\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvar_names\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m>\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 639\u001b[0m \u001b[43m \u001b[49m\u001b[43mflat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mto_df\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 640\u001b[0m \u001b[43m \u001b[49m\u001b[43mshuffle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshuffle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m to_df:\n\u001b[1;32m 644\u001b[0m \u001b[38;5;66;03m# convert flat tuple of results to dataframe\u001b[39;00m\n\u001b[1;32m 645\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results_to_df(\n\u001b[1;32m 646\u001b[0m results,\n\u001b[1;32m 647\u001b[0m settings\u001b[38;5;241m=\u001b[39minfo[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msettings\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 650\u001b[0m var_names\u001b[38;5;241m=\u001b[39mvar_names\n\u001b[1;32m 651\u001b[0m )\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/xyzpy/gen/combo_runner.py:231\u001b[0m, in \u001b[0;36mcombo_runner_core\u001b[0;34m(fn, combos, constants, cases, split, flat, shuffle, parallel, num_workers, executor, verbosity, info)\u001b[0m\n\u001b[1;32m 229\u001b[0m results_linear \u001b[38;5;241m=\u001b[39m _run_linear_executor(executor, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mrun_linear_opts)\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 231\u001b[0m results_linear \u001b[38;5;241m=\u001b[39m \u001b[43m_run_linear_sequential\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrun_linear_opts\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shuffle:\n\u001b[1;32m 234\u001b[0m enum_results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msorted\u001b[39m(\u001b[38;5;28mzip\u001b[39m(enum, results_linear), key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m x: x[\u001b[38;5;241m0\u001b[39m])\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/xyzpy/gen/combo_runner.py:142\u001b[0m, in \u001b[0;36m_run_linear_sequential\u001b[0;34m(fn, settings, verbosity)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verbosity \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m 141\u001b[0m pbar\u001b[38;5;241m.\u001b[39mset_description(\u001b[38;5;28mstr\u001b[39m(kws))\n\u001b[0;32m--> 142\u001b[0m results_linear\u001b[38;5;241m.\u001b[39mappend(\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkws\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 143\u001b[0m pbar\u001b[38;5;241m.\u001b[39mupdate()\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results_linear\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/simulation.py:224\u001b[0m, in \u001b[0;36msimulate_scenarios..simulate\u001b[0;34m(Scenario, Random_Seed, Initial_Data)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Callable for xyzpy simulation.\"\"\"\u001b[39;00m\n\u001b[1;32m 222\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m initial_data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m initial_data[Initial_Data]\n\u001b[1;32m 223\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m SimulationResult(\n\u001b[0;32m--> 224\u001b[0m \u001b[43m_simulate_groupby\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[43mscenarios\u001b[49m\u001b[43m[\u001b[49m\u001b[43mScenario\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[43mlookup\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_doe_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_doe_iterations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[43minitial_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroupby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_seed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mRandom_Seed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43mimpute_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mimpute_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43mnoise_percent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnoise_percent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 235\u001b[0m )\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/simulation.py:340\u001b[0m, in \u001b[0;36m_simulate_groupby\u001b[0;34m(campaign, lookup, batch_size, n_doe_iterations, initial_data, groupby, random_seed, impute_mode, noise_percent)\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[38;5;66;03m# Run the group simulation\u001b[39;00m\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 340\u001b[0m df_group \u001b[38;5;241m=\u001b[39m \u001b[43msimulate_experiment\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 341\u001b[0m \u001b[43m \u001b[49m\u001b[43mcampaign_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 342\u001b[0m \u001b[43m \u001b[49m\u001b[43mlookup\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 343\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 344\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_doe_iterations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_doe_iterations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 345\u001b[0m \u001b[43m \u001b[49m\u001b[43minitial_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minitial_data\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 346\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_seed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_seed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[43m \u001b[49m\u001b[43mimpute_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mimpute_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 348\u001b[0m \u001b[43m \u001b[49m\u001b[43mnoise_percent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnoise_percent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 349\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m NothingToSimulateError:\n\u001b[1;32m 351\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/baybe/simulation.py:473\u001b[0m, in \u001b[0;36msimulate_experiment\u001b[0;34m(campaign, lookup, batch_size, n_doe_iterations, initial_data, random_seed, impute_mode, noise_percent)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m impute_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 472\u001b[0m searchspace \u001b[38;5;241m=\u001b[39m campaign\u001b[38;5;241m.\u001b[39msearchspace\u001b[38;5;241m.\u001b[39mdiscrete\u001b[38;5;241m.\u001b[39mexp_rep\n\u001b[0;32m--> 473\u001b[0m missing_inds \u001b[38;5;241m=\u001b[39m \u001b[43msearchspace\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[43m \u001b[49m\u001b[43msearchspace\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmerge\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlookup\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mleft\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindicator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m_merge\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 475\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mleft_only\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 476\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 477\u001b[0m campaign\u001b[38;5;241m.\u001b[39msearchspace\u001b[38;5;241m.\u001b[39mdiscrete\u001b[38;5;241m.\u001b[39mmetadata\u001b[38;5;241m.\u001b[39mloc[\n\u001b[1;32m 478\u001b[0m missing_inds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdont_recommend\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 479\u001b[0m ] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 481\u001b[0m \u001b[38;5;66;03m# Run the DOE loop\u001b[39;00m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/indexes/range.py:1030\u001b[0m, in \u001b[0;36mRangeIndex.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_scalar(key):\n\u001b[1;32m 1024\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\n\u001b[1;32m 1025\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124monly integers, slices (`:`), \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1026\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mellipsis (`...`), numpy.newaxis (`None`) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1027\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mand integer or boolean \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1028\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marrays are valid indices\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1029\u001b[0m )\n\u001b[0;32m-> 1030\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py:5416\u001b[0m, in \u001b[0;36mIndex.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 5407\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(key) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(key) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 5408\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 5409\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing a boolean indexer with length 0 on an Index with \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 5410\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlength greater than 0 is deprecated and will raise in a \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5413\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[1;32m 5414\u001b[0m )\n\u001b[0;32m-> 5416\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mgetitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5417\u001b[0m \u001b[38;5;66;03m# Because we ruled out integer above, we always get an arraylike here\u001b[39;00m\n\u001b[1;32m 5418\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m result\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n", - "\u001b[0;31mIndexError\u001b[0m: boolean index did not match indexed array along dimension 0; dimension is 611 but corresponding boolean dimension is 921" + " 0%| | 0/40 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "max_yield = lookup[\"Efficiency\"].max()\n", + "# plot_results = results[results['Scenario'].isin(['Mordred', 'Morgan', 'RDKIT'])]\n", + "\n", + "sns.lineplot(\n", + " data=results, x=\"Num_Experiments\", y=\"Efficiency_CumBest\", hue=\"Scenario\", marker=\"x\"\n", + ")\n", + "plt.plot([0.5, N_DOE_ITERATIONS+0.5], [max_yield, max_yield], \"--r\")\n", + "plt.legend(loc=\"lower right\")\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.xlim(0, N_DOE_ITERATIONS+1)\n", + "plt.savefig(\"./AA1000_simulation_10MC_50exp_1batch.png\")" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 125, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ScenarioRandom_SeedIterationNum_ExperimentsEfficiency_MeasurementsEfficiency_IterBestEfficiency_CumBest
0Mordred133701[71.38]71.38000071.380
1Mordred133712[83.742]83.74200083.742
2Mordred133723[67.0]67.00000083.742
3Mordred133734[67.04]67.04000083.742
4Mordred133745[47.0]47.00000083.742
........................
395Random134656[71.72]71.72000092.500
396Random134667[68.00666666666666]68.00666792.500
397Random134678[6.08]6.08000092.500
398Random134689[90.0]90.00000092.500
399Random1346910[45.37]45.37000092.500
\n", + "

400 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Scenario Random_Seed Iteration Num_Experiments Efficiency_Measurements \\\n", + "0 Mordred 1337 0 1 [71.38] \n", + "1 Mordred 1337 1 2 [83.742] \n", + "2 Mordred 1337 2 3 [67.0] \n", + "3 Mordred 1337 3 4 [67.04] \n", + "4 Mordred 1337 4 5 [47.0] \n", + ".. ... ... ... ... ... \n", + "395 Random 1346 5 6 [71.72] \n", + "396 Random 1346 6 7 [68.00666666666666] \n", + "397 Random 1346 7 8 [6.08] \n", + "398 Random 1346 8 9 [90.0] \n", + "399 Random 1346 9 10 [45.37] \n", + "\n", + " Efficiency_IterBest Efficiency_CumBest \n", + "0 71.380000 71.380 \n", + "1 83.742000 83.742 \n", + "2 67.000000 83.742 \n", + "3 67.040000 83.742 \n", + "4 47.000000 83.742 \n", + ".. ... ... \n", + "395 71.720000 92.500 \n", + "396 68.006667 92.500 \n", + "397 6.080000 92.500 \n", + "398 90.000000 92.500 \n", + "399 45.370000 92.500 \n", + "\n", + "[400 rows x 7 columns]" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] }, { "cell_type": "markdown", diff --git a/run_impute_mode.png b/run_impute_mode.png new file mode 100644 index 0000000..277a900 Binary files /dev/null and b/run_impute_mode.png differ