diff --git a/Scenarios.png b/Scenarios.png new file mode 100644 index 0000000..ce1ca4f Binary files /dev/null and b/Scenarios.png differ diff --git a/alex-baybe-inhibitor.ipynb b/alex-baybe-inhibitor.ipynb new file mode 100644 index 0000000..1033359 --- /dev/null +++ b/alex-baybe-inhibitor.ipynb @@ -0,0 +1,4365 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 515.000000 515.000000 515.000000 515.000000 \n", + "mean 157.151456 6.613592 0.006934 0.113476 \n", + "std 212.312862 2.270734 0.012923 0.159137 \n", + "min 0.500000 0.000000 0.000010 0.000000 \n", + "25% 24.000000 4.400000 0.001000 0.050000 \n", + "50% 24.000000 7.000000 0.001000 0.100000 \n", + "75% 240.000000 7.000000 0.003000 0.100000 \n", + "max 672.000000 10.000000 0.100000 0.600000 \n", + "\n", + " Efficiency \n", + "count 515.000000 \n", + "mean 18.439395 \n", + "std 313.671188 \n", + "min -4834.000000 \n", + "25% 25.625000 \n", + "50% 50.000000 \n", + "75% 85.475000 \n", + "max 100.000000 \n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "df_AA2024 = pd.read_excel('data/averaged_filtered_AA2024.xlsx')\n", + "print(df_AA2024.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " SMILES Time_h pH Inhib_Concentrat_M \\\n", + "0 C(=O)(C(=O)[O-])[O-] 24.0 4.0 0.0010 \n", + "1 C(=O)(C(=O)[O-])[O-] 24.0 7.0 0.0005 \n", + "2 C(=O)(C(=O)[O-])[O-] 24.0 10.0 0.0010 \n", + "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 4.0 0.0010 \n", + "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 7.0 0.0005 \n", + "\n", + " Salt_Concentrat_M Efficiency \n", + "0 0.10 20.00 \n", + "1 0.05 12.35 \n", + "2 
0.10 20.00 \n", + "3 0.10 30.00 \n", + "4 0.05 -23.95 \n" + ] + } + ], + "source": [ + "print(df_AA2024.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 333.000000 333.000000 3.330000e+02 333.000000 \n", + "mean 79.969970 0.440733 1.510580e-01 0.051051 \n", + "std 143.826377 1.913196 5.784776e-01 0.301263 \n", + "min 0.000000 -0.600000 1.000000e-07 0.000000 \n", + "25% 2.000000 -0.136721 2.000000e-04 0.000000 \n", + "50% 5.500000 0.000000 1.800000e-03 0.000000 \n", + "75% 96.000000 1.000000 4.279601e-03 0.000000 \n", + "max 720.000000 13.000000 3.280000e+00 2.000000 \n", + "\n", + " Efficiency \n", + "count 333.000000 \n", + "mean 60.781498 \n", + "std 22.139768 \n", + "min -49.170000 \n", + "25% 47.066667 \n", + "50% 66.000000 \n", + "75% 77.220000 \n", + "max 96.200000 \n" + ] + } + ], + "source": [ + "df_AA1000 = pd.read_excel('data/averaged_filtered_AA1000.xlsx')\n", + "print(df_AA1000.describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Construct dataframe to work with" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df = df_AA2024\n", + "df_transfer = df_AA1000" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = pd.concat([df, df_transfer], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count848.000000848.0000008.480000e+02848.000000848.000000
mean126.8431604.1895806.352976e-020.08896235.066659
std192.0556763.6961833.690920e-010.227758245.617010
min0.000000-0.6000001.000000e-070.000000-4834.000000
25%6.0000000.0000005.000000e-040.00000035.000000
50%24.0000004.0000001.000000e-030.01000060.000000
75%144.0000007.0000004.200000e-030.10000080.507500
max720.00000013.0000003.280000e+002.000000100.000000
\n", + "
" + ], + "text/plain": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 848.000000 848.000000 8.480000e+02 848.000000 \n", + "mean 126.843160 4.189580 6.352976e-02 0.088962 \n", + "std 192.055676 3.696183 3.690920e-01 0.227758 \n", + "min 0.000000 -0.600000 1.000000e-07 0.000000 \n", + "25% 6.000000 0.000000 5.000000e-04 0.000000 \n", + "50% 24.000000 4.000000 1.000000e-03 0.010000 \n", + "75% 144.000000 7.000000 4.200000e-03 0.100000 \n", + "max 720.000000 13.000000 3.280000e+00 2.000000 \n", + "\n", + " Efficiency \n", + "count 848.000000 \n", + "mean 35.066659 \n", + "std 245.617010 \n", + "min -4834.000000 \n", + "25% 35.000000 \n", + "50% 60.000000 \n", + "75% 80.507500 \n", + "max 100.000000 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "for i, column in enumerate(df.columns, 1):\n", + " sns.histplot(df[column])\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set targets/objectives = efficiency for now" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from baybe.targets import NumericalTarget\n", + "from baybe.objective import Objective\n", + "\n", + "target = NumericalTarget(\n", + " name=\"Efficiency\",\n", + " mode=\"MAX\",\n", + ")\n", + "objective = Objective(mode=\"SINGLE\", targets=[target])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Search Space" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.parameters import NumericalContinuousParameter, CategoricalParameter, NumericalDiscreteParameter\n", + "from baybe.searchspace import SearchSpace\n", + "\n", + "basic_parameters=[\n", + "NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_combined[\"Time_h\"].unique(),\n", + " tolerance=5/60,\n", + "),\n", + "NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_combined[\"pH\"].unique(),\n", + " 
), \n", + "NumericalDiscreteParameter(\n", + " name=\"Inhib_Concentrat_M\",\n", + " values=df_combined[\"Inhib_Concentrat_M\"].unique(),\n", + " ),\n", + "NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_combined[\"Salt_Concentrat_M\"].unique(),\n", + " ),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Substance parameter**\n", + "\n", + "Instead of values, this parameter accepts data in the form of a dictionary. The items correspond to pairs of labels and SMILES. SMILES are string-based representations of molecular structures. Based on these, BayBE can assign each label a set of molecular descriptors as encoding." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CC(=O)O': 'CC(=O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O':
'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'CCOc1ccc2c(c1)nc([nH]2)S': 'CCOc1ccc2c(c1)nc([nH]2)S', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12', 'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Cn1nnnc1S': 'Cn1nnnc1S', 
'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'NCC(=O)O': 'NCC(=O)O', 'NO': 'NO', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Nn1cnnc1': 'Nn1cnnc1', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'OC(=O)CS': 'OC(=O)CS', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'OCC(CO)O': 'OCC(CO)O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 
'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'SC#N': 'SC#N', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'Sc1ncccn1': 'Sc1ncccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'C(C(CO)([N+](=O)[O-])Br)O': 'C(C(CO)([N+](=O)[O-])Br)O', 
'C(CC=O)CC=O': 'C(CC=O)CC=O', 'C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O': 'C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O', 'C1=CC(=CN=C1)C=NNC(=S)N': 'C1=CC(=CN=C1)C=NNC(=S)N', 'C1=CC(=NC(=C1)N)N': 'C1=CC(=NC(=C1)N)N', 'C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4': 'C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4', 'C1=CC=NC(=C1)C=NNC(=S)N': 'C1=CC=NC(=C1)C=NNC(=S)N', 'C1=CN=C(C=N1)C(=O)N': 'C1=CN=C(C=N1)C(=O)N', 'C1=CN=C(N=C1)N': 'C1=CN=C(N=C1)N', 'C1=CN=CC=C1C=NNC(=S)N': 'C1=CN=CC=C1C=NNC(=S)N', 'C1CCC(=NO)CC1': 'C1CCC(=NO)CC1', 'C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O': 'C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O', 'CC(=NO)C': 'CC(=NO)C', 'CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O': 'CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O', 'CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O': 'CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O', 'CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O': 'CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O', 'CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O': 'CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O', 'CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N': 'CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N', 'CC1=CC=C(C=C1)[N]2N=NC=C2O': 'CC1=CC=C(C=C1)[N]2N=NC=C2O', 'CC1=CN=C(C=N1)C(=O)N': 'CC1=CN=C(C=N1)C(=O)N', 'CC1=NC(=CC=C1)C': 'CC1=NC(=CC=C1)C', 'CCC(=NO)C': 'CCC(=NO)C', 'CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl': 'CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl', 'CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O': 'CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O', 'COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O': 'COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O', 'COC1=NC=C(N=C1)C(=O)N': 'COC1=NC=C(N=C1)C(=O)N', 'NC(N)=S': 'NC(N)=S', 'NC1=CCNC(=S)N1': 'NC1=CCNC(=S)N1', 'OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O': 'OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O', 'OC1=CN=N[N]1C2=CC=C(C=C2)Cl': 'OC1=CN=N[N]1C2=CC=C(C=C2)Cl'}\n" + ] + } + ], + "source": [ + "#Create a dictionary of unique SMILES representations found in the dataframe\n", + "\n", + "unique_SMILES = 
df_combined[\"SMILES\"].unique()\n", + "\n", + "dic_SMILES = dict(zip(unique_SMILES, unique_SMILES))\n", + "\n", + "print(dic_SMILES)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.parameters import SubstanceParameter\n", + "\n", + "mordred_parameters = basic_parameters + [\n", + " SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=dic_SMILES,\n", + " encoding=\"MORDRED\",\n", + " )\n", + "]\n", + "\n", + "rdkit_parameters = basic_parameters + [\n", + " SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=dic_SMILES,\n", + " encoding=\"RDKIT\",\n", + " )\n", + "]\n", + "\n", + "morganfp_parameters = basic_parameters + [\n", + " SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=dic_SMILES,\n", + " encoding=\"MORGAN_FP\",\n", + " )\n", + "]\n", + "\n", + "ohe_parameters = basic_parameters + [\n", + " CategoricalParameter(\n", + " name=\"SMILES\",\n", + " values=unique_SMILES,\n", + " encoding=\"OHE\",\n", + " )\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C(=O)(C(=O)[O-])[O-]')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC(=C(C=C1O)O)C=NNC(=S)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC(=CC(=C1)S)C(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC2=NNN=C2C=C1Cl')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling 
baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC=C(C(=C1)C=NNC(=S)N)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1COCCN1CCCS(=O)(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1N2CN3CN1CN(C2)C3')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C=CC(=O)OCCOC(=O)OCCSc1ncccn1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(=O)SSC(=O)C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C')\n", + "_______________________________________smiles_to_mordred_features - 0.3s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCCCCC/C=C\\\\CCCCCCCC(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCCCCCCCCCCCCCCC(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCCCCCCCCCOS(=O)(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('CCCCCCCCCCCCc1ccccc1S([O])([O])O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.5s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCCCOP(=O)(OCCCC)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCN(C(=S)S)CC')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCOc1ccc2c(c1)nc([nH]2)S')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCSc1nnc(s1)N')\n", + 
"_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CN1C=NC2=C1C(=O)N(C(=O)N2C)C')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CNCC(C1=CC(=CC=C1)O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COC(=O)CCCC1=CNC2=CC=CC=C21')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COC(=O)n1nnc2ccccc12')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COCCOC(=O)OCSc1nc2c(s1)cccc2')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COc1ccc2c(c1)[nH]c(=S)[nH]2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COc1cccc(c1)c1n[nH]c(=S)[nH]1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CS[C]1N[N]C(=N1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CSc1[nH]c2c(n1)cc(c(c2)C)C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CSc1nnc(s1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cc1cc(C)nc(n1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cc1ccc(c(c1)n1nc2c(n1)cccc2)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling 
baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cc1ccc2c(c1)nc([nH]2)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cc1n[nH]c(=S)s1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cc1nsc(c1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('ClC([C]1N[N]C=N1)(Cl)Cl')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Clc1cc2[nH]c(=S)[nH]c2cc1Cl')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Clc1ccc2c(c1)[nH]c(n2)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Clc1cccc(c1)c1n[nH]c(=S)[nH]1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cn1cnnc1S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Cn1nnnc1S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('NCC(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('NO')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1cc(N)nc(n1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1cc(S)nc(n1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1ccc2c(c1)sc(=S)[nH]2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1ccnc(n1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1n[nH]c(=S)s1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1n[nH]c(n1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1n[nH]cn1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1nc([nH]n1)C(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nc1ncncc1N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nn1c(NN)nnc1S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('Nn1c(S)nnc1c1ccccc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Nn1cnnc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('O/N=C(/C(=N/O)/C)\\\\C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)/C=C/c1ccccc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)CCCCC(=O)O')\n", + 
"_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)CCCCCCCCCCCCCCC(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)CCS')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)CS')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)Cn1nnnc1S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccc(=S)[nH]c1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccc(cc1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccc(cc1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccc(cc1)c1ccccc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccccc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccccc1O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccccc1S')\n", + "_______________________________________smiles_to_mordred_features - 0.3s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling 
baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccccn1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1cccnc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1cccnc1S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1ccncc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC(=O)c1n[nH]c(n1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OCC(CO)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O')\n", + 
"_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('O[C@H]1C(=O)OCC1(C)C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Oc1ccc(cc1)C(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Oc1ccc(cc1)S([O])([O])O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Oc1cccc2c1nccc2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Oc1ccccc1c1nnc([nH]1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('On1nnc2c1cccc2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1[nH]c2c([nH]1)c(=O)n(cn2)C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1[nH]c2c([nH]1)cncn2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1[nH]c2c([nH]1)nccn2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1[nH]nc([nH]1)c1cccnc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling 
baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1[nH]nc([nH]1)c1ccco1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1[nH]nc([nH]1)c1ccncc1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S=c1sc2c([nH]1)cccc2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('SC#N')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('S[C]1NC2=C[CH]C=NC2=N1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1n[nH]cn1')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1nc(N)c(c(n1)S)N')\n", + 
"_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1nc(N)c2c(n1)[nH]nc2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1nc2c([nH]1)cccc2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1ncc[nH]1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1ncccn1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('Sc1nnc(s1)S')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('[Cl-].[Cl-].[Cl-].[Ce+3]')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('[O-]S(=O)[O-].[Na+].[Na+]')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + 
"_smiles_to_mordred_features('c1ccc(nc1)c1ccccn1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('c1ccc2c(c1)[nH]nn2')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('c1ncn[nH]1')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C(C(CO)([N+](=O)[O-])Br)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C(CC=O)CC=O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC(=CN=C1)C=NNC(=S)N')\n", + 
"_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC(=NC(=C1)N)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4')\n", + "_______________________________________smiles_to_mordred_features - 0.4s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CC=NC(=C1)C=NNC(=S)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CN=C(C=N1)C(=O)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CN=C(N=C1)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1=CN=CC=C1C=NNC(=S)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C1CCC(=NO)CC1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.3s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(=NO)C')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N')\n", + "_______________________________________smiles_to_mordred_features - 0.5s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC1=CC=C(C=C1)[N]2N=NC=C2O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC1=CN=C(C=N1)C(=O)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CC1=NC(=CC=C1)C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCC(=NO)C')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + 
"________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O')\n", + "_______________________________________smiles_to_mordred_features - 0.2s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('COC1=NC=C(N=C1)C(=O)N')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('NC(N)=S')\n", + "_______________________________________smiles_to_mordred_features - 0.0s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('NC1=CCNC(=S)N1')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] 
Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O')\n", + "_______________________________________smiles_to_mordred_features - 0.3s, 0.0min\n", + "________________________________________________________________________________\n", + "[Memory] Calling baybe.utils.chemistry._smiles_to_mordred_features...\n", + "_smiles_to_mordred_features('OC1=CN=N[N]1C2=CC=C(C=C2)Cl')\n", + "_______________________________________smiles_to_mordred_features - 0.1s, 0.0min\n" + ] + } + ], + "source": [ + "searchspace_mordred = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), mordred_parameters)\n", + "searchspace_rdkit = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), rdkit_parameters)\n", + "searchspace_morganfp = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), morganfp_parameters)\n", + "searchspace_ohe = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), ohe_parameters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Recommenders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **SequentialGreedyRecommender** is a powerful recommender that leverages BoTorch optimization functions to perform sequential Greedy optimization. It can be applied for discrete, continuous and hybrid search spaces. It is an implementation of the BoTorch optimization functions for discrete, continuous and mixed spaces. **It is important to note that this recommender performs a brute-force search when applied in hybrid search spaces, as it optimizes the continuous part of the space while exhaustively searching choices in the discrete subspace.** You can customize this behavior to only sample a certain percentage of the discrete subspace via the sampling_percentage attribute and to choose different sampling strategies via the hybrid_sampler attribute. 
\n", + "\n", + "e.g.\n", + "strategy = TwoPhaseStrategy(recommender=SequentialGreedyRecommender(hybrid_sampler=\"Farthest\", sampling_percentage=0.3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For implementing fully customized surrogate models e.g. from sklearn or PyTorch, see:\n", + "https://emdgroup.github.io/baybe/examples/Custom_Surrogates/Custom_Surrogates.html\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.recommenders import (\n", + " SequentialGreedyRecommender,\n", + " FPSRecommender,\n", + " TwoPhaseMetaRecommender,\n", + ")\n", + "\n", + "recommender = TwoPhaseMetaRecommender(\n", + " initial_recommender=FPSRecommender(), # farthest point sampling\n", + " recommender=SequentialGreedyRecommender(), # Bayesian model-based optimization\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Campaign Strategy" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.strategies import TwoPhaseStrategy\n", + "from baybe import Campaign\n", + "\n", + "campaign_rdkit = Campaign(searchspace_rdkit, objective)\n", + "campaign_mordred = Campaign(searchspace_mordred, objective)\n", + "campaign_morganfp = Campaign(searchspace_morganfp, objective)\n", + "campaign_ohe = Campaign(searchspace_ohe, objective)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start simulation" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "scenarios = {\n", + " \"OHE\" : campaign_ohe,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/10 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.lineplot(\n", + " 
data=results,\n", + " x=\"Num_Experiments\",\n", + " y=\"Efficiency_CumBest\",\n", + " hue=\"Scenario\",\n", + " marker=\"x\",\n", + ")\n", + "\n", + "plt.xlim(0, N_DOE_ITERATIONS)\n", + "plt.ylim(0, 100)\n", + "\n", + "plt.savefig(\"Scenarios\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate best results" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['Scenario', 'Random_Seed', 'Iteration', 'Num_Experiments',\n", + " 'Efficiency_Measurements', 'Efficiency_IterBest', 'Efficiency_CumBest'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(results.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use transfer learning on other metals" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the training function (i.e. the previously investigated larger data set) and the yet to be optimized data sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.parameters import TaskParameter\n", + "\n", + "taskparam = TaskParameter(\n", + " name=\"Al_alloys\",\n", + " values=[\"AA1000\", \"AA2024\"],\n", + " active_values=[\"AA2024\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "unique_SMILES_transfer = df_transfer[\"SMILES\"].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.parameters import NumericalContinuousParameter, CategoricalParameter, NumericalDiscreteParameter\n", + "from baybe.searchspace import SearchSpace\n", + "\n", + "transfer_parameters=[\n", + "NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_combined[\"Time_h\"].unique(),\n", + " tolerance=5/60,\n", + "),\n", + "NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_combined[\"pH\"].unique(),\n", + " ), \n", + "NumericalDiscreteParameter(\n", + " name=\"Inhib_Concentrat_M\",\n", + " values=df_combined[\"Inhib_Concentrat_M\"].unique(),\n", + " ),\n", + "NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_combined[\"Salt_Concentrat_M\"].unique(),\n", + " ),\n", + "CategoricalParameter(\n", + " name=\"SMILES\",\n", + " values=unique_SMILES,\n", + " encoding=\"OHE\",\n", + " )\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "searchspace_transfer = SearchSpace.from_dataframe(df_transfer.drop(\"Efficiency\", axis = 1), transfer_parameters)\n", + "\n", + "campaign_transfer = Campaign(searchspace_transfer, objective)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "df_features = df.drop(\"Efficiency\", axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + 
"metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/10 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count258.000000258.000000258.000000258.000000258.000000
mean167.6027136.6360470.0073860.11790728.268191
std220.4887882.1496130.0132020.166813265.800655
min0.5000000.0000000.0000100.000000-3813.000000
25%24.0000005.4000000.0010000.05000030.000000
50%24.0000007.0000000.0010000.10000055.000000
75%240.0000007.0000000.0045000.10000089.000000
max672.00000010.0000000.0440000.600000100.000000
\n", + "" + ], + "text/plain": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 258.000000 258.000000 258.000000 258.000000 \n", + "mean 167.602713 6.636047 0.007386 0.117907 \n", + "std 220.488788 2.149613 0.013202 0.166813 \n", + "min 0.500000 0.000000 0.000010 0.000000 \n", + "25% 24.000000 5.400000 0.001000 0.050000 \n", + "50% 24.000000 7.000000 0.001000 0.100000 \n", + "75% 240.000000 7.000000 0.004500 0.100000 \n", + "max 672.000000 10.000000 0.044000 0.600000 \n", + "\n", + " Efficiency \n", + "count 258.000000 \n", + "mean 28.268191 \n", + "std 265.800655 \n", + "min -3813.000000 \n", + "25% 30.000000 \n", + "50% 55.000000 \n", + "75% 89.000000 \n", + "max 100.000000 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fraction_df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "No objects to concatenate", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[26], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFraction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m fractions[i]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Concatenate the dataframes into a single dataframe\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m concatenated_df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresults_transfer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Print the concatenated 
dataframe\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(concatenated_df)\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/reshape/concat.py:382\u001b[0m, in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 380\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 382\u001b[0m op \u001b[38;5;241m=\u001b[39m \u001b[43m_Concatenator\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 384\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mnames\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnames\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify_integrity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverify_integrity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result()\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/reshape/concat.py:445\u001b[0m, in \u001b[0;36m_Concatenator.__init__\u001b[0;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverify_integrity \u001b[38;5;241m=\u001b[39m verify_integrity\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy \u001b[38;5;241m=\u001b[39m copy\n\u001b[0;32m--> 445\u001b[0m objs, keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_clean_keys_and_objs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[1;32m 448\u001b[0m ndims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ndims(objs)\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/reshape/concat.py:507\u001b[0m, in \u001b[0;36m_Concatenator._clean_keys_and_objs\u001b[0;34m(self, objs, keys)\u001b[0m\n\u001b[1;32m 504\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(objs)\n\u001b[1;32m 506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 507\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo objects to concatenate\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 510\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(com\u001b[38;5;241m.\u001b[39mnot_none(\u001b[38;5;241m*\u001b[39mobjs_list))\n", + "\u001b[0;31mValueError\u001b[0m: No objects to concatenate" + ] + } + ], + "source": [ + "# Add an additional column to each dataframe\n", + "fractions = (0.01, 0.2)\n", + "for i, df in enumerate(results_transfer):\n", + " df['Fraction'] = fractions[i]\n", + "\n", + "# Concatenate the dataframes into a single dataframe\n", + "concatenated_df = pd.concat(results_transfer, ignore_index=True)\n", + "\n", + "# Print the concatenated dataframe\n", + "print(concatenated_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (750597114.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[27], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m results =\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "results = " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'result_fraction' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[28], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m ax \u001b[38;5;241m=\u001b[39m sns\u001b[38;5;241m.\u001b[39mlineplot(\n\u001b[0;32m----> 
2\u001b[0m data\u001b[38;5;241m=\u001b[39m\u001b[43mresult_fraction\u001b[49m,\n\u001b[1;32m 3\u001b[0m marker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mo\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m markersize\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[1;32m 5\u001b[0m x\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNum_Experiments\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 6\u001b[0m y\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEfficiency_CumBest\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 7\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'result_fraction' is not defined" + ] + } + ], + "source": [ + "ax = sns.lineplot(\n", + " data=result_fraction,\n", + " marker=\"o\",\n", + " markersize=10,\n", + " x=\"Num_Experiments\",\n", + " y=\"Efficiency_CumBest\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ax = sns.lineplot(\n", + " data=result_transfer_learning,\n", + " marker=\"o\",\n", + " markersize=10,\n", + " x=\"Num_Experiments\",\n", + " y=\"Efficiency_CumBest\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/alex/Scenarios.png b/alex/Scenarios.png deleted file mode 100644 index b38ca2a..0000000 Binary files a/alex/Scenarios.png and /dev/null differ diff --git a/alex/alex-baybe-inhibitor.ipynb b/alex/alex-baybe-inhibitor.ipynb deleted file mode 100644 index 41ab0ed..0000000 --- a/alex/alex-baybe-inhibitor.ipynb +++ /dev/null @@ -1,3831 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Imports" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", - "count 515.000000 515.000000 515.000000 515.000000 \n", - "mean 157.151456 6.613592 0.006934 0.113476 \n", - "std 212.312862 2.270734 0.012923 0.159137 \n", - "min 0.500000 0.000000 0.000010 0.000000 \n", - "25% 24.000000 4.400000 0.001000 0.050000 \n", - "50% 24.000000 7.000000 0.001000 0.100000 \n", - "75% 240.000000 7.000000 0.003000 0.100000 \n", - "max 672.000000 10.000000 0.100000 0.600000 \n", - "\n", - " Efficiency \n", - "count 515.000000 \n", - "mean 18.439395 \n", - "std 313.671188 \n", - "min -4834.000000 \n", - "25% 
25.625000 \n", - "50% 50.000000 \n", - "75% 85.475000 \n", - "max 100.000000 \n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "df_AA2024 = pd.read_excel('/data/averaged_filtered_AA2024.xlsx')\n", - "print(df_AA2024.describe())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " SMILES Time_h pH Inhib_Concentrat_M \\\n", - "0 C(=O)(C(=O)[O-])[O-] 24.0 4.0 0.0010 \n", - "1 C(=O)(C(=O)[O-])[O-] 24.0 7.0 0.0005 \n", - "2 C(=O)(C(=O)[O-])[O-] 24.0 10.0 0.0010 \n", - "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 4.0 0.0010 \n", - "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 7.0 0.0005 \n", - "\n", - " Salt_Concentrat_M Efficiency \n", - "0 0.10 20.00 \n", - "1 0.05 12.35 \n", - "2 0.10 20.00 \n", - "3 0.10 30.00 \n", - "4 0.05 -23.95 \n" - ] - } - ], - "source": [ - "print(df_AA2024.head())" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", - "count 333.000000 333.000000 3.330000e+02 333.000000 \n", - "mean 79.969970 0.440733 1.510580e-01 0.051051 \n", - "std 143.826377 1.913196 5.784776e-01 0.301263 \n", - "min 0.000000 -0.600000 1.000000e-07 0.000000 \n", - "25% 2.000000 -0.136721 2.000000e-04 0.000000 \n", - "50% 5.500000 0.000000 1.800000e-03 0.000000 \n", - "75% 96.000000 1.000000 4.279601e-03 0.000000 \n", - "max 720.000000 13.000000 3.280000e+00 2.000000 \n", - "\n", - " Efficiency \n", - "count 333.000000 \n", - "mean 60.781498 \n", - "std 22.139768 \n", - "min -49.170000 \n", - "25% 47.066667 \n", - "50% 66.000000 \n", - "75% 77.220000 \n", - "max 96.200000 \n" - ] - } - ], - "source": [ - "df_AA1000 = pd.read_excel('/workspaces/project-project-surface-science-syndicate/data/averaged_filtered_AA1000.xlsx')\n", - "print(df_AA1000.describe())" - ] - 
}, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Construct dataframe to work with" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "df = df_AA2024\n", - "df_transfer = df_AA1000" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "df_combined = pd.concat([df, df_transfer], axis=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count848.000000848.0000008.480000e+02848.000000848.000000
mean126.8431604.1895806.352976e-020.08896235.066659
std192.0556763.6961833.690920e-010.227758245.617010
min0.000000-0.6000001.000000e-070.000000-4834.000000
25%6.0000000.0000005.000000e-040.00000035.000000
50%24.0000004.0000001.000000e-030.01000060.000000
75%144.0000007.0000004.200000e-030.10000080.507500
max720.00000013.0000003.280000e+002.000000100.000000
\n", - "
" - ], - "text/plain": [ - " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", - "count 848.000000 848.000000 8.480000e+02 848.000000 \n", - "mean 126.843160 4.189580 6.352976e-02 0.088962 \n", - "std 192.055676 3.696183 3.690920e-01 0.227758 \n", - "min 0.000000 -0.600000 1.000000e-07 0.000000 \n", - "25% 6.000000 0.000000 5.000000e-04 0.000000 \n", - "50% 24.000000 4.000000 1.000000e-03 0.010000 \n", - "75% 144.000000 7.000000 4.200000e-03 0.100000 \n", - "max 720.000000 13.000000 3.280000e+00 2.000000 \n", - "\n", - " Efficiency \n", - "count 848.000000 \n", - "mean 35.066659 \n", - "std 245.617010 \n", - "min -4834.000000 \n", - "25% 35.000000 \n", - "50% 60.000000 \n", - "75% 80.507500 \n", - "max 100.000000 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_combined.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from matplotlib import pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "for i, column in enumerate(df.columns, 1):\n", - " sns.histplot(df[column])\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set targets/objectives = efficiency for now" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", - " warnings.warn(\n", - "/home/vscode/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from baybe.targets import NumericalTarget\n", - "from baybe.objective import Objective\n", - "\n", - "target = NumericalTarget(\n", - " name=\"Efficiency\",\n", - " mode=\"MAX\",\n", - ")\n", - "objective = Objective(mode=\"SINGLE\", targets=[target])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Search Space" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from baybe.parameters import NumericalContinuousParameter, CategoricalParameter, NumericalDiscreteParameter\n", - "from baybe.searchspace import SearchSpace\n", - "\n", - "basic_parameters=[\n", - "NumericalDiscreteParameter(\n", - " name=\"Time_h\",\n", - " values=df_combined[\"Time_h\"].unique(),\n", - " tolerance=5/60,\n", - "),\n", - "NumericalDiscreteParameter(\n", - " name=\"pH\",\n", - " values=df_combined[\"pH\"].unique(),\n", - " ), \n", - "NumericalDiscreteParameter(\n", - " name=\"Inhib_Concentrat_M\",\n", - " values=df_combined[\"Inhib_Concentrat_M\"].unique(),\n", - " ),\n", - "NumericalDiscreteParameter(\n", - " name=\"Salt_Concentrat_M\",\n", - " values=df_combined[\"Salt_Concentrat_M\"].unique(),\n", - " ),\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Substance parameter**\n", - "\n", - "Instead of values, this parameter accepts data in form of a dictionary. The items correspond to pairs of labels and SMILES. SMILES are string-based representations of molecular structures. Based on these, BayBE can assign each label a set of molecular descriptors as encoding." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'C(=O)(C(=O)[O-])[O-]': 'C(=O)(C(=O)[O-])[O-]', 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O': 'C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Fe+2]', 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]': 'C(C(C(C(C(C(=O)[O-])O)O)O)O)O.C(C(C(C(C(C(=O)[O-])O)O)O)O)O.[Zn+2]', 'C1=CC(=C(C=C1O)O)C=NNC(=S)N': 'C1=CC(=C(C=C1O)O)C=NNC(=S)N', 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]': 'C1=CC(=C(C=C1SSC2=CC(=C(C=C2)[N+](=O)[O-])C(=O)O)C(=O)O)[N+](=O)[O-]', 'C1=CC(=CC(=C1)S)C(=O)O': 'C1=CC(=CC(=C1)S)C(=O)O', 'C1=CC2=NNN=C2C=C1Cl': 'C1=CC2=NNN=C2C=C1Cl', 'C1=CC=C(C(=C1)C=NNC(=S)N)O': 'C1=CC=C(C(=C1)C=NNC(=S)N)O', 'C1COCCN1CCCS(=O)(=O)O': 'C1COCCN1CCCS(=O)(=O)O', 'C1N2CN3CN1CN(C2)C3': 'C1N2CN3CN1CN(C2)C3', 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1': 'C=CC(=O)OCCOC(=O)OCCSc1ncccn1', 'CC(=O)O': 'CC(=O)O', 'CC(=O)SSC(=O)C': 'CC(=O)SSC(=O)C', 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C': 'CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C', 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O': 'CCCCCCCC/C=C\\\\CCCCCCCC(=O)O', 'CCCCCCCCCCCCCCCCCC(=O)O': 'CCCCCCCCCCCCCCCCCC(=O)O', 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCCCCCCCCCOS(=O)(=O)O': 'CCCCCCCCCCCCOS(=O)(=O)O', 'CCCCCCCCCCCCc1ccccc1S([O])([O])O': 'CCCCCCCCCCCCc1ccccc1S([O])([O])O', 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]': 'CCCCCCCCN(CC(=O)O[Na])CC(=O)O[Na]', 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O': 'CCCCN(CCCC)C1=NC(=NC(=N1)NC(CCSC)C(=O)O)NC(CCSC)C(=O)O', 'CCCCOP(=O)(OCCCC)O': 'CCCCOP(=O)(OCCCC)O', 'CCN(C(=S)S)CC': 'CCN(C(=S)S)CC', 'CCOc1ccc2c(c1)nc([nH]2)S': 
'CCOc1ccc2c(c1)nc([nH]2)S', 'CCSc1nnc(s1)N': 'CCSc1nnc(s1)N', 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C', 'CNCC(C1=CC(=CC=C1)O)O': 'CNCC(C1=CC(=CC=C1)O)O', 'COC(=O)CCCC1=CNC2=CC=CC=C21': 'COC(=O)CCCC1=CNC2=CC=CC=C21', 'COC(=O)n1nnc2ccccc12': 'COC(=O)n1nnc2ccccc12', 'COCCOC(=O)OCSc1nc2c(s1)cccc2': 'COCCOC(=O)OCSc1nc2c(s1)cccc2', 'COc1ccc2c(c1)[nH]c(=S)[nH]2': 'COc1ccc2c(c1)[nH]c(=S)[nH]2', 'COc1cccc(c1)c1n[nH]c(=S)[nH]1': 'COc1cccc(c1)c1n[nH]c(=S)[nH]1', 'CS[C]1N[N]C(=N1)N': 'CS[C]1N[N]C(=N1)N', 'CSc1[nH]c2c(n1)cc(c(c2)C)C': 'CSc1[nH]c2c(n1)cc(c(c2)C)C', 'CSc1nnc(s1)N': 'CSc1nnc(s1)N', 'Cc1cc(C)nc(n1)S': 'Cc1cc(C)nc(n1)S', 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O': 'Cc1ccc(c(c1)n1nc2c(n1)cccc2)O', 'Cc1ccc2c(c1)nc([nH]2)S': 'Cc1ccc2c(c1)nc([nH]2)S', 'Cc1n[nH]c(=S)s1': 'Cc1n[nH]c(=S)s1', 'Cc1nsc(c1)N': 'Cc1nsc(c1)N', 'ClC([C]1N[N]C=N1)(Cl)Cl': 'ClC([C]1N[N]C=N1)(Cl)Cl', 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl': 'Clc1cc2[nH]c(=S)[nH]c2cc1Cl', 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O': 'Clc1ccc(cc1)CC[C@](C(C)(C)C)(Cn1cncn1)O', 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1': 'Clc1ccc(cc1Cl)c1n[nH]c(=S)[nH]1', 'Clc1ccc2c(c1)[nH]c(n2)S': 'Clc1ccc2c(c1)[nH]c(n2)S', 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1': 'Clc1cccc(c1)c1n[nH]c(=S)[nH]1', 'Cn1cnnc1S': 'Cn1cnnc1S', 'Cn1nnnc1S': 'Cn1nnnc1S', 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]': 'N.N.[N+](=O)(O)[O-].[N+](=O)(O)[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].O.O.O.O.[Ce+3]', 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O': 'NC(=S)NN=CC1=C(C(=C(C=C1)O)O)O', 'NCC(=O)O': 'NCC(=O)O', 'NO': 'NO', 'Nc1cc(N)nc(n1)S': 'Nc1cc(N)nc(n1)S', 'Nc1cc(S)nc(n1)N': 'Nc1cc(S)nc(n1)N', 'Nc1ccc2c(c1)sc(=S)[nH]2': 'Nc1ccc2c(c1)sc(=S)[nH]2', 'Nc1ccnc(n1)S': 'Nc1ccnc(n1)S', 'Nc1n[nH]c(=S)s1': 'Nc1n[nH]c(=S)s1', 'Nc1n[nH]c(n1)S': 'Nc1n[nH]c(n1)S', 'Nc1n[nH]cn1': 'Nc1n[nH]cn1', 'Nc1nc([nH]n1)C(=O)O': 'Nc1nc([nH]n1)C(=O)O', 'Nc1ncncc1N': 'Nc1ncncc1N', 'Nn1c(NN)nnc1S': 'Nn1c(NN)nnc1S', 
'Nn1c(S)nnc1c1ccccc1': 'Nn1c(S)nnc1c1ccccc1', 'Nn1cnnc1': 'Nn1cnnc1', 'O/N=C(/C(=N/O)/C)\\\\C': 'O/N=C(/C(=N/O)/C)\\\\C', 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1': 'O/N=C(\\\\C(=N/O)\\\\c1ccco1)/c1ccco1', 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]': 'O=C([O-])C(O)C(O)C(O)C(O)CO.[Na+]', 'OC(=O)/C=C/c1ccccc1': 'OC(=O)/C=C/c1ccccc1', 'OC(=O)CCCCC(=O)O': 'OC(=O)CCCCC(=O)O', 'OC(=O)CCCCCCCCCCCCCCC(=O)O': 'OC(=O)CCCCCCCCCCCCCCC(=O)O', 'OC(=O)CCS': 'OC(=O)CCS', 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O': 'OC(=O)CN(CC(=O)O)CCN(CC(=O)O)CC(=O)O', 'OC(=O)CS': 'OC(=O)CS', 'OC(=O)Cn1nnnc1S': 'OC(=O)Cn1nnnc1S', 'OC(=O)c1ccc(=S)[nH]c1': 'OC(=O)c1ccc(=S)[nH]c1', 'OC(=O)c1ccc(cc1)N': 'OC(=O)c1ccc(cc1)N', 'OC(=O)c1ccc(cc1)S': 'OC(=O)c1ccc(cc1)S', 'OC(=O)c1ccc(cc1)c1ccccc1': 'OC(=O)c1ccc(cc1)c1ccccc1', 'OC(=O)c1ccccc1': 'OC(=O)c1ccccc1', 'OC(=O)c1ccccc1O': 'OC(=O)c1ccccc1O', 'OC(=O)c1ccccc1S': 'OC(=O)c1ccccc1S', 'OC(=O)c1ccccn1': 'OC(=O)c1ccccn1', 'OC(=O)c1cccnc1': 'OC(=O)c1cccnc1', 'OC(=O)c1cccnc1S': 'OC(=O)c1cccnc1S', 'OC(=O)c1ccncc1': 'OC(=O)c1ccncc1', 'OC(=O)c1n[nH]c(n1)N': 'OC(=O)c1n[nH]c(n1)N', 'OCC(CO)O': 'OCC(CO)O', 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O', 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O': 'OC[C@H]([C@H]([C@@H]([C@H](C(=O)O)O)O)O)O', 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O': 'OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O', 'O[C@H]1C(=O)OCC1(C)C': 'O[C@H]1C(=O)OCC1(C)C', 'Oc1ccc(cc1)C(=O)O': 'Oc1ccc(cc1)C(=O)O', 'Oc1ccc(cc1)S([O])([O])O': 'Oc1ccc(cc1)S([O])([O])O', 'Oc1cccc2c1nccc2': 'Oc1cccc2c1nccc2', 'Oc1ccccc1c1nnc([nH]1)S': 'Oc1ccccc1c1nnc([nH]1)S', 'On1nnc2c1cccc2': 'On1nnc2c1cccc2', 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C': 'S=c1[nH]c2c([nH]1)c(=O)n(cn2)C', 'S=c1[nH]c2c([nH]1)cncn2': 'S=c1[nH]c2c([nH]1)cncn2', 'S=c1[nH]c2c([nH]1)nccn2': 'S=c1[nH]c2c([nH]1)nccn2', 'S=c1[nH]nc([nH]1)c1cccnc1': 'S=c1[nH]nc([nH]1)c1cccnc1', 'S=c1[nH]nc([nH]1)c1ccco1': 'S=c1[nH]nc([nH]1)c1ccco1', 'S=c1[nH]nc([nH]1)c1ccncc1': 'S=c1[nH]nc([nH]1)c1ccncc1', 
'S=c1sc2c([nH]1)cccc2': 'S=c1sc2c([nH]1)cccc2', 'SC#N': 'SC#N', 'S[C]1NC2=C[CH]C=NC2=N1': 'S[C]1NC2=C[CH]C=NC2=N1', 'Sc1n[nH]cn1': 'Sc1n[nH]cn1', 'Sc1nc(N)c(c(n1)S)N': 'Sc1nc(N)c(c(n1)S)N', 'Sc1nc(N)c2c(n1)[nH]nc2': 'Sc1nc(N)c2c(n1)[nH]nc2', 'Sc1nc2c([nH]1)cccc2': 'Sc1nc2c([nH]1)cccc2', 'Sc1ncc[nH]1': 'Sc1ncc[nH]1', 'Sc1ncccn1': 'Sc1ncccn1', 'Sc1nnc(s1)S': 'Sc1nnc(s1)S', '[Cl-].[Cl-].[Cl-].[Ce+3]': '[Cl-].[Cl-].[Cl-].[Ce+3]', '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]': '[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+3]', '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]': '[NH4+].[NH4+].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[N+](=O)([O-])[O-].[Ce+4]', '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]': '[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[O-]S(=O)(=O)[O-].[Ce+3].[Ce+3]', '[O-]S(=O)[O-].[Na+].[Na+]': '[O-]S(=O)[O-].[Na+].[Na+]', 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]': 'c1cc(ccc1c2[nH]c(nn2)S)[N+](=O)[O-]', 'c1ccc(nc1)c1ccccn1': 'c1ccc(nc1)c1ccccn1', 'c1ccc2c(c1)[nH]nn2': 'c1ccc2c(c1)[nH]nn2', 'c1ncn[nH]1': 'c1ncn[nH]1', 'C(C(CO)([N+](=O)[O-])Br)O': 'C(C(CO)([N+](=O)[O-])Br)O', 'C(CC=O)CC=O': 'C(CC=O)CC=O', 'C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O': 'C1=CC(=C(C=C1F)F)C(CN2C=NC=N2)(CN3C=NC=N3)O', 'C1=CC(=CN=C1)C=NNC(=S)N': 'C1=CC(=CN=C1)C=NNC(=S)N', 'C1=CC(=NC(=C1)N)N': 'C1=CC(=NC(=C1)N)N', 'C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4': 'C1=CC=C(C=C1)C(C2=CC=CC=C2)(C3=CC=CC=C3Cl)N4C=CN=C4', 'C1=CC=NC(=C1)C=NNC(=S)N': 'C1=CC=NC(=C1)C=NNC(=S)N', 'C1=CN=C(C=N1)C(=O)N': 'C1=CN=C(C=N1)C(=O)N', 'C1=CN=C(N=C1)N': 'C1=CN=C(N=C1)N', 'C1=CN=CC=C1C=NNC(=S)N': 'C1=CN=CC=C1C=NNC(=S)N', 'C1CCC(=NO)CC1': 'C1CCC(=NO)CC1', 'C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O': 'C=CC1=C(N2C(C(C2=O)NC(=O)C(=NOCC(=O)O)C3=CSC(=N3)N)SC1)C(=O)O', 'CC(=NO)C': 'CC(=NO)C', 
'CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O': 'CC(C)(C)NCC(COC1=CC=CC2=C1CC(C(C2)O)O)O', 'CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O': 'CC(C)(C)NCC(COC1=NSN=C1N2CCOCC2)O', 'CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O': 'CC(C)NCC(COC1=CC=C(C=C1)CC(=O)N)O', 'CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O': 'CC(C)NCC(COC1=CC=CC2=CC=CC=C21)O', 'CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N': 'CC(OC(=O)C)OC(=O)C1=C(CSC2N1C(=O)C2NC(=O)C(=NOC)C3=CC=CO3)COC(=O)N', 'CC1=CC=C(C=C1)[N]2N=NC=C2O': 'CC1=CC=C(C=C1)[N]2N=NC=C2O', 'CC1=CN=C(C=N1)C(=O)N': 'CC1=CN=C(C=N1)C(=O)N', 'CC1=NC(=CC=C1)C': 'CC1=NC(=CC=C1)C', 'CCC(=NO)C': 'CCC(=NO)C', 'CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl': 'CCOC(=O)C1=C(C)N=C(S)NC1C2=CC=C(C=C2)Cl', 'CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O': 'CN(C)CC1CCCCC1(C2=CC(=CC=C2)OC)O', 'COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O': 'COC(=O)C1=CC=C(C=C1)[N]2N=NC=C2O', 'COC1=NC=C(N=C1)C(=O)N': 'COC1=NC=C(N=C1)C(=O)N', 'NC(N)=S': 'NC(N)=S', 'NC1=CCNC(=S)N1': 'NC1=CCNC(=S)N1', 'OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O': 'OC1=C(C=CC=C1)C=NC2=CC=C(C=C2)N=NC3=C(C=CC=C3)N=CC4=C(C=CC=C4)O', 'OC1=CN=N[N]1C2=CC=C(C=C2)Cl': 'OC1=CN=N[N]1C2=CC=C(C=C2)Cl'}\n" - ] - } - ], - "source": [ - "#Create a dictionary of unique SMILES representations found in the dataframe\n", - "\n", - "unique_SMILES = df_combined[\"SMILES\"].unique()\n", - "\n", - "dic_SMILES = dict(zip(unique_SMILES, unique_SMILES))\n", - "\n", - "print(dic_SMILES)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from baybe.parameters import SubstanceParameter\n", - "\n", - "mordred_parameters = basic_parameters + [\n", - " SubstanceParameter(\n", - " name=\"SMILES\",\n", - " data=dic_SMILES,\n", - " encoding=\"MORDRED\",\n", - " )\n", - "]\n", - "\n", - "rdkit_parameters = basic_parameters + [\n", - " SubstanceParameter(\n", - " name=\"SMILES\",\n", - " data=dic_SMILES,\n", - " encoding=\"RDKIT\",\n", - " )\n", - "]\n", - "\n", - "morganfp_parameters = 
basic_parameters + [\n", - " SubstanceParameter(\n", - " name=\"SMILES\",\n", - " data=dic_SMILES,\n", - " encoding=\"MORGAN_FP\",\n", - " )\n", - "]\n", - "\n", - "ohe_parameters = basic_parameters + [\n", - " CategoricalParameter(\n", - " name=\"SMILES\",\n", - " values=unique_SMILES,\n", - " encoding=\"OHE\",\n", - " )\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "searchspace_mordred = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), mordred_parameters)\n", - "searchspace_rdkit = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), rdkit_parameters)\n", - "searchspace_morganfp = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), morganfp_parameters)\n", - "searchspace_ohe = SearchSpace.from_dataframe(df.drop(\"Efficiency\", axis = 1), ohe_parameters)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Recommenders" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **SequentialGreedyRecommender** is a powerful recommender that leverages BoTorch optimization functions to perform sequential greedy optimization. It can be applied for discrete, continuous and hybrid search spaces. It is an implementation of the BoTorch optimization functions for discrete, continuous and mixed spaces. **It is important to note that this recommender performs a brute-force search when applied in hybrid search spaces, as it optimizes the continuous part of the space while exhaustively searching choices in the discrete subspace.** You can customize this behavior to only sample a certain percentage of the discrete subspace via the `sampling_percentage` attribute and to choose different sampling strategies via the `hybrid_sampler` attribute. 
\n", - "\n", - "e.g.\n", - "strategy = TwoPhaseStrategy(recommender=SequentialGreedyRecommender(hybrid_sampler=\"Farthest\", sampling_percentage=0.3))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For implementing fully customized surrogate models e.g. from sklearn or PyTorch, see:\n", - "https://emdgroup.github.io/baybe/examples/Custom_Surrogates/Custom_Surrogates.html\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "from baybe.recommenders import (\n", - " SequentialGreedyRecommender,\n", - " FPSRecommender,\n", - " TwoPhaseMetaRecommender,\n", - ")\n", - "\n", - "recommender = TwoPhaseMetaRecommender(\n", - " initial_recommender=FPSRecommender(), # farthest point sampling\n", - " recommender=SequentialGreedyRecommender(), # Bayesian model-based optimization\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Campaign Strategy" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "from baybe.strategies import TwoPhaseStrategy\n", - "from baybe import Campaign\n", - "\n", - "campaign_rdkit = Campaign(searchspace_rdkit, objective)\n", - "campaign_mordred = Campaign(searchspace_mordred, objective)\n", - "campaign_morganfp = Campaign(searchspace_morganfp, objective)\n", - "campaign_ohe = Campaign(searchspace_ohe, objective)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Start simulation" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "scenarios = {\n", - " \"OHE\" : campaign_ohe,\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/10 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.lineplot(\n", - " 
data=results,\n", - " x=\"Num_Experiments\",\n", - " y=\"Efficiency_CumBest\",\n", - " hue=\"Scenario\",\n", - " marker=\"x\",\n", - ")\n", - "\n", - "plt.xlim(0, N_DOE_ITERATIONS)\n", - "plt.ylim(0, 100)\n", - "\n", - "plt.savefig(\"Scenarios\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluate best results" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['Scenario', 'Random_Seed', 'Iteration', 'Num_Experiments',\n", - " 'Efficiency_Measurements', 'Efficiency_IterBest', 'Efficiency_CumBest'],\n", - " dtype='object')\n" - ] - } - ], - "source": [ - "print(results.columns)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Use transfer learning on other metals" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define the training function (i.e. the previously investigated larger data set) and the yet to be optimized data sets." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "from baybe.parameters import TaskParameter\n", - "\n", - "taskparam = TaskParameter(\n", - " name=\"Al_alloys\",\n", - " values=[\"AA1000\", \"AA2024\"],\n", - " active_values=[\"AA2024\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "unique_SMILES_transfer = df_transfer[\"SMILES\"].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [], - "source": [ - "from baybe.parameters import NumericalContinuousParameter, CategoricalParameter, NumericalDiscreteParameter\n", - "from baybe.searchspace import SearchSpace\n", - "\n", - "transfer_parameters=[\n", - "NumericalDiscreteParameter(\n", - " name=\"Time_h\",\n", - " values=df_combined[\"Time_h\"].unique(),\n", - " tolerance=5/60,\n", - "),\n", - "NumericalDiscreteParameter(\n", - " name=\"pH\",\n", - " values=df_combined[\"pH\"].unique(),\n", - " ), \n", - "NumericalDiscreteParameter(\n", - " name=\"Inhib_Concentrat_M\",\n", - " values=df_combined[\"Inhib_Concentrat_M\"].unique(),\n", - " ),\n", - "NumericalDiscreteParameter(\n", - " name=\"Salt_Concentrat_M\",\n", - " values=df_combined[\"Salt_Concentrat_M\"].unique(),\n", - " ),\n", - "CategoricalParameter(\n", - " name=\"SMILES\",\n", - " values=unique_SMILES,\n", - " encoding=\"OHE\",\n", - " )\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "searchspace_transfer = SearchSpace.from_dataframe(df_transfer.drop(\"Efficiency\", axis = 1), transfer_parameters)\n", - "\n", - "campaign_transfer = Campaign(searchspace_transfer, objective)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [], - "source": [ - "df_features = df.drop(\"Efficiency\", axis = 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - 
"metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count258.000000258.000000258.000000258.000000258.000000
mean161.0910856.5798450.0081030.12740338.761628
std214.5363172.2416080.0143350.177444158.903313
min0.5000000.0000000.0000100.000000-1760.000000
25%24.0000005.4000000.0010000.05000030.000000
50%24.0000007.0000000.0010000.10000057.500000
75%240.0000007.0000000.0100000.10000087.825000
max672.00000010.0000000.1000000.600000100.000000
\n", - "" - ], - "text/plain": [ - " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", - "count 258.000000 258.000000 258.000000 258.000000 \n", - "mean 161.091085 6.579845 0.008103 0.127403 \n", - "std 214.536317 2.241608 0.014335 0.177444 \n", - "min 0.500000 0.000000 0.000010 0.000000 \n", - "25% 24.000000 5.400000 0.001000 0.050000 \n", - "50% 24.000000 7.000000 0.001000 0.100000 \n", - "75% 240.000000 7.000000 0.010000 0.100000 \n", - "max 672.000000 10.000000 0.100000 0.600000 \n", - "\n", - " Efficiency \n", - "count 258.000000 \n", - "mean 38.761628 \n", - "std 158.903313 \n", - "min -1760.000000 \n", - "25% 30.000000 \n", - "50% 57.500000 \n", - "75% 87.825000 \n", - "max 100.000000 " - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fraction_df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "No objects to concatenate", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[65], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFraction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m fractions[i]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Concatenate the dataframes into a single dataframe\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m concatenated_df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresults_transfer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Print the concatenated 
dataframe\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(concatenated_df)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/reshape/concat.py:382\u001b[0m, in \u001b[0;36mconcat\u001b[0;34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[1;32m 380\u001b[0m copy \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m--> 382\u001b[0m op \u001b[38;5;241m=\u001b[39m \u001b[43m_Concatenator\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 384\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mnames\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnames\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify_integrity\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverify_integrity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m op\u001b[38;5;241m.\u001b[39mget_result()\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/reshape/concat.py:445\u001b[0m, in \u001b[0;36m_Concatenator.__init__\u001b[0;34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverify_integrity \u001b[38;5;241m=\u001b[39m verify_integrity\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcopy \u001b[38;5;241m=\u001b[39m copy\n\u001b[0;32m--> 445\u001b[0m objs, keys \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_clean_keys_and_objs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[1;32m 448\u001b[0m ndims \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ndims(objs)\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/reshape/concat.py:507\u001b[0m, in \u001b[0;36m_Concatenator._clean_keys_and_objs\u001b[0;34m(self, objs, keys)\u001b[0m\n\u001b[1;32m 504\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(objs)\n\u001b[1;32m 506\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 507\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo objects to concatenate\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m keys \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 510\u001b[0m objs_list \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(com\u001b[38;5;241m.\u001b[39mnot_none(\u001b[38;5;241m*\u001b[39mobjs_list))\n", - "\u001b[0;31mValueError\u001b[0m: No objects to concatenate" - ] - } - ], - "source": [ - "# Add an additional column to each dataframe\n", - "fractions = (0.01, 0.2)\n", - "for i, df in enumerate(results_transfer):\n", - " df['Fraction'] = fractions[i]\n", - "\n", - "# Concatenate the dataframes into a single dataframe\n", - "concatenated_df = pd.concat(results_transfer, ignore_index=True)\n", - "\n", - "# Print the concatenated dataframe\n", - "print(concatenated_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Random_SeedIterationNum_ExperimentsEfficiency_IterBestEfficiency_CumBest
count50.050.0000050.0000050.00000050.000000
mean1337.024.5000025.5000068.42471886.399133
std0.014.5773814.5773823.77539911.036087
min1337.00.000001.000004.39000057.000000
25%1337.012.2500013.2500050.80600078.380000
50%1337.024.5000025.5000079.91000090.000000
75%1337.036.7500037.7500087.32821493.590000
max1337.049.0000050.0000096.20000096.200000
\n", - "
" - ], - "text/plain": [ - " Random_Seed Iteration Num_Experiments Efficiency_IterBest \\\n", - "count 50.0 50.00000 50.00000 50.000000 \n", - "mean 1337.0 24.50000 25.50000 68.424718 \n", - "std 0.0 14.57738 14.57738 23.775399 \n", - "min 1337.0 0.00000 1.00000 4.390000 \n", - "25% 1337.0 12.25000 13.25000 50.806000 \n", - "50% 1337.0 24.50000 25.50000 79.910000 \n", - "75% 1337.0 36.75000 37.75000 87.328214 \n", - "max 1337.0 49.00000 50.00000 96.200000 \n", - "\n", - " Efficiency_CumBest \n", - "count 50.000000 \n", - "mean 86.399133 \n", - "std 11.036087 \n", - "min 57.000000 \n", - "25% 78.380000 \n", - "50% 90.000000 \n", - "75% 93.590000 \n", - "max 96.200000 " - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ax = sns.lineplot(\n", - " data=result_fraction,\n", - " marker=\"o\",\n", - " markersize=10,\n", - " x=\"Num_Experiments\",\n", - " y=\"Efficiency_CumBest\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ax = sns.lineplot(\n", - " data=result_transfer_learning,\n", - " marker=\"o\",\n", - " markersize=10,\n", - " x=\"Num_Experiments\",\n", - " y=\"Efficiency_CumBest\",\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}