diff --git a/legacy_code/tim_baybe-inhibitor.ipynb b/legacy_code/tim_baybe-inhibitor.ipynb new file mode 100644 index 0000000..4f8b481 --- /dev/null +++ b/legacy_code/tim_baybe-inhibitor.ipynb @@ -0,0 +1,1057 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This project will focus on exploring the capabilities of Bayesian optimization, specifically employing BayBE, in the discovery of novel corrosion inhibitors for materials design. Initially, we will work with a randomly chosen subset from a comprehensive database of electrochemical responses of small organic molecules. Our goal is to assess how Bayesian optimization can speed up the screening process across the design space to identify promising compounds. We will compare different strategies for incorporating alloy information, while optimizing the experimental parameters with respect to the inhibitive performance of the screened compounds." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initialization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loading libraries and data files:" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from baybe import Campaign\n", + "\n", + "df_AA2024 = pd.read_excel('../data/averaged_filtered_AA2024.xlsx')\n", + "df_AA1000 = pd.read_excel('../data/averaged_filtered_AA1000.xlsx')\n", + "df_Al = pd.read_excel('../data/averaged_filtered_Al.xlsx')\n", + "\n", + "df_active = df_AA2024\n", + "lookup = df_active" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "def random_subsample(df, num_samples):\n", + " np.random.seed(42)\n", + " indices = np.random.choice(df.index, num_samples, replace=False)\n", + " subsampled_df = df.loc[indices]\n", + " return subsampled_df " + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SMILESTime_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
0C(=O)(C(=O)[O-])[O-]24.04.00.00100.1020.00
1C(=O)(C(=O)[O-])[O-]24.07.00.00050.0512.35
2C(=O)(C(=O)[O-])[O-]24.010.00.00100.1020.00
3C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O24.04.00.00100.1030.00
4C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O24.07.00.00050.05-23.95
\n", + "
" + ], + "text/plain": [ + " SMILES Time_h pH Inhib_Concentrat_M \\\n", + "0 C(=O)(C(=O)[O-])[O-] 24.0 4.0 0.0010 \n", + "1 C(=O)(C(=O)[O-])[O-] 24.0 7.0 0.0005 \n", + "2 C(=O)(C(=O)[O-])[O-] 24.0 10.0 0.0010 \n", + "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 4.0 0.0010 \n", + "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 7.0 0.0005 \n", + "\n", + " Salt_Concentrat_M Efficiency \n", + "0 0.10 20.00 \n", + "1 0.05 12.35 \n", + "2 0.10 20.00 \n", + "3 0.10 30.00 \n", + "4 0.05 -23.95 " + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_active.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "unique_smiles = df_active.SMILES.unique()\n", + "unique_times = df_active.Time_h.unique()\n", + "unique_pH = df_active.pH.unique()\n", + "unique_inhib_conc = df_active.Inhib_Concentrat_M.unique()\n", + "unique_salt_conc = df_active.Salt_Concentrat_M.unique()\n", + "\n", + "time_min, time_max = df_active.Time_h.min(), df_active.Time_h.max()\n", + "pH_min, pH_max = df_active.pH.min(), df_active.pH.max()\n", + "inhib_conc_min, inhib_conc_max = df_active.Inhib_Concentrat_M.min(), df_active.Inhib_Concentrat_M.max()\n", + "salt_conc_min, salt_conc_max = df_active.Salt_Concentrat_M.min(), df_active.Salt_Concentrat_M.max()\n", + "efficiency_min, efficiency_max = df_active.Efficiency.min(), df_active.Efficiency.max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bayesian Optimization" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe import Campaign\n", + "\n", + "from baybe.targets import NumericalTarget\n", + "from baybe.objective import Objective\n", + "from baybe.searchspace import SearchSpace\n", + "from baybe.parameters import NumericalDiscreteParameter\n", + "from baybe.parameters import SubstanceParameter\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "def list_to_dict(input_list):\n", + " return {item: item for item in input_list}\n", + "\n", + "smiles_dict =list_to_dict(unique_smiles)\n", + "\n", + "\n", + "target = NumericalTarget(name=\"Efficiency\", mode=\"MAX\", bounds=(efficiency_min, efficiency_max), transformation=\"LINEAR\")\n", + "objective = Objective(mode=\"SINGLE\", targets=[target])\n", + "\n", + "\n", + "\n", + "parameters = [\n", + "NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_active['Time_h'].unique(),\n", + " # tolerance = 0.004, assume certain experimental noise for each parameter measurement?\n", + "),\n", + "NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_active['pH'].unique(),\n", + " # tolerance = 0.004\n", + " ), \n", + "NumericalDiscreteParameter( # Set this as continuous, the values seem quite small?\n", + " name=\"Inhib_Concentrat_M\",\n", + " values= df_active['Inhib_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_active['Salt_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=smiles_dict,\n", + " encoding=\"MORGAN_FP\", # optional\n", + " decorrelate=0.7, # optional\n", + " ) \n", + " ]\n", + "# searchspace = SubspaceDiscrete.from_product(parameters=parameters)\n", + "df_no_target = 
df_active.drop('Efficiency', axis=1)\n", + "\n", + "searchspace = SearchSpace.from_dataframe(df = df_no_target, parameters=parameters)\n", + "\n", + "campaign = Campaign(\n", + " searchspace=searchspace, # Required\n", + " objective=objective, # Required\n", + " # recommender=recommender, # Optional\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.recommenders import RandomRecommender\n", + "\n", + "campaign_rand = Campaign(\n", + " searchspace=searchspace,\n", + " recommender=RandomRecommender(),\n", + " objective=objective,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "substance_encodings = [\"MORDRED\", \"RDKIT\", \"MORGAN_FP\"]\n", + "scenarios = {\n", + " encoding: Campaign(\n", + " searchspace=SearchSpace.from_dataframe(df = df_no_target,\n", + " parameters=[\n", + " NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_active[\"Time_h\"].unique(),\n", + " # tolerance = 0.004, assume certain experimental noise for each parameter measurement?\n", + " ),\n", + " NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_active[\"pH\"].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + " NumericalDiscreteParameter( # Set this as continuous, the values seem quite small?\n", + " name=\"Inhib_Concentrat_M\",\n", + " values=df_active[\"Inhib_Concentrat_M\"].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + " NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_active[\"Salt_Concentrat_M\"].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + " SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=smiles_dict,\n", + " encoding=encoding, # optional\n", + " decorrelate=0.7, # optional\n", + " ),\n", + " ]\n", + " ),\n", + " objective=objective,\n", + " )\n", + " for encoding in substance_encodings\n", + "}" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "scenarios.update({\"Random Baseline\": campaign_rand})" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|##########| 4/4 [01:05<00:00, 16.27s/it]\n" + ] + } + ], + "source": [ + "from baybe.simulation import simulate_experiment\n", + "from baybe.simulation import simulate_scenarios\n", + "\n", + "\n", + "BATCH_SIZE = 1\n", + "N_DOE_ITERATIONS = 50\n", + "N_MC_ITERATIONS = 10\n", + "\n", + "# results = simulate_experiment(\n", + "# campaign,\n", + "# df_AA2024,\n", + "# initial_data=initial_dataset,\n", + "# batch_size=BATCH_SIZE,\n", + "# n_doe_iterations=N_DOE_ITERATIONS,\n", + "# impute_mode=\"best\",\n", + "# )\n", + "initial_dataset = random_subsample(df_active, 5)\n", + "\n", + "results = simulate_scenarios(\n", + " scenarios,\n", + " lookup,\n", + " initial_data=[initial_dataset],\n", + " batch_size=BATCH_SIZE,\n", + " n_doe_iterations=N_DOE_ITERATIONS,\n", + " # n_mc_iterations=N_MC_ITERATIONS,\n", + " impute_mode=\"best\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "max_efficiency = lookup[\"Efficiency\"].max()\n", + "# plot_results = results[results['Scenario'].isin(['Mordred', 'Morgan', 'RDKIT'])]\n", + "\n", + "sns.lineplot(\n", + " data=results, x=\"Num_Experiments\", y=\"Efficiency_CumBest\", hue=\"Scenario\", marker=\"x\"\n", + ")\n", + "plt.plot([0.5, N_DOE_ITERATIONS+0.5], [max_efficiency, max_efficiency], \"--r\")\n", + "plt.legend(loc=\"lower right\")\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.xlim(0, N_DOE_ITERATIONS+1)\n", + "plt.savefig(\"./AA1000_simulation_10MC_50exp_1batch.png\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Search Space" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Target & Objective" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recommender" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Benchmarking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transfer Learning" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from botorch.test_functions.synthetic import Hartmann\n", + "\n", + "from baybe import Campaign\n", + "from baybe.objective import Objective\n", + "from baybe.parameters import NumericalDiscreteParameter, TaskParameter, CategoricalParameter\n", + "from baybe.searchspace import 
SearchSpace\n", + "from baybe.simulation import simulate_scenarios, simulate_transfer_learning\n", + "from baybe.targets import NumericalTarget\n", + "from baybe.utils.botorch_wrapper import botorch_function_wrapper\n", + "from baybe.utils.plotting import create_example_plots\n" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "SMOKE_TEST = \"SMOKE_TEST\" in os.environ # reduce the problem complexity in CI pipelines\n", + "DIMENSION = 3 # input dimensionality of the test function\n", + "BATCH_SIZE = 1 # batch size of recommendations per DOE iteration\n", + "N_MC_ITERATIONS = 2 if SMOKE_TEST else 50 # number of Monte Carlo runs\n", + "N_DOE_ITERATIONS = 2 if SMOKE_TEST else 10 # number of DOE iterations\n", + "POINTS_PER_DIM = 3 if SMOKE_TEST else 7 # number of grid points per input dimension\n" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "df_AA2024 = pd.read_excel('../data/averaged_filtered_AA2024.xlsx')\n", + "df_AA2024[\"alloy\"] = \"AA2024\"\n", + "df_AA1000 = pd.read_excel('../data/averaged_filtered_AA1000.xlsx')\n", + "df_AA1000[\"alloy\"] = \"AA1000\"\n", + "df_AA5000 = pd.read_excel('../data/averaged_filtered_AA5000.xlsx')\n", + "df_AA5000[\"alloy\"] = \"AA5000\"\n", + "df_AA6000 = pd.read_excel('../data/averaged_filtered_AA6000.xlsx')\n", + "df_AA6000[\"alloy\"] = \"AA6000\"\n", + "df_AA7075 = pd.read_excel('../data/averaged_filtered_AA7075.xlsx')\n", + "df_AA7075[\"alloy\"] = \"AA7075\"\n", + "df_Al = pd.read_excel('../data/averaged_filtered_Al.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "target = NumericalTarget(name=\"Efficiency\", mode=\"MAX\", bounds=(efficiency_min, efficiency_max), transformation=\"LINEAR\")\n", + "objective = Objective(mode=\"SINGLE\", targets=[target])" + ] + }, + { + "cell_type": "code", + "execution_count": 
133, + "metadata": {}, + "outputs": [], + "source": [ + "df_combined = pd.concat([df_AA2024, df_AA1000, df_AA5000, df_AA6000, df_AA7075], axis=0).reset_index(drop=True)\n", + "# df_active = df_AA2024\n", + "lookup = df_combined" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [], + "source": [ + "def list_to_dict(input_list):\n", + " return {item: item for item in input_list}\n", + "\n", + "smiles_dict =list_to_dict(df_combined.SMILES.unique())\n", + "\n", + "discrete_params = [\n", + "NumericalDiscreteParameter(\n", + " name=\"Time_h\",\n", + " values=df_combined['Time_h'].unique(),\n", + " # tolerance = 0.004, assume certain experimental noise for each parameter measurement?\n", + "),\n", + "NumericalDiscreteParameter(\n", + " name=\"pH\",\n", + " values=df_combined['pH'].unique(),\n", + " # tolerance = 0.004\n", + " ), \n", + "NumericalDiscreteParameter( # Set this as continuous, the values seem quite small?\n", + " name=\"Inhib_Concentrat_M\",\n", + " values= df_combined['Inhib_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "NumericalDiscreteParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " values=df_combined['Salt_Concentrat_M'].unique(),\n", + " # tolerance = 0.004\n", + " ),\n", + "# CategoricalParameter(\n", + "# name=\"alloy\",\n", + "# values=df_combined['alloy'].unique(),\n", + "# ),\n", + "SubstanceParameter(\n", + " name=\"SMILES\",\n", + " data=smiles_dict,\n", + " encoding=\"MORDRED\", # optional\n", + " decorrelate=0.7, # optional\n", + " ) \n", + " ]\n", + "# searchspace = SubspaceDiscrete.from_product(parameters=parameters)\n", + "df_no_target = df_combined.drop('Efficiency', axis=1).reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [], + "source": [ + "task_param = TaskParameter(\n", + " name=\"alloy\",\n", + " values=[\"AA1000\", \"AA2024\", \"AA5000\", \"AA6000\", \"AA7075\"],\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [], + "source": [ + "parameters = [*discrete_params, task_param]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SMILESTime_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiencyalloy
0C(=O)(C(=O)[O-])[O-]24.04.00.00100.1020.00AA2024
1C(=O)(C(=O)[O-])[O-]24.07.00.00050.0512.35AA2024
2C(=O)(C(=O)[O-])[O-]24.010.00.00100.1020.00AA2024
3C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O24.04.00.00100.1030.00AA2024
4C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O24.07.00.00050.05-23.95AA2024
\n", + "
" + ], + "text/plain": [ + " SMILES Time_h pH Inhib_Concentrat_M \\\n", + "0 C(=O)(C(=O)[O-])[O-] 24.0 4.0 0.0010 \n", + "1 C(=O)(C(=O)[O-])[O-] 24.0 7.0 0.0005 \n", + "2 C(=O)(C(=O)[O-])[O-] 24.0 10.0 0.0010 \n", + "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 4.0 0.0010 \n", + "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 7.0 0.0005 \n", + "\n", + " Salt_Concentrat_M Efficiency alloy \n", + "0 0.10 20.00 AA2024 \n", + "1 0.05 12.35 AA2024 \n", + "2 0.10 20.00 AA2024 \n", + "3 0.10 30.00 AA2024 \n", + "4 0.05 -23.95 AA2024 " + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lookup.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SMILESTime_hpHInhib_Concentrat_MSalt_Concentrat_Malloy
0C(=O)(C(=O)[O-])[O-]24.04.00.00100.10AA2024
1C(=O)(C(=O)[O-])[O-]24.07.00.00050.05AA2024
2C(=O)(C(=O)[O-])[O-]24.010.00.00100.10AA2024
3C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O24.04.00.00100.10AA2024
4C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O24.07.00.00050.05AA2024
.....................
1180c1ccc2c(c1)[nH]nn224.04.00.00100.10AA7075
1181c1ccc2c(c1)[nH]nn224.010.00.00100.10AA7075
1182c1ccc2c(c1)[nH]nn2672.07.00.00100.10AA7075
1183c1ncn[nH]124.04.00.00100.10AA7075
1184c1ncn[nH]124.010.00.00100.10AA7075
\n", + "

1185 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " SMILES Time_h pH Inhib_Concentrat_M \\\n", + "0 C(=O)(C(=O)[O-])[O-] 24.0 4.0 0.0010 \n", + "1 C(=O)(C(=O)[O-])[O-] 24.0 7.0 0.0005 \n", + "2 C(=O)(C(=O)[O-])[O-] 24.0 10.0 0.0010 \n", + "3 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 4.0 0.0010 \n", + "4 C(C(=O)[O-])C(CC(=O)[O-])(C(=O)[O-])O 24.0 7.0 0.0005 \n", + "... ... ... ... ... \n", + "1180 c1ccc2c(c1)[nH]nn2 24.0 4.0 0.0010 \n", + "1181 c1ccc2c(c1)[nH]nn2 24.0 10.0 0.0010 \n", + "1182 c1ccc2c(c1)[nH]nn2 672.0 7.0 0.0010 \n", + "1183 c1ncn[nH]1 24.0 4.0 0.0010 \n", + "1184 c1ncn[nH]1 24.0 10.0 0.0010 \n", + "\n", + " Salt_Concentrat_M alloy \n", + "0 0.10 AA2024 \n", + "1 0.05 AA2024 \n", + "2 0.10 AA2024 \n", + "3 0.10 AA2024 \n", + "4 0.05 AA2024 \n", + "... ... ... \n", + "1180 0.10 AA7075 \n", + "1181 0.10 AA7075 \n", + "1182 0.10 AA7075 \n", + "1183 0.10 AA7075 \n", + "1184 0.10 AA7075 \n", + "\n", + "[1185 rows x 6 columns]" + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_combined.drop('Efficiency', axis=1).reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [], + "source": [ + "searchspace = SearchSpace.from_dataframe(df = df_combined.drop('Efficiency', axis=1), parameters=parameters)" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [], + "source": [ + "campaign = Campaign(searchspace=searchspace, objective=objective)" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/250 [00:00