diff --git a/baybe-inhibitor Alex6022.ipynb b/baybe-inhibitor Alex6022.ipynb new file mode 100644 index 0000000..e9c7823 --- /dev/null +++ b/baybe-inhibitor Alex6022.ipynb @@ -0,0 +1,691 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This project will focus on exploring the capabilities of Bayesian optimization, specifically employing BayBE, in the discovery of novel corrosion inhibitors for materials design. Initially, we will work with a randomly chosen subset from a comprehensive database of electrochemical responses of small organic molecules. Our goal is to assess how Bayesian optimization can speed up the screening process across the design space to identify promising compounds. We will compare different strategies for incorporating alloy information, while optimizing the experimental parameters with respect to the inhibitive performance of the screened compounds." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initizalization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loading libraries and data files:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/vscode/.local/lib/python3.10/site-packages/baybe/telemetry.py:222: UserWarning: WARNING: BayBE Telemetry endpoint https://public.telemetry.baybe.p.uptimize.merckgroup.com:4317 cannot be reached. Disabling telemetry. The exception encountered was: ConnectionError, HTTPConnectionPool(host='verkehrsnachrichten.merck.de', port=80): Max retries exceeded with url: / (Caused by NameResolutionError(\": Failed to resolve 'verkehrsnachrichten.merck.de' ([Errno -2] Name or service not known)\"))\n", + " warnings.warn(\n", + "/home/vscode/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from baybe import Campaign\n", + "\n", + "df_AA2024 = pd.read_excel('data/filtered_AA2024.xlsx')\n", + "df_AA1000 = pd.read_excel('data/filtered_AA1000.xlsx')\n", + "df_Al = pd.read_excel('data/filtered_Al.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count1966.0000001966.0000001.966000e+031966.0000001966.000000
mean71.3323093.6265852.918525e-020.07765548.383540
std154.5943254.0895422.442926e-010.217774163.164194
min0.000000-0.6000001.000000e-070.000000-4834.000000
25%1.6700000.0000001.000000e-040.00000035.602500
50%24.0000001.0000001.000000e-030.00000061.490000
75%24.0000007.0000002.000000e-030.10000082.000000
max720.00000014.3010303.280000e+002.000000100.000000
\n", + "
" + ], + "text/plain": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 1966.000000 1966.000000 1.966000e+03 1966.000000 \n", + "mean 71.332309 3.626585 2.918525e-02 0.077655 \n", + "std 154.594325 4.089542 2.442926e-01 0.217774 \n", + "min 0.000000 -0.600000 1.000000e-07 0.000000 \n", + "25% 1.670000 0.000000 1.000000e-04 0.000000 \n", + "50% 24.000000 1.000000 1.000000e-03 0.000000 \n", + "75% 24.000000 7.000000 2.000000e-03 0.100000 \n", + "max 720.000000 14.301030 3.280000e+00 2.000000 \n", + "\n", + " Efficiency \n", + "count 1966.000000 \n", + "mean 48.383540 \n", + "std 163.164194 \n", + "min -4834.000000 \n", + "25% 35.602500 \n", + "50% 61.490000 \n", + "75% 82.000000 \n", + "max 100.000000 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_AA1000.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count611.000000611.000000611.000000611.000000611.000000
mean135.8019646.3420620.0068080.14545026.736841
std201.6838672.5290800.0140590.200575288.788317
min0.5000000.0000000.0000100.000000-4834.000000
25%24.0000004.0000000.0005000.01000030.000000
50%24.0000007.0000000.0010000.10000058.000000
75%144.0000007.0000000.0030000.10000087.950000
max672.00000010.0000000.1000000.600000100.000000
\n", + "
" + ], + "text/plain": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 611.000000 611.000000 611.000000 611.000000 \n", + "mean 135.801964 6.342062 0.006808 0.145450 \n", + "std 201.683867 2.529080 0.014059 0.200575 \n", + "min 0.500000 0.000000 0.000010 0.000000 \n", + "25% 24.000000 4.000000 0.000500 0.010000 \n", + "50% 24.000000 7.000000 0.001000 0.100000 \n", + "75% 144.000000 7.000000 0.003000 0.100000 \n", + "max 672.000000 10.000000 0.100000 0.600000 \n", + "\n", + " Efficiency \n", + "count 611.000000 \n", + "mean 26.736841 \n", + "std 288.788317 \n", + "min -4834.000000 \n", + "25% 30.000000 \n", + "50% 58.000000 \n", + "75% 87.950000 \n", + "max 100.000000 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_AA2024.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_hpHInhib_Concentrat_MSalt_Concentrat_MEfficiency
count1966.0000001966.0000001.966000e+031966.0000001966.000000
mean71.3323093.6265852.918525e-020.07765548.383540
std154.5943254.0895422.442926e-010.217774163.164194
min0.000000-0.6000001.000000e-070.000000-4834.000000
25%1.6700000.0000001.000000e-040.00000035.602500
50%24.0000001.0000001.000000e-030.00000061.490000
75%24.0000007.0000002.000000e-030.10000082.000000
max720.00000014.3010303.280000e+002.000000100.000000
\n", + "
" + ], + "text/plain": [ + " Time_h pH Inhib_Concentrat_M Salt_Concentrat_M \\\n", + "count 1966.000000 1966.000000 1.966000e+03 1966.000000 \n", + "mean 71.332309 3.626585 2.918525e-02 0.077655 \n", + "std 154.594325 4.089542 2.442926e-01 0.217774 \n", + "min 0.000000 -0.600000 1.000000e-07 0.000000 \n", + "25% 1.670000 0.000000 1.000000e-04 0.000000 \n", + "50% 24.000000 1.000000 1.000000e-03 0.000000 \n", + "75% 24.000000 7.000000 2.000000e-03 0.100000 \n", + "max 720.000000 14.301030 3.280000e+00 2.000000 \n", + "\n", + " Efficiency \n", + "count 1966.000000 \n", + "mean 48.383540 \n", + "std 163.164194 \n", + "min -4834.000000 \n", + "25% 35.602500 \n", + "50% 61.490000 \n", + "75% 82.000000 \n", + "max 100.000000 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_Al.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Processing" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df = df_AA2024" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Anaylsis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bayesian Optimization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Search Space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.parameters import NumericalDiscreteParameter, NumericalContinuousParameter\n", + "from baybe.searchspace import SearchSpace\n", + "\n", + "parameters = [\n", + "NumericalContinuousParameter(\n", + " name=\"Time (h)\",\n", + " bounds=(df['Time_h'].min(), df['Time_h'].max()),\n", + "),\n", + "NumericalContinuousParameter(\n", + " name=\"pH\",\n", + " bounds=(1, 14),\n", + " ), \n", + "NumericalContinuousParameter(\n", + " name=\"Inhibitor Concentration (M)\",\n", + " bounds=(df['Inhib_Concentrat_M'].min(), df['Inhib_Concentrat_M'].max()),\n", + " ),\n", + "NumericalContinuousParameter(\n", + " name=\"Salt_Concentrat_M\",\n", + " bounds=(df['Salt_Concentrat_M'].min(), df['Salt_Concentrat_M'].max()),\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Objective" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.targets import NumericalTarget\n", + "from baybe.objective import Objective\n", + "\n", + "target = NumericalTarget(\n", + " name=\"Efficiency\",\n", + " mode=\"MAX\",\n", + ")\n", + "objective = Objective(mode=\"SINGLE\", targets=[target])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "from baybe.searchspace import SearchSpace\n", + "\n", + "searchspace = SearchSpace.from_product(parameters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recommender" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Time (h) pH Inhibitor Concentration (M) Salt_Concentrat_M\n", + "0 139.744749 9.907613 0.003735 0.418879\n", + "1 516.446510 8.229525 0.009470 0.463482\n", + "2 99.505625 7.063403 0.030336 0.543470\n" + ] + } + ], + "source": [ + "from baybe import Campaign\n", + "\n", + "campaign = Campaign(searchspace, objective)\n", + "\n", + "df_recommend = campaign.recommend(batch_size=3)\n", + "print(df_recommend)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "operands could not be broadcast together with shapes (6,) (3,4) ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[22], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Calculate Euclidean distance between each row in df and the entry in df_recommend\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m distances \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinalg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnorm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf_recommend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Get the index of the row with the minimum distance\u001b[39;00m\n\u001b[1;32m 7\u001b[0m closest_index \u001b[38;5;241m=\u001b[39m distances\u001b[38;5;241m.\u001b[39midxmin()\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/frame.py:10361\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[0;34m(self, func, axis, raw, result_type, args, by_row, engine, engine_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 10347\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapply\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m frame_apply\n\u001b[1;32m 10349\u001b[0m op \u001b[38;5;241m=\u001b[39m frame_apply(\n\u001b[1;32m 10350\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 10351\u001b[0m func\u001b[38;5;241m=\u001b[39mfunc,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 10359\u001b[0m kwargs\u001b[38;5;241m=\u001b[39mkwargs,\n\u001b[1;32m 10360\u001b[0m )\n\u001b[0;32m> 10361\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapply\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/apply.py:916\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 913\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw:\n\u001b[1;32m 914\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_raw(engine\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine, engine_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine_kwargs)\n\u001b[0;32m--> 916\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/apply.py:1063\u001b[0m, in \u001b[0;36mFrameApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1061\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply_standard\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 1062\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpython\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 1063\u001b[0m results, res_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_series_generator\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1064\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1065\u001b[0m results, res_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_series_numba()\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/apply.py:1081\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1078\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m option_context(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmode.chained_assignment\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(series_gen):\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;66;03m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[0;32m-> 1081\u001b[0m results[i] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1082\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[1;32m 1083\u001b[0m \u001b[38;5;66;03m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[1;32m 1084\u001b[0m \u001b[38;5;66;03m# series_generator will swap out the underlying data\u001b[39;00m\n\u001b[1;32m 1085\u001b[0m results[i] \u001b[38;5;241m=\u001b[39m results[i]\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "Cell \u001b[0;32mIn[22], line 4\u001b[0m, in \u001b[0;36m\u001b[0;34m(row)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Calculate Euclidean distance between each row in df and the entry in df_recommend\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m distances \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m row: np\u001b[38;5;241m.\u001b[39mlinalg\u001b[38;5;241m.\u001b[39mnorm(\u001b[43mrow\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdf_recommend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m), axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Get the index of the row with the minimum distance\u001b[39;00m\n\u001b[1;32m 7\u001b[0m closest_index \u001b[38;5;241m=\u001b[39m distances\u001b[38;5;241m.\u001b[39midxmin()\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/ops/common.py:76\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer..new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m 74\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 76\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/arraylike.py:194\u001b[0m, in \u001b[0;36mOpsMixin.__sub__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__sub__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__sub__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m--> 194\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msub\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/series.py:6126\u001b[0m, in \u001b[0;36mSeries._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 6124\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_arith_method\u001b[39m(\u001b[38;5;28mself\u001b[39m, other, op):\n\u001b[1;32m 6125\u001b[0m \u001b[38;5;28mself\u001b[39m, other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_align_for_op(other)\n\u001b[0;32m-> 6126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbase\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mIndexOpsMixin\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_arith_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/base.py:1382\u001b[0m, in \u001b[0;36mIndexOpsMixin._arith_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m 1379\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marange(rvalues\u001b[38;5;241m.\u001b[39mstart, rvalues\u001b[38;5;241m.\u001b[39mstop, rvalues\u001b[38;5;241m.\u001b[39mstep)\n\u001b[1;32m 1381\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1384\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(result, name\u001b[38;5;241m=\u001b[39mres_name)\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/ops/array_ops.py:283\u001b[0m, in \u001b[0;36marithmetic_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m 279\u001b[0m _bool_arith_check(op, left, right) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 281\u001b[0m \u001b[38;5;66;03m# error: Argument 1 to \"_na_arithmetic_op\" has incompatible type\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[38;5;66;03m# \"Union[ExtensionArray, ndarray[Any, Any]]\"; expected \"ndarray[Any, Any]\"\u001b[39;00m\n\u001b[0;32m--> 283\u001b[0m res_values \u001b[38;5;241m=\u001b[39m \u001b[43m_na_arithmetic_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/ops/array_ops.py:218\u001b[0m, in \u001b[0;36m_na_arithmetic_op\u001b[0;34m(left, right, op, is_cmp)\u001b[0m\n\u001b[1;32m 215\u001b[0m func \u001b[38;5;241m=\u001b[39m partial(expressions\u001b[38;5;241m.\u001b[39mevaluate, op)\n\u001b[1;32m 217\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 218\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mleft\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mright\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_cmp \u001b[38;5;129;01mand\u001b[39;00m (\n\u001b[1;32m 221\u001b[0m left\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(right, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 222\u001b[0m ):\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# Don't do this for comparisons, as that will handle complex numbers\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# incorrectly, see GH#32047\u001b[39;00m\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/computation/expressions.py:242\u001b[0m, in \u001b[0;36mevaluate\u001b[0;34m(op, a, b, use_numexpr)\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m op_str \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_numexpr:\n\u001b[1;32m 241\u001b[0m \u001b[38;5;66;03m# error: \"None\" not callable\u001b[39;00m\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_str\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _evaluate_standard(op, op_str, a, b)\n", + "File \u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/computation/expressions.py:73\u001b[0m, in \u001b[0;36m_evaluate_standard\u001b[0;34m(op, op_str, a, b)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _TEST_MODE:\n\u001b[1;32m 72\u001b[0m _store_test_result(\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (6,) (3,4) " + ] + } + ], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Benchmarking" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Transfer Learning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}