Add files via upload

nicidob · web-flow · commit ff740228b49c · 2021-09-25T15:38:11.000-04:00
diff --git a/stat_ovr_corr.ipynb b/stat_ovr_corr.ipynb
@@ -0,0 +1,220 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('BBGM_League_93_all_seasons_Average_Stats.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only rows whose MP * G product exceeds 2000 (that product is labelled 'Minutes' on a plot axis later on).\n",
+    "df_lim = df.loc[(df['MP'] * df['G']) > 2000]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Rank every numeric column by its correlation with Ovr, strongest first (NaN correlations sort last).\n",
+    "# select_dtypes keeps this working on pandas >= 2.0, where DataFrame.corr() raises on non-numeric columns.\n",
+    "ovr_corr_ranked = df_lim.select_dtypes(include=['number', 'bool']).corr()['Ovr'].sort_values(ascending=False)\n",
+    "# Correlation of a hand-picked set of columns with Ovr, rounded to two decimals.\n",
+    "cols = ['EWA','PER',\"VORP\",\"BPM\",\"WS\",'WS/48','Ovr']\n",
+    "df_lim[cols].corr().round(2)['Ovr']#.style.background_gradient()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the observed MP * G range into 40 equal-width intervals and label each row with its interval.\n",
+    "bins = pd.cut(df['MP'] * df['G'], bins=40)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One row per occupied bin: [bin midpoint, Ovr-PER correlation within the bin, number of rows in the bin].\n",
+    "things = []\n",
+    "for interval in bins.unique():\n",
+    "    in_bin = bins == interval\n",
+    "    ovr_per = df.loc[in_bin, ['Ovr', 'PER']].corr().loc['PER', 'Ovr']\n",
+    "    if np.isnan(ovr_per):\n",
+    "        # The correlation is undefined for this bin, so leave it out.\n",
+    "        continue\n",
+    "    things.append([interval.mid, ovr_per, in_bin.sum()])\n",
+    "things = np.array(things)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_curve(x):\n",
+    "    \"\"\"Loss for fitting a scaled, shifted tanh curve to the global `things` array.\n",
+    "\n",
+    "    x holds (scale, offset, slope). Column 0 of `things` is the curve input,\n",
+    "    column 1 the target value and column 2 a per-row weight. Returns the mean\n",
+    "    of the squared weighted residuals.\n",
+    "    \"\"\"\n",
+    "    scale, offset, slope = x[0], x[1], x[2]\n",
+    "    inputs, targets, weights = things[:, 0], things[:, 1], things[:, 2]\n",
+    "    # tanh rescaled from (-1, 1) to (0, 1), then multiplied by scale.\n",
+    "    squashed = 0.5 * np.tanh((inputs - offset) * slope) + 0.5\n",
+    "    residual = scale * squashed - targets\n",
+    "    return np.mean((residual * weights) ** 2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scipy.optimize as opt\n",
+    "# Minimise get_curve from an initial guess given in the order it reads its input: scale, offset, slope.\n",
+    "res = opt.minimize(fun=get_curve, x0=[0.8, 1000, 1/500])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "res.x\n",
+    "#np.tanh((things[:,0]-100)/200)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Unpack the fitted parameters in the order get_curve reads them.\n",
+    "scale, offset, slope = res.x\n",
+    "# 'seaborn-white' was renamed 'seaborn-v0_8-white' in matplotlib 3.6 and the old name was later removed;\n",
+    "# pick whichever this matplotlib provides. The white style is applied last so it overrides fivethirtyeight.\n",
+    "white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'\n",
+    "plt.style.use(['fivethirtyeight', white_style])\n",
+    "# tanh(z)*0.5 + 0.5 equals sigmoid(2*z), so the slope shown in the sigmoid label is twice the fitted tanh slope.\n",
+    "label = \"{:.2f} * sigmoid( (min - {:.0f}) * {:.01e} )\".format(scale, offset, 2 * slope)\n",
+    "# Same curve that get_curve evaluates, at the bin midpoints.\n",
+    "fitted = scale * (np.tanh((things[:,0] - offset) * slope) * 0.5 + 0.5)\n",
+    "plt.scatter(things[:,0], things[:,1], label='true')\n",
+    "plt.scatter(things[:,0], fitted, c='r', label=label)\n",
+    "plt.legend(frameon=True)\n",
+    "plt.xlabel('Minutes')\n",
+    "plt.ylabel('correlation')\n",
+    "plt.title(\"PER correlation\")\n",
+    "plt.ylim(0,1)\n",
+    "plt.grid()\n",
+    "plt.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "1/1300"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import statsmodels.api as sm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Weighted least squares of Ovr on PER plus an intercept; each row is weighted by 500 + MP * G.\n",
+    "clf = sm.WLS(endog=df['Ovr'], exog=sm.add_constant(df[['PER']]), weights=500 + df['MP'] * df['G'])\n",
+    "res_clf = clf.fit()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "res_clf.summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot a 10% sample of rows; the fixed random_state makes the figure reproducible across runs.\n",
+    "dfs = df.sample(frac=0.1, random_state=0)\n",
+    "# Marker area grows with sqrt(MP * G).\n",
+    "plt.scatter(dfs.PER,dfs.Ovr,s=np.sqrt(dfs.MP*dfs.G),alpha=0.1)\n",
+    "\n",
+    "\n",
+    "plt.xlim(0,30)\n",
+    "plt.ylim(20,80)\n",
+    "plt.xlabel('PER')\n",
+    "plt.ylabel('Ovr')\n",
+    "# Two linear maps from PER to Ovr drawn over the data; the first presumably comes from the WLS fit\n",
+    "# (confirm against res_clf.params), the second is a rounder alternative.\n",
+    "plt.scatter(dfs.PER, 31.693+dfs.PER* 1.531,label='32 + per*1.5')\n",
+    "plt.scatter( dfs.PER,36+dfs.PER*1.25,label='36 + per*1.25')\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}