Skip to content

Commit ff74022

Browse files
authored
Add files via upload
1 parent 3ba019a commit ff74022

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed

stat_ovr_corr.ipynb

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np\n",
10+
"import matplotlib.pyplot as plt\n",
11+
"import pandas as pd"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": null,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"df = pd.read_csv('BBGM_League_93_all_seasons_Average_Stats.csv')"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"metadata": {},
27+
"outputs": [],
28+
"source": [
29+
"df_lim = df[df.MP * df.G > 2000]"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"sorted([(v,k) for k,v in df_lim.corr().loc['Ovr'].items()])[::-1]\n",
39+
"cols = ['EWA','PER',\"VORP\",\"BPM\",\"WS\",'WS/48','Ovr']\n",
40+
"df_lim[cols].corr().round(2)['Ovr']#.style.background_gradient()"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": null,
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"bins = pd.cut(df.MP*df.G,40)"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": null,
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"things = []\n",
59+
"for b in bins.unique():\n",
60+
" corr = df[bins == b][['Ovr','PER']].corr()['Ovr'][\"PER\"]\n",
61+
" if not np.isnan(corr):\n",
62+
" things.append([b.mid,corr,(bins==b).sum()])\n",
63+
"things = np.array(things)"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": null,
69+
"metadata": {},
70+
"outputs": [],
71+
"source": []
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"metadata": {},
77+
"outputs": [],
78+
"source": [
79+
"def get_curve(x):\n",
80+
" scale = x[0]\n",
81+
" offset = x[1]\n",
82+
" slope = x[2]\n",
83+
" xv = things[:,0]\n",
84+
" yv = things[:,1]\n",
85+
" pred = scale * (np.tanh( (xv-offset)*slope )*0.5 + 0.5)\n",
86+
" #print(x,pred)\n",
87+
" #print()\n",
88+
" return ( ((pred - yv)*(things[:,2])) **2) .mean()"
89+
]
90+
},
91+
{
92+
"cell_type": "code",
93+
"execution_count": null,
94+
"metadata": {},
95+
"outputs": [],
96+
"source": [
97+
"import scipy.optimize as opt\n",
98+
"res = opt.minimize(get_curve,[0.8,1000,1/500])"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"metadata": {},
105+
"outputs": [],
106+
"source": [
107+
"res.x\n",
108+
"#np.tanh((things[:,0]-100)/200)"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {},
115+
"outputs": [],
116+
"source": [
117+
"scale = res.x[0]\n",
118+
"offset =res.x[1]\n",
119+
"slope =res.x[2]\n",
120+
"plt.style.use('seaborn-white')\n",
121+
"plt.style.use('fivethirtyeight')\n",
122+
"plt.style.use('seaborn-white')\n",
123+
"label = \"{:.2f} * sigmoid( (min - {:.0f}) * {:.01e} )\".format(*res.x)\n",
124+
"plt.scatter(things[:,0],things[:,1],label='true')\n",
125+
"plt.scatter(things[:,0],scale * (np.tanh( (things[:,0]-offset)*slope )*0.5 + 0.5),c='r',label=label)\n",
126+
"plt.legend(frameon=True)\n",
127+
"plt.xlabel('Minutes')\n",
128+
"plt.ylabel('correlation')\n",
129+
"plt.title(\"PER correlation\")\n",
130+
"plt.ylim(0,1)\n",
131+
"plt.grid()\n",
132+
"plt.tight_layout()"
133+
]
134+
},
135+
{
136+
"cell_type": "code",
137+
"execution_count": null,
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"1/1300"
142+
]
143+
},
144+
{
145+
"cell_type": "code",
146+
"execution_count": null,
147+
"metadata": {},
148+
"outputs": [],
149+
"source": [
150+
"import statsmodels.api as sm"
151+
]
152+
},
153+
{
154+
"cell_type": "code",
155+
"execution_count": null,
156+
"metadata": {},
157+
"outputs": [],
158+
"source": [
159+
"clf = sm.WLS(df.Ovr,sm.add_constant(df[['PER']]),500+df.MP*df.G)\n",
160+
"res_clf = clf.fit()"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"metadata": {},
167+
"outputs": [],
168+
"source": [
169+
"res_clf.summary()"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {},
176+
"outputs": [],
177+
"source": [
178+
"dfs = df.sample(frac=0.1)\n",
179+
"plt.scatter(dfs.PER,dfs.Ovr,s=np.sqrt(dfs.MP*dfs.G),alpha=0.1)\n",
180+
"\n",
181+
"\n",
182+
"plt.xlim(0,30)\n",
183+
"plt.ylim(20,80)\n",
184+
"plt.xlabel('PER')\n",
185+
"plt.ylabel('Ovr')\n",
186+
"plt.scatter(dfs.PER, 31.693+dfs.PER* 1.531,label='32 + per*1.5')\n",
187+
"plt.scatter( dfs.PER,36+dfs.PER*1.25,label='36 + per*1.25')\n",
188+
"plt.legend()"
189+
]
190+
},
191+
{
192+
"cell_type": "code",
193+
"execution_count": null,
194+
"metadata": {},
195+
"outputs": [],
196+
"source": []
197+
}
198+
],
199+
"metadata": {
200+
"kernelspec": {
201+
"display_name": "Python 3",
202+
"language": "python",
203+
"name": "python3"
204+
},
205+
"language_info": {
206+
"codemirror_mode": {
207+
"name": "ipython",
208+
"version": 3
209+
},
210+
"file_extension": ".py",
211+
"mimetype": "text/x-python",
212+
"name": "python",
213+
"nbconvert_exporter": "python",
214+
"pygments_lexer": "ipython3",
215+
"version": "3.7.3"
216+
}
217+
},
218+
"nbformat": 4,
219+
"nbformat_minor": 2
220+
}

0 commit comments

Comments
 (0)