Skip to content

Commit a91de03

Browse files
authored
Add files via upload
1 parent 44cd2a2 commit a91de03

File tree

1 file changed

+83
-17
lines changed

1 file changed

+83
-17
lines changed

estimateSOVR.ipynb

Lines changed: 83 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
"metadata": {},
2020
"outputs": [],
2121
"source": [
22-
"df = pd.read_csv('BBGM_League_1_all_seasons_Average_Stats.csv')"
22+
"df = pd.read_csv('BBGM_League_109_all_seasons_Average_Stats.csv')\n",
23+
"df = df[df['MP']*df['G'] > 10]"
2324
]
2425
},
2526
{
@@ -46,8 +47,7 @@
4647
"metadata": {},
4748
"outputs": [],
4849
"source": [
49-
"stats_to_est = ['FT','FTA','FG','FGA','ORB','DRB','AST','TOV','STL','Blk','PF','PTS','3P','3PA','BA']\n",
50-
"sT = [_ +'p36' for _ in stats_to_est]"
50+
"plt.hist(df['+/-'])"
5151
]
5252
},
5353
{
@@ -56,8 +56,27 @@
5656
"metadata": {},
5757
"outputs": [],
5858
"source": [
59+
"# full\n",
60+
"stats_to_est = [ 'FG', 'FGA', '3P', '3PA', '2P', '2PA', 'FT', 'FTA', 'ORB', 'DRB', 'TRB', 'AST', 'TOV', 'STL', 'BLK', 'BA', 'PF', 'PTS']\n",
61+
"# pruned by hand based on t-values, R^2 = .804\n",
62+
"stats_to_est = [ '3PA', '2PA', 'FTA', 'TRB', 'AST', 'TOV', 'STL', 'BLK', 'PF', 'PTS']\n",
63+
"#R^2 = .773 w/o +/-, 0.788 with\n",
64+
"stats_to_est = [ 'TRB', 'AST', 'TOV', 'STL', 'BLK', 'PF', 'PTS', '+/-']\n",
65+
"\n",
66+
"sT = [_ +'p36' for _ in stats_to_est]\n"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"p_36_stats = []\n",
5976
"for s in stats_to_est:\n",
60-
" df[s + 'p36'] = 36*df[s]/np.maximum(df['MP'],1e-6)"
77+
" df[s + 'p36'] = 36*df[s]/np.maximum(df['MP'],1)\n",
78+
" p_36_stats.append(s+'p36')\n",
79+
"p_36_stats = p_36_stats #+['+/-']#+ ['TS%']"
6180
]
6281
},
6382
{
@@ -66,9 +85,24 @@
6685
"metadata": {},
6786
"outputs": [],
6887
"source": [
69-
"dfs = df[df.MP*df.G > 850]\n",
70-
"res = sm.RLM(dfs['Ovr'],sm.add_constant(dfs[stats_to_est])).fit()\n",
71-
"res.summary()"
88+
"res = sm.WLS(df['Ovr'],sm.add_constant(df[p_36_stats]),df['MP']*df['G']).fit()\n",
89+
"res.summary() # -6.7306e+05 1.346e+06 1.005e+05"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"metadata": {
96+
"scrolled": true
97+
},
98+
"outputs": [],
99+
"source": [
100+
"import sklearn.linear_model as linear_model\n",
101+
"clf = linear_model.RidgeCV(np.logspace(-3,3,101))\n",
102+
"p = 1\n",
103+
"clf.fit(df[p_36_stats],(df['Ovr'])**p,df['MP']*df['G'])\n",
104+
"df['sOVR'] = np.maximum(0,clf.predict(df[p_36_stats]))**(1/p)\n",
105+
"plt.scatter(df['Ovr'],df['sOVR'],s=5,alpha=0.5)\n"
72106
]
73107
},
74108
{
@@ -77,7 +111,7 @@
77111
"metadata": {},
78112
"outputs": [],
79113
"source": [
80-
"plt.scatter(dfs['Ovr'],res.predict(),s=5,alpha=0.5)"
114+
"plt.scatter(df['Ovr'],res.predict(),s=5,alpha=0.5)"
81115
]
82116
},
83117
{
@@ -86,8 +120,8 @@
86120
"metadata": {},
87121
"outputs": [],
88122
"source": [
89-
"dfs['sOVR'] = res.predict()\n",
90-
"dfs['PS'] = dfs['Ovr']/dfs['sOVR']"
123+
"df['sOVR'] = clf.predict(df[p_36_stats])**(1/p)#res.predict()\n",
124+
"df['PS'] = df['Ovr']/df['sOVR']"
91125
]
92126
},
93127
{
@@ -105,7 +139,7 @@
105139
"metadata": {},
106140
"outputs": [],
107141
"source": [
108-
"plt.plot(dfs[['Age','PS','Ovr','sOVR']].groupby('Age').mean()['PS'])\n",
142+
"plt.plot(df[['Age','PS','Ovr','sOVR']].groupby('Age').mean()['PS'])\n",
109143
"plt.xlim(20,44)\n",
110144
"plt.ylabel('Ovr ÷ sOVR')\n",
111145
"plt.xlabel('Age')"
@@ -138,8 +172,6 @@
138172
"metadata": {},
139173
"outputs": [],
140174
"source": [
141-
"#dft = dfs.groupby(['pid','Age']).mean()\n",
142-
"df['sOVR'] = res.predict(sm.add_constant(df[stats_to_est]))\n",
143175
"dft = df.groupby(['pid','Age']).mean()"
144176
]
145177
},
@@ -176,19 +208,53 @@
176208
"metadata": {},
177209
"outputs": [],
178210
"source": [
179-
"print('4 year value by age\\nage [pot ovr]')\n",
211+
"print('5 year value by age\\nage scale [pot ovr]')\n",
212+
"sA = []\n",
213+
"sB = []\n",
180214
"for age in sorted(p_y.keys()):\n",
181215
" Xy = np.array(p_y[age])\n",
182216
" if (Xy[:,0] == Xy[:,1]).all():\n",
183217
" Xy = Xy[:,1:]\n",
184218
" res2 = sm.OLS(Xy[:,-1],Xy[:,:-1]).fit()\n",
185-
" print(age,np.round(res2.params,2))"
219+
" v = res2.params\n",
220+
" vS = sum(v)\n",
221+
" sA.append((age,vS))\n",
222+
" sB.append((age,v[0]/vS))\n",
223+
" print(age,np.round(vS,2),np.round(v/vS,2))"
224+
]
225+
},
226+
{
227+
"cell_type": "code",
228+
"execution_count": null,
229+
"metadata": {},
230+
"outputs": [],
231+
"source": [
232+
"sA = np.array(sA)\n",
233+
"plt.plot(sA[:,0],sA[:,1])\n",
234+
"plt.xlim(20,40)\n",
235+
"plt.xlabel('age')\n",
236+
"plt.ylabel('multi-year value')\n",
237+
"plt.title('4 year [reg tree]')"
238+
]
239+
},
240+
{
241+
"cell_type": "code",
242+
"execution_count": null,
243+
"metadata": {},
244+
"outputs": [],
245+
"source": [
246+
"sB = np.array(sB)\n",
247+
"plt.plot(sB[:,0],sB[:,1])\n",
248+
"plt.xlim(20,28)\n",
249+
"plt.xlabel('age')\n",
250+
"plt.ylabel('potential weight')\n",
251+
"plt.title('4 year [reg tree]')"
186252
]
187253
}
188254
],
189255
"metadata": {
190256
"kernelspec": {
191-
"display_name": "Python 3",
257+
"display_name": "Python 3 (ipykernel)",
192258
"language": "python",
193259
"name": "python3"
194260
},
@@ -202,7 +268,7 @@
202268
"name": "python",
203269
"nbconvert_exporter": "python",
204270
"pygments_lexer": "ipython3",
205-
"version": "3.7.3"
271+
"version": "3.9.12"
206272
}
207273
},
208274
"nbformat": 4,

0 commit comments

Comments
 (0)