|
20 | 20 | "metadata": {},
|
21 | 21 | "outputs": [],
|
22 | 22 | "source": [
|
23 |
| - "# In Python, module can be imported by a command similar to 'import numpy as np'. \n", |
24 |
| - "# It is a good practice to maintain a section at the beginning of the notebook to import all necessary modules.\n", |
| 23 | + "# in Python, module can be imported by a command similar to 'import numpy as np'. \n", |
| 24 | + "# it is a good practice to maintain a section at the beginning of the notebook to import all necessary modules.\n", |
25 | 25 | "# for new module, could use pip to install it. \n",
|
26 | 26 | "# for example 'pip install numpy'\n",
|
27 | 27 | "import numpy as np\n",
|
|
110 | 110 | "metadata": {},
|
111 | 111 | "outputs": [],
|
112 | 112 | "source": [
|
113 |
| - "# Use dir() to get a list of all the attributes an object has\n", |
| 113 | + "# use dir() to get a list of all the attributes an object has\n", |
114 | 114 | "dir(lm)"
|
115 | 115 | ]
|
116 | 116 | },
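A minimal sketch of the kind of attributes dir(lm) lists, assuming lm is the fitted statsmodels OLS result used elsewhere in the notebook:

    # a few of the attributes dir(lm) surfaces on a fitted OLS result
    print(lm.params)      # estimated coefficients
    print(lm.pvalues)     # p-values for each coefficient
    print(lm.rsquared)    # R-squared of the fit
    print(lm.conf_int())  # 95% confidence intervals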
|
|
190 | 190 | "metadata": {},
|
191 | 191 | "outputs": [],
|
192 | 192 | "source": [
|
193 |
| - "# As mentioned above. For machine learning models, sklearn is the most common used module, but sklearn is a little bit less on statistics.\n", |
| 193 | + "# as mentioned above. For machine learning models, sklearn is the most common used module, but sklearn is a little bit less on statistics.\n", |
194 | 194 | "x = pd.DataFrame(Boston.lstat)\n",
|
195 | 195 | "y = Boston.medv\n",
|
196 | 196 | "print(x.shape)\n",
|
|
235 | 235 | "metadata": {},
|
236 | 236 | "outputs": [],
|
237 | 237 | "source": [
|
238 |
| - "# If we want to use all the variable. We can use the following trick to manually construct the list. In Python, most of time, you have to manully construct the variable list.\n", |
| 238 | + "# if we want to use all the variable. We can use the following trick to manually construct the list. In Python, most of time, you have to manully construct the variable list.\n", |
239 | 239 | "all_columns = \"+\".join(Boston.columns.difference([\"medv\"]))\n",
|
240 | 240 | "my_formula = \"medv~\" + all_columns\n",
|
241 | 241 | "lm = smf.ols(my_formula, data=Boston).fit()\n",
|
|
345 | 345 | "metadata": {},
|
346 | 346 | "outputs": [],
|
347 | 347 | "source": [
|
348 |
| - "# There is anova function built in already in statsmodels. \n", |
349 |
| - "# If you know what to do, use the key words to google it and likely you will find a very good answer. \n", |
350 |
| - "# Here we compare the models with one order of stat and two orders of stats. \n", |
351 |
| - "# By looking at the p value that will reject the null hypothesis that the coefficent of lstat**2 equals 0.\n", |
| 348 | + "# there is anova function built in already in statsmodels. \n", |
| 349 | + "# if you know what to do, use the key words to google it and likely you will find a very good answer. \n", |
| 350 | + "# here we compare the models with one order of stat and two orders of stats. \n", |
| 351 | + "# by looking at the p value that will reject the null hypothesis that the coefficent of lstat**2 equals 0.\n", |
352 | 352 | "table = sm.stats.anova_lm(lm_order1, lm_order2)\n",
|
353 | 353 | "print(table)"
|
354 | 354 | ]
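A sketch of how the two models being compared could have been fit; the names lm_order1 and lm_order2 come from the cell above, while the I(lstat**2) term is an assumption about how the quadratic model was specified:

    import statsmodels.api as sm
    import statsmodels.formula.api as smf
    # first-order and second-order fits of medv on lstat
    lm_order1 = smf.ols('medv ~ lstat', data=Boston).fit()
    lm_order2 = smf.ols('medv ~ lstat + I(lstat**2)', data=Boston).fit()
    # F-test: does the quadratic term significantly improve the fit?
    print(sm.stats.anova_lm(lm_order1, lm_order2))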
|
|
456 | 456 | "metadata": {},
|
457 | 457 | "outputs": [],
|
458 | 458 | "source": [
|
459 |
| - "# Then the model buliding will be the same with all numerrical variables.\n", |
| 459 | + "# then the model buliding will be the same with all numerrical variables.\n", |
460 | 460 | "lm_carseats_dummy = smf.ols('Sales ~ Income + Advertising + Price + Age + ShelveLoc_Good + ShelveLoc_Medium', \n",
|
461 | 461 | " data = Carseats_dummy).fit()"
|
462 | 462 | ]
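A sketch of how Carseats_dummy with the ShelveLoc_Good and ShelveLoc_Medium columns could be constructed; pd.get_dummies and dropping the 'Bad' baseline level are assumptions, not necessarily the notebook's exact cell:

    import pandas as pd
    # one-hot encode ShelveLoc; drop_first removes the 'Bad' baseline level,
    # leaving ShelveLoc_Good and ShelveLoc_Medium for the formula above
    Carseats_dummy = pd.get_dummies(Carseats, columns=['ShelveLoc'], drop_first=True)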
|
|
467 | 467 | "metadata": {},
|
468 | 468 | "outputs": [],
|
469 | 469 | "source": [
|
470 |
| - "# The interpretation of the coefficients are holding everything fixed, Medium shelve location is associated with an average\n", |
| 470 | + "# the interpretation of the coefficients are holding everything fixed, Medium shelve location is associated with an average\n", |
471 | 471 | "# increase of sale around 2.0046. \n",
|
472 | 472 | "lm_carseats_dummy.summary() "
|
473 | 473 | ]
|
|
499 | 499 | "metadata": {},
|
500 | 500 | "outputs": [],
|
501 | 501 | "source": [
|
502 |
| - "# Let us write a simple function to print current time. \n", |
503 |
| - "# The key word in Python for user defined function is 'def'. \n", |
504 |
| - "# Pay attention to the ':'. The difference betwwen R (others) and Python is that Python \n", |
| 502 | + "# let us write a simple function to print current time. \n", |
| 503 | + "# yhe key word in Python for user defined function is 'def'. \n", |
| 504 | + "# pay attention to the ':'. The difference betwwen R (others) and Python is that Python \n", |
505 | 505 | "# forces you to obey its indentation rules. For example, the following function won't work because of the extra space in front of 'print'.\n",
|
506 | 506 | "def print_current_time_wrong():\n",
|
507 | 507 | " from datetime import datetime # this is very bad practice !!! \n",
|
|
559 | 559 | "name": "python",
|
560 | 560 | "nbconvert_exporter": "python",
|
561 | 561 | "pygments_lexer": "ipython3",
|
562 |
| - "version": "3.5.6" |
| 562 | + "version": "3.6.2" |
563 | 563 | }
|
564 | 564 | },
|
565 | 565 | "nbformat": 4,
|
|