From c06e5bad456ba9f5a82a4e8f2b04606971fcf146 Mon Sep 17 00:00:00 2001 From: Tetsu Haruyama Date: Sun, 21 Jul 2024 14:15:34 +0900 Subject: [PATCH] ols -> smf.ols : 1 --- 10_Residuals.ipynb | 22 +++++++++++----------- 11_Inference.ipynb | 8 ++++---- 12_Asymptotics.ipynb | 14 +++++++------- 13_Dummies.ipynb | 26 +++++++++++++------------- 14_Hetero.ipynb | 14 +++++++------- 15_Pooling.ipynb | 19 ++++++++++--------- 16_linearmodels.ipynb | 9 ++++++--- 8_Simple_Regression.ipynb | 14 +++++++------- 9_Multiple_Regression.ipynb | 16 ++++++++-------- 9 files changed, 73 insertions(+), 69 deletions(-) diff --git a/10_Residuals.ipynb b/10_Residuals.ipynb index 99753249..1e8f26f4 100644 --- a/10_Residuals.ipynb +++ b/10_Residuals.ipynb @@ -27,8 +27,8 @@ "import numpy as np\n", "import pandas as pd\n", "import statsmodels.api as sm\n", + "import statsmodels.formula.api as smf\n", "\n", - "from statsmodels.formula.api import ols\n", "from scipy.stats import norm, uniform\n", "\n", "# 警告メッセージを非表示\n", @@ -131,7 +131,7 @@ " \n", " df = pd.DataFrame({'Y':y, 'X':x}) # DataFrame\n", " \n", - " res = ols(formula='Y ~ X', data=df).fit() # OLSの計算\n", + " res = smf.ols(formula='Y ~ X', data=df).fit() # OLSの計算\n", " u_standardized = res.get_influence().resid_studentized_internal # 標準化残差\n", " \n", " return x, y, res.fittedvalues, res.resid, u_standardized, res.rsquared # 返り値の設定" @@ -596,7 +596,7 @@ "\n", "df_diag = pd.DataFrame({'Y':y, 'X':x}) # DataFrameの作成\n", "\n", - "res_diag = ols(formula='Y ~ X', data=df_diag).fit() # OLS推定" + "res_diag = smf.ols(formula='Y ~ X', data=df_diag).fit() # OLS推定" ] }, { @@ -914,7 +914,7 @@ "\n", "df = pd.DataFrame({'Y':y, 'X':x}) # DataFrame\n", "\n", - "res = ols(formula='Y ~ X', data=df).fit() # OLSの計算\n", + "res = smf.ols(formula='Y ~ X', data=df).fit() # OLSの計算\n", "resid_std = res.get_influence().resid_studentized_internal # 標準化残差\n", "\n", "plt.scatter(res.fittedvalues,resid_std) # 散布図\n", @@ -991,7 +991,7 @@ "y = 1.0 + 0.1*x +0.1*x2+ u\n", "df = pd.DataFrame({'Y':y, 'X':x})\n", "\n", - "res = ols(formula='Y ~ X', data=df).fit()\n", + "res = smf.ols(formula='Y ~ X', data=df).fit()\n", "resid_std = res.get_influence().resid_studentized_internal\n", "\n", "plt.scatter(res.fittedvalues,resid_std)\n", @@ -1078,7 +1078,7 @@ "\n", "df = pd.DataFrame({'Y':y, 'X':x})\n", "\n", - "res = ols(formula='Y ~ X', data=df).fit()\n", + "res = smf.ols(formula='Y ~ X', data=df).fit()\n", "resid_std = res.get_influence().resid_studentized_internal\n", "\n", "plt.scatter(res.fittedvalues,resid_std)\n", @@ -1160,7 +1160,7 @@ "y = 1 + 0.1*x + x**0.6*u\n", "df = pd.DataFrame({'Y':y, 'X':x})\n", "\n", - "res = ols(formula='Y ~ X', data=df).fit()\n", + "res = smf.ols(formula='Y ~ X', data=df).fit()\n", "resid_std = res.get_influence().resid_studentized_internal\n", "\n", "plt.scatter(res.fittedvalues,resid_std)\n", @@ -1232,10 +1232,10 @@ "df_cd = pd.DataFrame({'Y':y, 'X':x})\n", "\n", "# 外れ値がない場合のOLS\n", - "res_no = ols(formula='Y ~ X', data=df_cd.loc[0:19,:]).fit()\n", + "res_no = smf.ols(formula='Y ~ X', data=df_cd.loc[0:19,:]).fit()\n", "\n", "# 外れ値がある場合のOLS\n", - "res_cd = ols(formula='Y ~ X', data=df_cd).fit()" + "res_cd = smf.ols(formula='Y ~ X', data=df_cd).fit()" ] }, { @@ -1348,7 +1348,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.9" }, "nteract": { "version": "0.23.1" @@ -1368,5 +1368,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/11_Inference.ipynb b/11_Inference.ipynb index bde3f3c6..4a20ff08 100644 --- a/11_Inference.ipynb +++ b/11_Inference.ipynb @@ -25,9 +25,9 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "import statsmodels.formula.api as smf\n", "import wooldridge\n", "\n", - "from statsmodels.formula.api import ols\n", "from scipy.stats import t, f\n", "\n", "# 警告メッセージを非表示\n", @@ -505,7 +505,7 @@ "outputs": [], "source": [ "formula_gpa = 'colGPA ~ hsGPA + ACT + skipped'\n", - "res_gpa = ols(formula_gpa, data=gpa).fit()" + "res_gpa = smf.ols(formula_gpa, data=gpa).fit()" ] }, { @@ -988,7 +988,7 @@ "outputs": [], "source": [ "formula_0 = 'np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr'\n", - "res_0 = ols(formula_0, data=mlb1).fit()" + "res_0 = smf.ols(formula_0, data=mlb1).fit()" ] }, { @@ -1033,7 +1033,7 @@ "outputs": [], "source": [ "formula_1 = 'np.log(salary) ~ years + gamesyr'\n", - "res_1 = ols(formula_1, data=mlb1).fit()" + "res_1 = smf.ols(formula_1, data=mlb1).fit()" ] }, { diff --git a/12_Asymptotics.ipynb b/12_Asymptotics.ipynb index cb7c91f6..58d4f73d 100644 --- a/12_Asymptotics.ipynb +++ b/12_Asymptotics.ipynb @@ -25,10 +25,10 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", + "import statsmodels.formula.api as smf\n", "import wooldridge\n", "\n", "from scipy.stats import gaussian_kde, t\n", - "from statsmodels.formula.api import ols\n", "from statsmodels.api import qqplot\n", "from statsmodels.stats.stattools import jarque_bera, omni_normtest\n", "from numba import njit\n", @@ -1236,7 +1236,7 @@ "source": [ "wage1 = wooldridge.data('wage1')\n", "formula_wage = 'wage ~ educ + exper+ tenure'\n", - "res_wage = ols(formula_wage, data=wage1).fit()\n", + "res_wage = smf.ols(formula_wage, data=wage1).fit()\n", "qqplot(res_wage.resid, line='45',fit=True)\n", "pass" ] @@ -1260,7 +1260,7 @@ "source": [ "wage1 = wooldridge.data('wage1')\n", "formula_wage_log = 'np.log(wage) ~ educ + exper+ tenure'\n", - "res_wage_log = ols(formula_wage_log, data=wage1).fit()\n", + "res_wage_log = smf.ols(formula_wage_log, data=wage1).fit()\n", "qqplot(res_wage_log.resid, line='45',fit=True)\n", "pass" ] @@ -1613,7 +1613,7 @@ "outputs": [], "source": [ "form_0 = 'narr86 ~ pcnv + ptime86 + qemp86 + avgsen + tottime'\n", - "res_0 = ols(form_0, data=crime1).fit()\n", + "res_0 = smf.ols(form_0, data=crime1).fit()\n", "res_0.params" ] }, @@ -1646,7 +1646,7 @@ "outputs": [], "source": [ "form_1 = 'narr86 ~ pcnv + ptime86 + qemp86'\n", - "res_1 = ols(form_1, data=crime1).fit()\n", + "res_1 = smf.ols(form_1, data=crime1).fit()\n", "res_1.params" ] }, @@ -1710,7 +1710,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.9" }, "nteract": { "version": "0.15.0" @@ -1730,5 +1730,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/13_Dummies.ipynb b/13_Dummies.ipynb index 9d090317..43c2a2a7 100644 --- a/13_Dummies.ipynb +++ b/13_Dummies.ipynb @@ -24,8 +24,8 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", + "import statsmodels.formula.api as smf\n", "import wooldridge\n", - "from statsmodels.formula.api import ols\n", "\n", "# 警告メッセージを非表示\n", "import warnings\n", @@ -114,7 +114,7 @@ "source": [ "form_const = 'wage ~ 1' # 定数項だけの場合は1が必要\n", "\n", - "res_const = ols(form_const, data=wage1).fit()\n", + "res_const = smf.ols(form_const, data=wage1).fit()\n", "\n", "res_const.params" ] @@ -213,7 +213,7 @@ "source": [ "form_const_2 = 'wage ~ female'\n", "\n", - "res_const_2 = ols(form_const_2, data=wage1).fit()\n", + "res_const_2 = smf.ols(form_const_2, data=wage1).fit()\n", "\n", "res_const_2.params" ] @@ -481,7 +481,7 @@ "source": [ "form_const_4 = 'wage ~ marmale + singfem + marfem'\n", "\n", - "res_const_4 = ols(form_const_4, data=wage1).fit()\n", + "res_const_4 = smf.ols(form_const_4, data=wage1).fit()\n", "\n", "para4 = res_const_4.params\n", "para4" @@ -631,7 +631,7 @@ "source": [ "form_1 = 'wage ~ female + educ + exper+ tenure'\n", "\n", - "res_1 = ols(form_1, data=wage1).fit()\n", + "res_1 = smf.ols(form_1, data=wage1).fit()\n", "\n", "res_1.params" ] @@ -712,7 +712,7 @@ "source": [ "form_2 = 'np.log(wage) ~ female + female:educ + educ + exper + tenure'\n", "\n", - "res_2 = ols(form_2, data=wage1).fit()" + "res_2 = smf.ols(form_2, data=wage1).fit()" ] }, { @@ -891,7 +891,7 @@ "source": [ "form_c = 'wage ~ C(sex) + educ'\n", "\n", - "res_c = ols(form_c, data=df).fit()\n", + "res_c = smf.ols(form_c, data=df).fit()\n", "\n", "res_c.params" ] @@ -936,7 +936,7 @@ "source": [ "form_cm = 'wage ~ C(sex,Treatment(\"male\")) + educ'\n", "\n", - "res_cm = ols(form_cm, data=df).fit()\n", + "res_cm = smf.ols(form_cm, data=df).fit()\n", "\n", "res_cm.params" ] @@ -960,7 +960,7 @@ "source": [ "form_ca = 'wage ~ female + educ'\n", "\n", - "res_ca = ols(form_ca, data=df).fit()\n", + "res_ca = smf.ols(form_ca, data=df).fit()\n", "\n", "res_ca.params" ] @@ -1046,7 +1046,7 @@ "source": [ "form_p = 'np.log(pfries) ~ prpblck + np.log(income) + C(chain)'\n", "\n", - "res_p = ols(form_p, data=discrim).fit()\n", + "res_p = smf.ols(form_p, data=discrim).fit()\n", "\n", "print(res_p.summary().tables[1])" ] @@ -1202,7 +1202,7 @@ "source": [ "form_c = 'np.log(pfries) ~ prpblck + np.log(income) + chain'\n", "\n", - "res_c = ols(form_c, data=df_c).fit()\n", + "res_c = smf.ols(form_c, data=df_c).fit()\n", "\n", "print(res_c.summary().tables[1])" ] @@ -1237,7 +1237,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.9" }, "nteract": { "version": "0.15.0" @@ -1257,5 +1257,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/14_Hetero.ipynb b/14_Hetero.ipynb index 47b33811..dc299ee1 100644 --- a/14_Hetero.ipynb +++ b/14_Hetero.ipynb @@ -27,10 +27,10 @@ "import lmdiag\n", "import numpy as np\n", "import pandas as pd\n", + "import statsmodels.formula.api as smf\n", "import wooldridge\n", "\n", "from seaborn import residplot\n", - "from statsmodels.formula.api import ols\n", "from statsmodels.stats.api import het_breuschpagan, het_white\n", "from statsmodels.stats.outliers_influence import reset_ramsey\n", "\n", @@ -215,7 +215,7 @@ "source": [ "form_ols = 'cumgpa ~ sat + hsperc + tothrs + female + black + white'\n", "\n", - "mod_ols = ols(form_ols, data=gpa3)\n", + "mod_ols = smf.ols(form_ols, data=gpa3)\n", "res_ols = mod_ols.fit()\n", "\n", "print(res_ols.summary().tables[1])" @@ -328,7 +328,7 @@ }, "outputs": [], "source": [ - "res_HC3 = ols(form_ols, data=gpa3).fit(cov_type='HC3', use_t=True)\n", + "res_HC3 = smf.ols(form_ols, data=gpa3).fit(cov_type='HC3', use_t=True)\n", "\n", "print(res_HC3.summary().tables[1])" ] @@ -522,7 +522,7 @@ "source": [ "form_h = 'price ~ lotsize + sqrft + bdrms'\n", "\n", - "res_h = ols(form_h, data=hprice1).fit()\n", + "res_h = smf.ols(form_h, data=hprice1).fit()\n", "\n", "print(res_h.summary().tables[1])" ] @@ -595,7 +595,7 @@ "source": [ "form_h_log = 'np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms'\n", "\n", - "res_h_log = ols(form_h_log, data=hprice1).fit()\n", + "res_h_log = smf.ols(form_h_log, data=hprice1).fit()\n", "\n", "print(res_h_log.summary().tables[1])" ] @@ -1090,7 +1090,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.9" }, "nteract": { "version": "0.15.0" @@ -1110,5 +1110,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/15_Pooling.ipynb b/15_Pooling.ipynb index 21a73458..81ffbdfd 100644 --- a/15_Pooling.ipynb +++ b/15_Pooling.ipynb @@ -24,9 +24,10 @@ "source": [ "import numpy as np\n", "import pandas as pd\n", + "import statsmodels.formula.api as smf\n", "import wooldridge\n", + "\n", "from scipy.stats import t\n", - "from statsmodels.formula.api import ols\n", "\n", "# 警告メッセージを非表示\n", "import warnings\n", @@ -187,7 +188,7 @@ " y85:educ + y85:female + \\\n", " exper + I((exper**2)/100) + union'\n", "\n", - "result = ols(formula, cps).fit()" + "result = smf.ols(formula, cps).fit()" ] }, { @@ -489,7 +490,7 @@ "source": [ "formula = 'rprice ~ nearinc + y81 + nearinc:y81'\n", "\n", - "result = ols(formula, data=kielmc).fit()" + "result = smf.ols(formula, data=kielmc).fit()" ] }, { @@ -566,7 +567,7 @@ "source": [ "formula_1 = 'np.log(rprice) ~ nearinc * y81'\n", "\n", - "result_1 = ols(formula_1, data=kielmc).fit()\n", + "result_1 = smf.ols(formula_1, data=kielmc).fit()\n", "\n", "print(result_1.summary().tables[1])" ] @@ -594,7 +595,7 @@ "formula_2 = 'np.log(rprice) ~ nearinc * y81 + age + I(age**2) + \\\n", " np.log(intst) + np.log(land) + np.log(area) + rooms + baths'\n", "\n", - "result_2 = ols(formula_2, data=kielmc).fit()\n", + "result_2 = smf.ols(formula_2, data=kielmc).fit()\n", "\n", "print(result_2.summary().tables[1])" ] @@ -1174,7 +1175,7 @@ "source": [ "formula_1 = 'crmrte_diff ~ unem_diff'\n", "\n", - "result_1 = ols(formula_1, crime2_diff).fit()\n", + "result_1 = smf.ols(formula_1, crime2_diff).fit()\n", "\n", "print(result_1.summary().tables[1])" ] @@ -1240,7 +1241,7 @@ "source": [ "formula_ols_1 = 'crmrte ~ d87 + unem'\n", "\n", - "result_ols_1 = ols(formula_ols_1, crime2).fit()\n", + "result_ols_1 = smf.ols(formula_ols_1, crime2).fit()\n", "\n", "print(result_ols_1.summary().tables[1])" ] @@ -1271,7 +1272,7 @@ "source": [ "formula_ols_2 = 'crmrte ~ unem'\n", "\n", - "result_ols_2 = ols(formula_ols_2, crime2).fit()\n", + "result_ols_2 = smf.ols(formula_ols_2, crime2).fit()\n", "\n", "print(result_ols_2.summary().tables[1])" ] @@ -1398,7 +1399,7 @@ }, "outputs": [], "source": [ - "result_2 = ols(formula_2, crime4).fit()\n", + "result_2 = smf.ols(formula_2, crime4).fit()\n", "\n", "print(result_2.summary().tables[1])" ] diff --git a/16_linearmodels.ipynb b/16_linearmodels.ipynb index 5c7b4f4c..13d76e2d 100644 --- a/16_linearmodels.ipynb +++ b/16_linearmodels.ipynb @@ -25,6 +25,7 @@ "import pandas as pd\n", "import py4macro\n", "import wooldridge\n", + "\n", "from linearmodels.panel.data import PanelData\n", "from linearmodels.panel import FirstDifferenceOLS\n", "\n", @@ -929,7 +930,9 @@ "実際に回帰式を書くことにする。使い方は`statsmodels`と似ている。\n", "* `FirstDifferenceOLS`モジュールの関数`.from_formula`を使い次のように引数を指定する。\n", "\n", - "$$\\text{.from_formula}(\\text{回帰式}, \\text{データ})$$\n", + " $$\n", + " \\text{.from\\_formula}\\left(\\text{回帰式}, \\text{データ}\\right)\n", + " $$\n", "\n", "* 定数項を入れることはできない仕様となっている。\n", "* ここでは,以前の推定結果と比べるために,ダミー変数`d82`を追加する。" @@ -1039,7 +1042,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.9" }, "toc": { "base_numbering": 1, @@ -1056,5 +1059,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/8_Simple_Regression.ipynb b/8_Simple_Regression.ipynb index 022b702e..14b073db 100644 --- a/8_Simple_Regression.ipynb +++ b/8_Simple_Regression.ipynb @@ -27,10 +27,10 @@ "import numpy as np\n", "import pandas as pd\n", "import py4macro # 属性を調べるsee()関数が含まれる\n", + "import statsmodels.formula.api as smf # 回帰分析のパッケージ\n", "import wooldridge # wooldridgeのデータ・パッケージ\n", "\n", "from scipy.stats import norm # 正規分布関数\n", - "from statsmodels.formula.api import ols # 回帰分析のパッケージ\n", "from numba import njit # シミュレーションの計算を高速化する\n", "\n", "# 警告メッセージを非表示\n", @@ -578,7 +578,7 @@ "metadata": {}, "outputs": [], "source": [ - "mod = ols(formula, data=df)" + "mod = smf.ols(formula, data=df)" ] }, { @@ -1068,7 +1068,7 @@ }, "outputs": [], "source": [ - "res = ols(formula, data=df).fit()" + "res = smf.ols(formula, data=df).fit()" ] }, { @@ -1330,7 +1330,7 @@ "outputs": [], "source": [ "formula = 'Y ~ X'\n", - "res_sim = ols(formula, data=df_sim).fit()" + "res_sim = smf.ols(formula, data=df_sim).fit()" ] }, { @@ -1411,7 +1411,7 @@ " df_sim = pd.DataFrame({'X':x, 'Y':y})\n", " \n", " formula = 'Y ~ X'\n", - " res_sim = ols(formula, data=df_sim).fit()\n", + " res_sim = smf.ols(formula, data=df_sim).fit()\n", " \n", " return res_sim.params" ] @@ -1835,7 +1835,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.11.9" }, "nteract": { "version": "0.14.5" @@ -1855,5 +1855,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/9_Multiple_Regression.ipynb b/9_Multiple_Regression.ipynb index 872910ab..71cc90e8 100644 --- a/9_Multiple_Regression.ipynb +++ b/9_Multiple_Regression.ipynb @@ -27,13 +27,13 @@ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", + "import statsmodels.formula.api as smf\n", "import py4macro\n", "import wooldridge\n", "\n", "from numba import njit\n", "from pandas.plotting import scatter_matrix\n", "from scipy.stats import norm, uniform, gaussian_kde, multivariate_normal\n", - "from statsmodels.formula.api import ols\n", "from statsmodels.stats.outliers_influence import variance_inflation_factor as vif\n", "\n", "# 警告メッセージを非表示\n", @@ -182,7 +182,7 @@ }, "outputs": [], "source": [ - "res_1 = ols(formula_1, data=wage1).fit()" + "res_1 = smf.ols(formula_1, data=wage1).fit()" ] }, { @@ -506,7 +506,7 @@ }, "outputs": [], "source": [ - "res_2 = ols(formula_2, data=wage1).fit()" + "res_2 = smf.ols(formula_2, data=wage1).fit()" ] }, { @@ -548,7 +548,7 @@ "outputs": [], "source": [ "formula_3 = 'np.log(wage) ~ educ + tenure + exper + I(exper**2)'\n", - "res_3 = ols(formula_3, data=wage1).fit()" + "res_3 = smf.ols(formula_3, data=wage1).fit()" ] }, { @@ -611,7 +611,7 @@ "outputs": [], "source": [ "formula_4 = 'np.log(wage) ~ educ + exper + tenure + myfunc(exper)'\n", - "res_4 = ols(formula_4, data=wage1).fit()" + "res_4 = smf.ols(formula_4, data=wage1).fit()" ] }, { @@ -724,7 +724,7 @@ "outputs": [], "source": [ "formula_1a = 'wage ~ educ + tenure + exper'\n", - "res_1a = ols(formula_1a, data=wage1).fit()\n", + "res_1a = smf.ols(formula_1a, data=wage1).fit()\n", "res_1a.params" ] }, @@ -1341,7 +1341,7 @@ "outputs": [], "source": [ "df_check = pd.DataFrame({'Y':y,'X1':x1,'X2':x2})\n", - "res_check = ols('Y ~ X1 + X2', data=df_check).fit()\n", + "res_check = smf.ols('Y ~ X1 + X2', data=df_check).fit()\n", "res_check.params" ] }, @@ -2520,7 +2520,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.11.9" }, "nteract": { "version": "0.15.0"