theengineeringworld · siliconMagic · Sep 26, 2018 · Oct 12, 2018 · Oct 12, 2018 · Oct 16, 2018
diff --git a/Bayesian Inference Python Statistics.ipynb b/Bayesian Inference Python Statistics.ipynb
@@ -0,0 +1,120 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# The Engineering World - A Place For Learning And Exploring\n",
+    "\n",
+    "## Bayesian Inference "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Standard imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib\n",
+    "import matplotlib.pyplot as pp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'pymc3'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-5-2e5c536b7c65>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mpymc3\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'pymc3'"
+     ]
+    }
+   ],
+   "source": [
+    "import pymc3 as pm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Cross validation Python Statistics Training.ipynb b/Cross validation Python Statistics Training.ipynb
diff --git a/Goodness Of Fit.ipynb b/Goodness Of Fit.ipynb
diff --git a/Hypothesis Testing in Python Statistics.ipynb b/Hypothesis Testing in Python Statistics.ipynb
@@ -0,0 +1,95 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Python statistics essential training - 04_04_testing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Standard imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import io"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "import matplotlib\n",
+    "import matplotlib.pyplot as pp\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scipy.stats\n",
+    "import scipy.optimize\n",
+    "import scipy.spatial"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Logistic Regression.ipynb b/Logistic Regression.ipynb
diff --git a/TEW_06_hypothesis_testing_p_value_CI.py b/TEW_06_hypothesis_testing_p_value_CI.py
@@ -0,0 +1,55 @@
+#coding:utf-8
+"""
+------------------------------------------------
+@File Name    : TEW_06_hypothesis_testing_p_value_CI
+@Function     : 
+@Author       : Minux
+@Date         : 2018/10/16
+@Revised Date : 2018/10/16
+------------------------------------------------
+"""
+import math
+import io
+import numpy as np
+import pandas as pd
+
+import matplotlib.pyplot as plt
+
+import scipy.stats
+import scipy.optimize
+import scipy.spatial
+
+# Load both tables once at import time; the functions below read these
+# module-level frames directly.
+cholera = pd.read_csv('cholera.csv') # cholera data
+pumps = pd.read_csv('pumps.csv')     # water-pump data
+
+def Plot_Cholera_func():
+    """Draw the pump and cholera positions on top of the ``london.png`` image.
+
+    Reads the ``x`` and ``y`` columns of the module-level ``pumps`` and
+    ``cholera`` frames. Pumps are drawn in blue, cholera rows as small red
+    dots, and the figure is then shown.
+    """
+    # The figure handle was never used; pyplot tracks the new figure as the
+    # current one, so the call alone is enough.
+    plt.figure(figsize=(10, 10))
+    img = plt.imread('london.png')
+    # extent places the image at -0.38..0.38 on both axes in data coordinates
+    # (presumably the range of the x/y columns -- confirm against the CSVs).
+    plt.imshow(img, extent=[-0.38, 0.38, -0.38, 0.38])
+    plt.scatter(pumps.x, pumps.y, color='b')
+    plt.scatter(cholera.x, cholera.y, color='r', s=3)
+    plt.show()
+
+def Data_stat_info():
+    """Print two summaries of the module-level ``cholera`` frame.
+
+    First the number of rows for each ``closest`` value, then a separator
+    line, then the sum of ``deaths`` for each ``closest`` value.
+    """
+    rows_by_closest = cholera.closest.value_counts()
+    print(rows_by_closest)
+
+    rule = '-'*10
+    print(rule, 'GroupBy_Closest', rule)
+
+    deaths_by_closest = cholera.groupby('closest').deaths.sum()
+    print(deaths_by_closest)
+
+def simulate(n):
+    """Return a one-column frame holding ``n`` random ``closest`` labels.
+
+    The labels 0, 1, 4 and 5 are drawn with probabilities 0.65, 0.15, 0.10
+    and 0.10 respectively, using NumPy's global random state.
+
+    :param n: number of rows to draw.
+    :return: ``pandas.DataFrame`` with the single column ``closest``.
+    """
+    labels = [0,1,4,5]
+    weights = [0.65, 0.15, 0.10, 0.10]
+    draws = np.random.choice(labels, size=n, p=weights)
+    return pd.DataFrame({'closest': draws})
+
+def sampling_function(n_deaths=489, observed=340, n_trials=10000):
+    """Simulate the count of label-0 draws and print where ``observed`` falls.
+
+    Runs ``n_trials`` simulations of ``n_deaths`` draws each via ``simulate``
+    and records, per simulation, how many draws have ``closest == 0``. It
+    then prints 100 minus the percentile rank of ``observed`` among those
+    simulated counts.
+
+    The printed value is a percentage in [0, 100], not a probability in
+    [0, 1], because ``scipy.stats.percentileofscore`` reports percentiles.
+    The smaller the value, the more strongly the null hypothesis can be
+    rejected.
+
+    :param n_deaths: number of draws in each simulated sample.
+    :param observed: count whose position in the simulated distribution is
+        evaluated.
+    :param n_trials: number of simulated samples.
+    """
+    # Count label 0 with a boolean mask. ``value_counts()[0]`` is a label
+    # lookup and raises KeyError when a sample contains no 0 at all; the mask
+    # yields 0 in that case.
+    counts = [(simulate(n_deaths).closest == 0).sum() for _ in range(n_trials)]
+    sampling = pd.DataFrame({'counts': counts})
+    # To inspect the sampling distribution:
+    # sampling.counts.hist(histtype='step')
+    # plt.show()
+    # Compute the p-value (expressed in percent).
+    p_value = 100.0 - scipy.stats.percentileofscore(sampling.counts, score=observed)
+    print(p_value)
+
+
+# Run the p-value simulation only when this file is executed as a script,
+# not when it is imported.
+if __name__ == '__main__':
+    sampling_function()
+
+
+
diff --git a/TEW_07_Anova_Fitting_Models.py b/TEW_07_Anova_Fitting_Models.py
@@ -0,0 +1,100 @@
+#coding:utf-8
+"""
+------------------------------------------------
+@File Name    : TEW_07_Anova_Fitting_Models
+@Function     : 
+@Author       : Minux
+@Date         : 2018/10/23
+@Revised Date : 2018/10/23
+------------------------------------------------
+"""
+import math
+import numpy
+import pandas as pd
+import matplotlib.pyplot as plt
+import statsmodels.api as sm
+import statsmodels.formula.api as smf
+import numpy as np
+
+# Load the Gapminder table and keep only the rows for the year 1985.
+gap_minder = pd.read_csv('gapminder.csv')
+g_data = gap_minder.loc[gap_minder['year'] == 1985]
+
+# Marker size for the scatter plots: population scaled by 1e-6.
+size = 1e-6 * g_data.population
+
+# Marker colour for each row, looked up from its region.
+colors = g_data.region.map({
+    'Africa': 'skyblue',
+    'Europe': 'gold',
+    'America': 'palegreen',
+    'Asia': 'red',
+})
+
+def plot_data():
+    """Scatter ``babies_per_woman`` against ``age5_surviving`` for ``g_data``.
+
+    Marker colour and size come from the module-level ``colors`` and
+    ``size`` series.
+    """
+    marker_style = dict(c=colors, s=size, linewidths=0.5, edgecolor='k', alpha=0.5)
+    g_data.plot.scatter('age5_surviving', 'babies_per_woman', **marker_style)
+
+# Constant-only model: the right-hand side of the formula is just '1', so no
+# column of g_data is used as an explanatory variable. Its parameters are
+# printed next to the plain column mean further down.
+model = smf.ols(formula='babies_per_woman ~ 1', data=g_data)
+grand_mean = model.fit()
+
+def plot_fit(fit_model):
+    """Overlay a fitted model's predictions on the data scatter and show it.
+
+    Draws the data with ``plot_data`` first, then adds diamond markers at
+    ``age5_surviving`` versus the model's predictions for ``g_data``.
+
+    :param fit_model: object whose ``predict(g_data)`` result is used as the
+        y values of the overlay.
+    """
+    plot_data()
+    predicted = fit_model.predict(g_data)
+    plt.scatter(
+        g_data.age5_surviving,
+        predicted,
+        c=colors,
+        s=30,
+        linewidths=0.5,
+        edgecolors='k',
+        marker='D',
+    )
+    plt.show()
+
+# plot_fit(grand_mean)
+print(np.char.center('mean', 30, '-'))
+# Grand mean: the parameters of the constant-only fit are printed next to
+# the plain column mean so the two can be compared.
+print(grand_mean.params)
+print(g_data.babies_per_woman.mean())
+
+print(np.char.center('group mean', 30, '-'))
+
+# Group means: region is the only term and '-1' removes the intercept; the
+# fitted parameters are printed next to the per-region means for comparison.
+group_means = smf.ols(formula='babies_per_woman ~ -1+region', data=g_data).fit()
+# plot_fit(group_means)
+
+print(group_means.params)
+print(g_data.groupby('region').babies_per_woman.mean())
+
+print(np.char.center('surviving', 30, '-'))
+# Region terms plus age5_surviving as a further explanatory variable.
+surviving = smf.ols(formula='babies_per_woman ~ -1 + region + age5_surviving', data=g_data).fit()
+# plot_fit(surviving)
+
+# Add an interaction term (age5_surviving:region) and population.
+surviving_by_region_population = smf.ols(formula='babies_per_woman ~ -1+region+age5_surviving:region'
+                                      '-age5_surviving + population', data=g_data).fit()
+# plot_fit(surviving_by_region_population)
+print(surviving_by_region_population.params)
+
+# Measures of goodness of fit:
+#   - Mean squared error of the residuals
+#   - R^2 = (explained variance) / (total variance)
+#   - F-statistic: explanatory power of the fit parameters compared to
+#     random fit vectors
+print(np.char.center('Statistics_Indicator',30,'-'))
+def statistics_indicator(*args):
+    """Print one goodness-of-fit statistic per fitted model for each name.
+
+    For every name in ``args`` a centred header line is printed. If the name
+    is ``'resid'``, ``'rsquared'`` or ``'f_value'``, the header is followed by
+    ``mse_resid``, ``rsquared`` or ``fvalue`` respectively for the
+    module-level ``group_means``, ``surviving`` and
+    ``surviving_by_region_population`` fits, in that order. Any other name
+    prints only its header.
+
+    :param args: names of the statistics to print.
+    """
+    # Names are matched by value through the dict lookup. The earlier
+    # ``arg is 'resid'`` tested object identity, which only succeeds when the
+    # interpreter happens to share the string object, and is reported as a
+    # SyntaxWarning on Python 3.8+.
+    attribute_for = {'resid': 'mse_resid', 'rsquared': 'rsquared', 'f_value': 'fvalue'}
+    fitted_models = [group_means, surviving, surviving_by_region_population]
+    for arg in args:
+        print(np.char.center(arg,30,'-'))
+        # Only strings can name a statistic; anything else is header-only.
+        attribute = attribute_for.get(arg) if isinstance(arg, str) else None
+        if attribute is None:
+            continue
+        for fitted in fitted_models:
+            print(getattr(fitted, attribute))
+
+# 'xx' is not one of the recognised names, so only its header line is printed.
+statistics_indicator('resid','rsquared','f_value','xx')
+
+# Full summary of the region + age5_surviving fit.
+print(surviving.summary())
+
+# ANOVA table for the region-only fit.
+print(sm.stats.anova_lm(group_means))
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/bootstrap_sample.png b/bootstrap_sample.png