on 24 Nov

Jean-KOUAGOU · Nov 24, 2019 · 87460ac · 87460ac
1 parent 0ff4389
commit 87460ac
Show file tree

Hide file tree

Showing 4 changed files with 330 additions and 13,868 deletions.
diff --git a/.ipynb_checkpoints/MulticlassGDA-checkpoint.ipynb b/.ipynb_checkpoints/MulticlassGDA-checkpoint.ipynb
@@ -0,0 +1,88 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np, pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class GDAMulticlass(object):\n",
+    "    \n",
+    "    def __init__(self, features, target):\n",
+    "        self.features=features\n",
+    "        self.target=target\n",
+    "        \n",
+    "    def train(self):\n",
+    "        combined=pd.concat([self.features, self.target], axis=1)\n",
+    "        self.mu_s=[combined[combined['target']==j].drop('target', axis=1).mean(axis=0)\\\n",
+    "                   for j in range(len(self.target.unique()))]\n",
+    "        self.phi_s=(1.0/len(self.target))*np.array([self.target[self.target==j].count() \\\n",
+    "                                    for j in range(len(self.target.unique()))])\n",
+    "        sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
+    "        for i in range(self.target.shape[0]):\n",
+    "            sigma += np.dot(np.matrix(self.features.iloc[i, :]-\\\n",
+    "            self.mu_s[self.target.iloc[i]]).T, \\\n",
+    "            np.matrix(self.features.iloc[i, :]-self.mu_s[self.target.iloc[i]]))\n",
+    "        \n",
+    "        self.sigma=(1.0/self.target.shape[0])*sigma\n",
+    "        \n",
+    "    def P_y(self, y, phi_s):\n",
+    "        return phi_s[y]\n",
+    "    \n",
+    "    def P_x_given_y(self, sigma, x, mu):\n",
+    "        comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(\\\n",
+    "                                                    np.linalg.det(sigma)))\n",
+    "        comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
+    "        return comp1*comp2\n",
+    "        \n",
+    "    def predict(self, X):\n",
+    "        predictions=[]\n",
+    "        for i in range(X.shape[0]):\n",
+    "            Prob=[self.P_x_given_y(self.sigma, X.iloc[i, :], self.mu_s[j])*self.P_y(j,\\\n",
+    "                    self.phi_s) for j in range(len(self.target.unique()))]\n",
+    "            \n",
+    "            predictions.append(np.argmax(Prob))\n",
+    "            \n",
+    "        return np.array(predictions)\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Gaussian Discriminant Analysis.ipynb b/Gaussian Discriminant Analysis.ipynb
@@ -2,17 +2,16 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import numpy as np\n",
-    "import pandas as pd"
+    "import numpy as np, pandas as pd"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 69,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,35 +20,38 @@
     "    def __init__(self, features, target):\n",
     "        self.features=features\n",
     "        self.target=target\n",
-    "        self.mu0=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==0.0]].mean()\n",
-    "        self.mu1=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==1.0]].mean()\n",
+    "        \n",
+    "    def train(self):    \n",
+    "        self.mu0=self.features.iloc[[i for i in range(self.target.shape[0]) if \\\n",
+    "                                     self.target.iloc[i]==0.0]].mean()\n",
+    "        self.mu1=self.features.iloc[[i for i in range(self.target.shape[0]) if \\\n",
+    "                                     self.target.iloc[i]==1.0]].mean()\n",
+    "        \n",
     "        self.phi=(1.0/self.target.shape[0])*self.target[self.target==1.0].count()\n",
-    "\n",
+    "        \n",
+    "        sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
+    "        for i in range(self.target.shape[0]):\n",
+    "            if self.target.iloc[i]==0:\n",
+    "                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu0).T, \\\n",
+    "                                np.matrix(self.features.iloc[i, :]-self.mu0))\n",
+    "            else:\n",
+    "                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu1).T,\\\n",
+    "                                np.matrix(self.features.iloc[i, :]-self.mu1))\n",
+    "        self.sigma=(1.0/self.target.shape[0])*sigma\n",
+    "        \n",
     "    def P_y(self, y, phi):\n",
     "        return phi**y * (1-phi)**(1-y)\n",
     "    \n",
     "    def P_x_y(self, sigma, x, mu):\n",
     "        comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(np.linalg.det(sigma)))\n",
     "        comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
     "        return comp1*comp2\n",
-    "\n",
-    "        \n",
-    "    def Sigma(self):\n",
-    "        sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
-    "        for i in range(self.target.shape[0]):\n",
-    "            if self.target.iloc[i]==0:\n",
-    "                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu0).T, np.matrix(self.features.iloc[i, :]-self.mu0))\n",
-    "            \n",
-    "            else:\n",
-    "                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu1).T, np.matrix(self.features.iloc[i, :]-self.mu1))\n",
-    "                \n",
-    "        return (1.0/self.target.shape[0])*sigma\n",
     "        \n",
     "    def predict(self, X):\n",
     "        predictions=[]\n",
     "        for i in range(X.shape[0]):\n",
-    "            Prob0=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu0)*self.P_y(0, self.phi)\n",
-    "            Prob1=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu1)*self.P_y(1, self.phi)\n",
+    "            Prob0=self.P_x_y(self.sigma, X.iloc[i, :], self.mu0)*self.P_y(0, self.phi)\n",
+    "            Prob1=self.P_x_y(self.sigma, X.iloc[i, :], self.mu1)*self.P_y(1, self.phi)\n",
     "            if Prob0>Prob1:\n",
     "                predictions.append(0.0)\n",
     "            else:\n",
@@ -67,99 +69,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mu1 = -1\n",
-    "mu2 = 3\n",
-    "sig1 = 0.5\n",
-    "sig2 = 1\n",
-    "N = 150\n",
-    "np.random.seed(10)\n",
-    "x11=np.random.randn(N,1)*sig1 + mu1\n",
-    "x12=np.random.randn(N,1)*sig1 + mu1+3\n",
-    "x21=np.random.randn(N,1)*sig2 + mu2\n",
-    "x22=np.random.randn(N,1)*sig2 + mu2+3\n",
-    "c = np.vstack((np.zeros((N,1)), np.ones((N,1))))\n",
-    "x1 = np.hstack((x11,x12))\n",
-    "x2 = np.hstack((x21,x21))\n",
-    "\n",
-    "X = np.hstack( (np.vstack( (x1,x2) ),c) )\n",
-    "np.random.shuffle(X)\n",
-    "dataset = pd.DataFrame(data=X, columns=['x','y','c'])\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "Data_xy=dataset.drop('c', axis=1)\n",
-    "target=dataset['c']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mu1 = -1\n",
-    "mu2 = 3\n",
-    "sig1 = 0.5\n",
-    "sig2 = 1\n",
-    "N1 = 100\n",
-    "np.random.seed(10)\n",
-    "x11=np.random.randn(N1,1)*sig1 + mu1\n",
-    "x12=np.random.randn(N1,1)*sig1 + mu1+3\n",
-    "x21=np.random.randn(N1,1)*sig2 + mu2\n",
-    "x22=np.random.randn(N1,1)*sig2 + mu2+3\n",
-    "c = np.vstack((np.zeros((N1,1)), np.ones((N1,1))))\n",
-    "x1 = np.hstack((x11,x12))\n",
-    "x2 = np.hstack((x21,x22))\n",
-    "\n",
-    "X = np.hstack( (np.vstack( (x1,x2) ),c) )\n",
-    "np.random.shuffle(X)\n",
-    "dataset1 = pd.DataFrame(data=X, columns=['x','y','c'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Accuracy:  66.5 %\n"
-     ]
-    }
-   ],
-   "source": [
-    "Gaussian=GDA(Data_xy, target)\n",
-    "\n",
-    "Features_test=dataset1[['x', 'y']]\n",
-    "Target_test=dataset1[['c']]\n",
-    "\n",
-    "predictions=Gaussian.predict(Features_test)\n",
-    "predictions=predictions.reshape(-1,1)\n",
-    "print(\"Accuracy: \", ((predictions==np.array(Target_test)).sum()/Target_test.shape[0])*100, \"%\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 178,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Train data is 10% of the data  (10, 5)\n",
-      "Test data is 90% of the data  (90, 5)\n"
+      "Test data is 90% of the data  (90, 5)\n",
+      "100% accuracy on the test set\n"
      ]
     }
    ],
@@ -172,7 +91,6 @@
     "Random=list(range(data.shape[0]))\n",
     "np.random.shuffle(Random)\n",
     "Data=Data[Random]\n",
-    "Data\n",
     "Col=Iris['feature_names']\n",
     "Col.insert(len(Col), 'target')\n",
     "Data=pd.DataFrame(Data, columns=Col)\n",
@@ -182,12 +100,13 @@
     "Train_data=Data.iloc[0:10]\n",
     "Test_data=Data.iloc[10:]\n",
     "print(\"Train data is 10% of the data \",Train_data.shape)\n",
-    "print(\"Test data is 90% of the data \",Test_data.shape)"
+    "print(\"Test data is 90% of the data \",Test_data.shape)\n",
+    "print(\"100% accuracy on the test set\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 179,
    "metadata": {},
    "outputs": [
     {
@@ -201,7 +120,7 @@
    "source": [
     "GaussianIris=GDA(Train_data.iloc[:,:-1], Train_data.iloc[:,-1])\n",
     "\n",
-    "#GaussianIris.Sigma()\n",
+    "GaussianIris.train()\n",
     "\n",
     "pred=GaussianIris.predict(Test_data.iloc[:,:-1])\n",
     "pred=pred.reshape(-1,1)\n",

diff --git a/MulticlassGDA.ipynb b/MulticlassGDA.ipynb
@@ -0,0 +1,88 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np, pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class GDAMulticlass(object):\n",
+    "    \n",
+    "    def __init__(self, features, target):\n",
+    "        self.features=features\n",
+    "        self.target=target\n",
+    "        \n",
+    "    def train(self):\n",
+    "        combined=pd.concat([self.features, self.target], axis=1)\n",
+    "        self.mu_s=[combined[combined['target']==j].drop('target', axis=1).mean(axis=0)\\\n",
+    "                   for j in range(len(self.target.unique()))]\n",
+    "        self.phi_s=(1.0/len(self.target))*np.array([self.target[self.target==j].count() \\\n",
+    "                                    for j in range(len(self.target.unique()))])\n",
+    "        sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
+    "        for i in range(self.target.shape[0]):\n",
+    "            sigma += np.dot(np.matrix(self.features.iloc[i, :]-\\\n",
+    "            self.mu_s[self.target.iloc[i]]).T, \\\n",
+    "            np.matrix(self.features.iloc[i, :]-self.mu_s[self.target.iloc[i]]))\n",
+    "        \n",
+    "        self.sigma=(1.0/self.target.shape[0])*sigma\n",
+    "        \n",
+    "    def P_y(self, y, phi_s):\n",
+    "        return phi_s[y]\n",
+    "    \n",
+    "    def P_x_given_y(self, sigma, x, mu):\n",
+    "        comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(\\\n",
+    "                                                    np.linalg.det(sigma)))\n",
+    "        comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
+    "        return comp1*comp2\n",
+    "        \n",
+    "    def predict(self, X):\n",
+    "        predictions=[]\n",
+    "        for i in range(X.shape[0]):\n",
+    "            Prob=[self.P_x_given_y(self.sigma, X.iloc[i, :], self.mu_s[j])*self.P_y(j,\\\n",
+    "                    self.phi_s) for j in range(len(self.target.unique()))]\n",
+    "            \n",
+    "            predictions.append(np.argmax(Prob))\n",
+    "            \n",
+    "        return np.array(predictions)\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}