on 23rd Nov

Jean-KOUAGOU · Nov 23, 2019 · 0ff4389 · 0ff4389
1 parent c87cded
commit 0ff4389
Show file tree

Hide file tree

Showing 9 changed files with 33,542 additions and 636 deletions.
diff --git a/.ipynb_checkpoints/Gaussian Discriminant Analysis-checkpoint.ipynb b/.ipynb_checkpoints/Gaussian Discriminant Analysis-checkpoint.ipynb
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class GDA(object):\n",
+    "    \n",
+    "    def __init__(self, features, target):\n",
+    "        self.features=features\n",
+    "        self.target=target\n",
+    "        self.mu0=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==0.0]].mean()\n",
+    "        self.mu1=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==1.0]].mean()\n",
+    "        self.phi=(1.0/self.target.shape[0])*self.target[self.target==1.0].count()\n",
+    "\n",
+    "    def P_y(self, y, phi):\n",
+    "        return phi**y * (1-phi)**(1-y)\n",
+    "    \n",
+    "    def P_x_y(self, sigma, x, mu):\n",
+    "        comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(np.linalg.det(sigma)))\n",
+    "        comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
+    "        return comp1*comp2\n",
+    "\n",
+    "        \n",
+    "    def Sigma(self):\n",
+    "        sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
+    "        for i in range(self.target.shape[0]):\n",
+    "            if self.target.iloc[i]==0:\n",
+    "                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu0).T, np.matrix(self.features.iloc[i, :]-self.mu0))\n",
+    "            \n",
+    "            else:\n",
+    "                sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu1).T, np.matrix(self.features.iloc[i, :]-self.mu1))\n",
+    "                \n",
+    "        return (1.0/self.target.shape[0])*sigma\n",
+    "        \n",
+    "    def predict(self, X):\n",
+    "        predictions=[]\n",
+    "        for i in range(X.shape[0]):\n",
+    "            Prob0=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu0)*self.P_y(0, self.phi)\n",
+    "            Prob1=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu1)*self.P_y(1, self.phi)\n",
+    "            if Prob0>Prob1:\n",
+    "                predictions.append(0.0)\n",
+    "            else:\n",
+    "                predictions.append(1.0)\n",
+    "        return np.array(predictions)\n",
+    "            "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mu1 = -1\n",
+    "mu2 = 3\n",
+    "sig1 = 0.5\n",
+    "sig2 = 1\n",
+    "N = 150\n",
+    "np.random.seed(10)\n",
+    "x11=np.random.randn(N,1)*sig1 + mu1\n",
+    "x12=np.random.randn(N,1)*sig1 + mu1+3\n",
+    "x21=np.random.randn(N,1)*sig2 + mu2\n",
+    "x22=np.random.randn(N,1)*sig2 + mu2+3\n",
+    "c = np.vstack((np.zeros((N,1)), np.ones((N,1))))\n",
+    "x1 = np.hstack((x11,x12))\n",
+    "x2 = np.hstack((x21,x21))\n",
+    "\n",
+    "X = np.hstack( (np.vstack( (x1,x2) ),c) )\n",
+    "np.random.shuffle(X)\n",
+    "dataset = pd.DataFrame(data=X, columns=['x','y','c'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Data_xy=dataset.drop('c', axis=1)\n",
+    "target=dataset['c']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mu1 = -1\n",
+    "mu2 = 3\n",
+    "sig1 = 0.5\n",
+    "sig2 = 1\n",
+    "N1 = 100\n",
+    "np.random.seed(10)\n",
+    "x11=np.random.randn(N1,1)*sig1 + mu1\n",
+    "x12=np.random.randn(N1,1)*sig1 + mu1+3\n",
+    "x21=np.random.randn(N1,1)*sig2 + mu2\n",
+    "x22=np.random.randn(N1,1)*sig2 + mu2+3\n",
+    "c = np.vstack((np.zeros((N1,1)), np.ones((N1,1))))\n",
+    "x1 = np.hstack((x11,x12))\n",
+    "x2 = np.hstack((x21,x22))\n",
+    "\n",
+    "X = np.hstack( (np.vstack( (x1,x2) ),c) )\n",
+    "np.random.shuffle(X)\n",
+    "dataset1 = pd.DataFrame(data=X, columns=['x','y','c'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy:  66.5 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "Gaussian=GDA(Data_xy, target)\n",
+    "\n",
+    "Features_test=dataset1[['x', 'y']]\n",
+    "Target_test=dataset1[['c']]\n",
+    "\n",
+    "predictions=Gaussian.predict(Features_test)\n",
+    "predictions=predictions.reshape(-1,1)\n",
+    "print(\"Accuracy: \", ((predictions==np.array(Target_test)).sum()/Target_test.shape[0])*100, \"%\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train data is 10% of the data  (10, 5)\n",
+      "Test data is 90% of the data  (90, 5)\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn import datasets\n",
+    "\n",
+    "Iris=datasets.load_iris()\n",
+    "data=Iris['data']\n",
+    "Data=np.hstack([data, Iris['target'].reshape(-1,1)])\n",
+    "Random=list(range(data.shape[0]))\n",
+    "np.random.shuffle(Random)\n",
+    "Data=Data[Random]\n",
+    "Data\n",
+    "Col=Iris['feature_names']\n",
+    "Col.insert(len(Col), 'target')\n",
+    "Data=pd.DataFrame(Data, columns=Col)\n",
+    "Data.head()\n",
+    "Data=Data[(Data['target']==0) | (Data['target']==1)]\n",
+    "\n",
+    "Train_data=Data.iloc[0:10]\n",
+    "Test_data=Data.iloc[10:]\n",
+    "print(\"Train data is 10% of the data \",Train_data.shape)\n",
+    "print(\"Test data is 90% of the data \",Test_data.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy:  100.0 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "GaussianIris=GDA(Train_data.iloc[:,:-1], Train_data.iloc[:,-1])\n",
+    "\n",
+    "#GaussianIris.Sigma()\n",
+    "\n",
+    "pred=GaussianIris.predict(Test_data.iloc[:,:-1])\n",
+    "pred=pred.reshape(-1,1)\n",
+    "print(\"Accuracy: \", (pred==np.array(Test_data.iloc[:,-1]).reshape(-1,1)).sum()/Test_data.iloc[:,-1].shape[0]*100, \"%\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}