Skip to content

Commit

Permalink
on 24 Nov
Browse files Browse the repository at this point in the history
  • Loading branch information
Jean-KOUAGOU committed Nov 24, 2019
1 parent 0ff4389 commit 87460ac
Show file tree
Hide file tree
Showing 4 changed files with 330 additions and 13,868 deletions.
88 changes: 88 additions & 0 deletions .ipynb_checkpoints/MulticlassGDA-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np, pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class GDAMulticlass(object):\n",
" \n",
" def __init__(self, features, target):\n",
" self.features=features\n",
" self.target=target\n",
" \n",
" def train(self):\n",
" combined=pd.concat([self.features, self.target], axis=1)\n",
" self.mu_s=[combined[combined['target']==j].drop('target', axis=1).mean(axis=0)\\\n",
" for j in range(len(self.target.unique()))]\n",
" self.phi_s=(1.0/len(self.target))*np.array([self.target[self.target==j].count() \\\n",
" for j in range(len(self.target.unique()))])\n",
" sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
" for i in range(self.target.shape[0]):\n",
" sigma += np.dot(np.matrix(self.features.iloc[i, :]-\\\n",
" self.mu_s[self.target.iloc[i]]).T, \\\n",
" np.matrix(self.features.iloc[i, :]-self.mu_s[self.target.iloc[i]]))\n",
" \n",
" self.sigma=(1.0/self.target.shape[0])*sigma\n",
" \n",
" def P_y(self, y, phi_s):\n",
" return phi_s[y]\n",
" \n",
" def P_x_given_y(self, sigma, x, mu):\n",
" comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(\\\n",
" np.linalg.det(sigma)))\n",
" comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
" return comp1*comp2\n",
" \n",
" def predict(self, X):\n",
" predictions=[]\n",
" for i in range(X.shape[0]):\n",
" Prob=[self.P_x_given_y(self.sigma, X.iloc[i, :], self.mu_s[j])*self.P_y(j,\\\n",
" self.phi_s) for j in range(len(self.target.unique()))]\n",
" \n",
" predictions.append(np.argmax(Prob))\n",
" \n",
" return np.array(predictions)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
141 changes: 30 additions & 111 deletions Gaussian Discriminant Analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,16 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
"import numpy as np, pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -21,35 +20,38 @@
" def __init__(self, features, target):\n",
" self.features=features\n",
" self.target=target\n",
" self.mu0=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==0.0]].mean()\n",
" self.mu1=self.features.iloc[[i for i in range(self.target.shape[0]) if self.target.iloc[i]==1.0]].mean()\n",
" \n",
" def train(self): \n",
" self.mu0=self.features.iloc[[i for i in range(self.target.shape[0]) if \\\n",
" self.target.iloc[i]==0.0]].mean()\n",
" self.mu1=self.features.iloc[[i for i in range(self.target.shape[0]) if \\\n",
" self.target.iloc[i]==1.0]].mean()\n",
" \n",
" self.phi=(1.0/self.target.shape[0])*self.target[self.target==1.0].count()\n",
"\n",
" \n",
" sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
" for i in range(self.target.shape[0]):\n",
" if self.target.iloc[i]==0:\n",
" sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu0).T, \\\n",
" np.matrix(self.features.iloc[i, :]-self.mu0))\n",
" else:\n",
" sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu1).T,\\\n",
" np.matrix(self.features.iloc[i, :]-self.mu1))\n",
" self.sigma=(1.0/self.target.shape[0])*sigma\n",
" \n",
" def P_y(self, y, phi):\n",
" return phi**y * (1-phi)**(1-y)\n",
" \n",
" def P_x_y(self, sigma, x, mu):\n",
" comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(np.linalg.det(sigma)))\n",
" comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
" return comp1*comp2\n",
"\n",
" \n",
" def Sigma(self):\n",
" sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
" for i in range(self.target.shape[0]):\n",
" if self.target.iloc[i]==0:\n",
" sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu0).T, np.matrix(self.features.iloc[i, :]-self.mu0))\n",
" \n",
" else:\n",
" sigma += np.dot(np.matrix(self.features.iloc[i, :]-self.mu1).T, np.matrix(self.features.iloc[i, :]-self.mu1))\n",
" \n",
" return (1.0/self.target.shape[0])*sigma\n",
" \n",
" def predict(self, X):\n",
" predictions=[]\n",
" for i in range(X.shape[0]):\n",
" Prob0=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu0)*self.P_y(0, self.phi)\n",
" Prob1=self.P_x_y(self.Sigma(), X.iloc[i, :], self.mu1)*self.P_y(1, self.phi)\n",
" Prob0=self.P_x_y(self.sigma, X.iloc[i, :], self.mu0)*self.P_y(0, self.phi)\n",
" Prob1=self.P_x_y(self.sigma, X.iloc[i, :], self.mu1)*self.P_y(1, self.phi)\n",
" if Prob0>Prob1:\n",
" predictions.append(0.0)\n",
" else:\n",
Expand All @@ -67,99 +69,16 @@
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"mu1 = -1\n",
"mu2 = 3\n",
"sig1 = 0.5\n",
"sig2 = 1\n",
"N = 150\n",
"np.random.seed(10)\n",
"x11=np.random.randn(N,1)*sig1 + mu1\n",
"x12=np.random.randn(N,1)*sig1 + mu1+3\n",
"x21=np.random.randn(N,1)*sig2 + mu2\n",
"x22=np.random.randn(N,1)*sig2 + mu2+3\n",
"c = np.vstack((np.zeros((N,1)), np.ones((N,1))))\n",
"x1 = np.hstack((x11,x12))\n",
"x2 = np.hstack((x21,x21))\n",
"\n",
"X = np.hstack( (np.vstack( (x1,x2) ),c) )\n",
"np.random.shuffle(X)\n",
"dataset = pd.DataFrame(data=X, columns=['x','y','c'])\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"Data_xy=dataset.drop('c', axis=1)\n",
"target=dataset['c']"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"mu1 = -1\n",
"mu2 = 3\n",
"sig1 = 0.5\n",
"sig2 = 1\n",
"N1 = 100\n",
"np.random.seed(10)\n",
"x11=np.random.randn(N1,1)*sig1 + mu1\n",
"x12=np.random.randn(N1,1)*sig1 + mu1+3\n",
"x21=np.random.randn(N1,1)*sig2 + mu2\n",
"x22=np.random.randn(N1,1)*sig2 + mu2+3\n",
"c = np.vstack((np.zeros((N1,1)), np.ones((N1,1))))\n",
"x1 = np.hstack((x11,x12))\n",
"x2 = np.hstack((x21,x22))\n",
"\n",
"X = np.hstack( (np.vstack( (x1,x2) ),c) )\n",
"np.random.shuffle(X)\n",
"dataset1 = pd.DataFrame(data=X, columns=['x','y','c'])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 66.5 %\n"
]
}
],
"source": [
"Gaussian=GDA(Data_xy, target)\n",
"\n",
"Features_test=dataset1[['x', 'y']]\n",
"Target_test=dataset1[['c']]\n",
"\n",
"predictions=Gaussian.predict(Features_test)\n",
"predictions=predictions.reshape(-1,1)\n",
"print(\"Accuracy: \", ((predictions==np.array(Target_test)).sum()/Target_test.shape[0])*100, \"%\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 178,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train data is 10% of the data (10, 5)\n",
"Test data is 90% of the data (90, 5)\n"
"Test data is 90% of the data (90, 5)\n",
"100% accuracy on the test set\n"
]
}
],
Expand All @@ -172,7 +91,6 @@
"Random=list(range(data.shape[0]))\n",
"np.random.shuffle(Random)\n",
"Data=Data[Random]\n",
"Data\n",
"Col=Iris['feature_names']\n",
"Col.insert(len(Col), 'target')\n",
"Data=pd.DataFrame(Data, columns=Col)\n",
Expand All @@ -182,12 +100,13 @@
"Train_data=Data.iloc[0:10]\n",
"Test_data=Data.iloc[10:]\n",
"print(\"Train data is 10% of the data \",Train_data.shape)\n",
"print(\"Test data is 90% of the data \",Test_data.shape)"
"print(\"Test data is 90% of the data \",Test_data.shape)\n",
"print(\"100% accuracy on the test set\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 179,
"metadata": {},
"outputs": [
{
Expand All @@ -201,7 +120,7 @@
"source": [
"GaussianIris=GDA(Train_data.iloc[:,:-1], Train_data.iloc[:,-1])\n",
"\n",
"#GaussianIris.Sigma()\n",
"GaussianIris.train()\n",
"\n",
"pred=GaussianIris.predict(Test_data.iloc[:,:-1])\n",
"pred=pred.reshape(-1,1)\n",
Expand Down
88 changes: 88 additions & 0 deletions MulticlassGDA.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np, pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class GDAMulticlass(object):\n",
" \n",
" def __init__(self, features, target):\n",
" self.features=features\n",
" self.target=target\n",
" \n",
" def train(self):\n",
" combined=pd.concat([self.features, self.target], axis=1)\n",
" self.mu_s=[combined[combined['target']==j].drop('target', axis=1).mean(axis=0)\\\n",
" for j in range(len(self.target.unique()))]\n",
" self.phi_s=(1.0/len(self.target))*np.array([self.target[self.target==j].count() \\\n",
" for j in range(len(self.target.unique()))])\n",
" sigma=np.matrix(np.zeros([self.features.shape[1], self.features.shape[1]]))\n",
" for i in range(self.target.shape[0]):\n",
" sigma += np.dot(np.matrix(self.features.iloc[i, :]-\\\n",
" self.mu_s[self.target.iloc[i]]).T, \\\n",
" np.matrix(self.features.iloc[i, :]-self.mu_s[self.target.iloc[i]]))\n",
" \n",
" self.sigma=(1.0/self.target.shape[0])*sigma\n",
" \n",
" def P_y(self, y, phi_s):\n",
" return phi_s[y]\n",
" \n",
" def P_x_given_y(self, sigma, x, mu):\n",
" comp1 = 1.0/(np.sqrt((2*np.pi)**self.features.shape[1]) * np.sqrt(\\\n",
" np.linalg.det(sigma)))\n",
" comp2 = float(np.exp(np.dot(-0.5*np.dot(x-mu, np.linalg.inv(sigma)), x-mu)))\n",
" return comp1*comp2\n",
" \n",
" def predict(self, X):\n",
" predictions=[]\n",
" for i in range(X.shape[0]):\n",
" Prob=[self.P_x_given_y(self.sigma, X.iloc[i, :], self.mu_s[j])*self.P_y(j,\\\n",
" self.phi_s) for j in range(len(self.target.unique()))]\n",
" \n",
" predictions.append(np.argmax(Prob))\n",
" \n",
" return np.array(predictions)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 87460ac

Please sign in to comment.