-
Notifications
You must be signed in to change notification settings - Fork 0
/
pca.py
43 lines (31 loc) · 1.2 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Code from Chapter 10 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)
# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
# Stephen Marsland, 2008
# An algorithm to compute PCA. Not as fast as the NumPy implementation
from pylab import *
from numpy import *
def pca(data,nRedDim=0,normalise=1):
# Centre data
m = mean(data,axis=0)
data -= m
# Covariance matrix
C = cov(transpose(data))
# Compute eigenvalues and sort into descending order
evals,evecs = linalg.eig(C)
indices = argsort(evals)
indices = indices[::-1]
evecs = evecs[:,indices]
evals = evals[indices]
if nRedDim>0:
evecs = evecs[:,:nRedDim]
if normalise:
for i in range(shape(evecs)[1]):
evecs[:,i] / linalg.norm(evecs[:,i]) * sqrt(evals[i])
# Produce the new data matrix
x = dot(transpose(evecs),transpose(data))
# Compute the original data again
y=transpose(dot(evecs,x))+m
return x,y,evals,evecs