pdfs.py

import numpy as np

# These functions return the probability density of 'value' under the given distribution
def gaussian(value, μ, 𝜎):
    # Calculate variance
    𝜎2 = 𝜎 * 𝜎
    # Calculate exponentiated term
    exp = np.exp(-(np.power(value - μ, 2) / (2 * 𝜎2)))
    # Calculate normalizing denominator term
    den = 1 / np.sqrt(2 * np.pi * 𝜎2)
    # Calculate probability density from both values above
    prob = den * exp
    return prob
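
# Quick example: the standard normal density at its mean is 1/sqrt(2π) ≈ 0.3989,
# so gaussian(0.0, 0.0, 1.0) should return roughly 0.39894.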

# 'a' --> lower bound on the interval
# 'b' --> upper bound on the interval
def uniform(value, a, b):
    return 1 / (b - a) if (a <= value <= b) else 0
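
# Quick example: on the interval [0, 2] the density is the constant 1/(b - a),
# so uniform(0.5, 0, 2) returns 0.5, while any value outside [0, 2] returns 0.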

def rayleigh(value, 𝜎):
    # Rayleigh only defined for positive values
    if value < 0:
        return 0
    # Left side part
    multiplicand_a = value / (𝜎**2)
    # Exponent on the exponential
    exponent = (-value**2) / (2 * 𝜎**2)
    # Right side part
    multiplicand_b = np.exp(exponent)
    # Return Rayleigh value
    return multiplicand_a * multiplicand_b
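
# Quick example: rayleigh(1.0, 1.0) = 1 * exp(-1/2) ≈ 0.6065, and any negative
# value returns 0 because the Rayleigh density has no support below zero.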

# Classify using MLE
# Parameter Notes:
# featureData --> the dataset without the label column
# means --> a length 2 array with feature means for [ wins, losses ]
# stdDev --> a length 2 array with feature standard deviations for [ wins, losses ]
# 'a' --> array of lower bounds on each feature's interval
# 'b' --> array of upper bounds on each feature's interval

# MLE with Gaussian PDF
def gaussian_classify(featureData, means, stdDev):
    # Create an empty column of predictedLabels
    n = featureData.shape[0]
    predictedLabels = np.zeros((n,))
    # Iterate over entire dataset
    for loc, featureList in enumerate(featureData.values):
        probOfWin = 1
        probOfLoss = 1
        # Calculate probability of values in win / loss distributions
        for i, feature in enumerate(featureList):
            probOfWin *= gaussian(feature, means[0][i], stdDev[0][i])
            probOfLoss *= gaussian(feature, means[1][i], stdDev[1][i])
        # Choose highest probability
        predictedLabels[loc] = 1 if probOfWin > probOfLoss else -1
    return predictedLabels
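
# Usage sketch (hypothetical names): assuming 'featureData' is a pandas DataFrame
# of per-game statistics and the per-class parameter arrays are indexed [win, loss],
# a call looks like
#   gaussian_classify(featureData, [winMeans, lossMeans], [winStds, lossStds])
# and returns a NumPy array with one +1 / -1 prediction per row.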

# MLE with uniform PDF
def uniform_classify(featureData, a, b):
    # Create an empty column of predictedLabels
    n = featureData.shape[0]
    predictedLabels = np.zeros((n,))
    # Iterate over entire dataset
    for loc, featureList in enumerate(featureData.values):
        probOfWin = 1
        probOfLoss = 1
        # Calculate probability of values in win / loss distributions
        for i, feature in enumerate(featureList):
            probOfWin *= uniform(feature, a[0][i], b[0][i])
            probOfLoss *= uniform(feature, a[1][i], b[1][i])
        # Choose highest probability
        predictedLabels[loc] = 1 if probOfWin > probOfLoss else -1
    return predictedLabels
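
# Usage sketch (hypothetical names): 'a' and 'b' would typically hold the
# per-class, per-feature minima and maxima observed in training data, e.g.
#   uniform_classify(featureData, [winMins, lossMins], [winMaxs, lossMaxs])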

# MLE with Rayleigh PDF
def rayleigh_classify(featuresData, stdDev):
    # Epsilon for zero-values
    ε = 0.00001
    # Create an empty column of predictedLabels
    n = featuresData.shape[0]
    predictedLabels = np.zeros((n,))
    # Iterate over entire dataset
    for loc, featuresList in enumerate(featuresData.values):
        # Instantiate initial probabilities
        probOfWin = 1
        probOfLoss = 1
        # Calculate probability of values in win / loss distributions
        for i, feature in enumerate(featuresList):
            # Pad the feature value with epsilon if it's equal to 0 to avoid
            # knocking out the probability value.
            if feature == 0:
                feature = ε
            probOfWin *= rayleigh(feature, stdDev[0][i])
            probOfLoss *= rayleigh(feature, stdDev[1][i])
        # Choose highest probability
        predictedLabels[loc] = 1 if probOfWin >= probOfLoss else -1
    return predictedLabels
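
# Minimal runnable sketch, assuming pandas is available and the two-class
# parameter arrays are indexed [win, loss]; the toy numbers below are purely
# illustrative.
if __name__ == "__main__":
    import pandas as pd

    # Two made-up features per game, e.g. points scored and rebounds
    toyData = pd.DataFrame({
        "points": [110.0, 95.0, 102.0],
        "rebounds": [48.0, 40.0, 45.0],
    })
    # Per-class feature parameters: row 0 = wins, row 1 = losses
    toyMeans = [[105.0, 47.0], [96.0, 41.0]]
    toyStds = [[8.0, 4.0], [7.0, 3.5]]
    toyMins = [[100.0, 44.0], [90.0, 38.0]]
    toyMaxs = [[120.0, 52.0], [101.0, 46.0]]

    print(gaussian_classify(toyData, toyMeans, toyStds))
    print(uniform_classify(toyData, toyMins, toyMaxs))
    print(rayleigh_classify(toyData, toyStds))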