-
Notifications
You must be signed in to change notification settings - Fork 0
/
outlier_count.py
54 lines (43 loc) · 1.66 KB
/
outlier_count.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve
# Read the data set from CSV; the code below relies on its "subject" column.
data = pd.read_csv(
    "/home/ravi/Documents/DWDM/Project/data/DSL-StrongPasswordData.csv"
)
# Distinct subject identifiers; the evaluation loop makes one pass per entry.
subjects = data.loc[:, "subject"].unique()
# Collects the equal error rate computed for each subject.
EERS = list()
def getEER(user_scores, imposter_scores):
    """Estimate the equal error rate (EER) from genuine and imposter scores.

    Genuine samples are labelled 0 and imposter samples 1 before being passed
    to ``roc_curve``, which treats larger scores as evidence for the positive
    (imposter) class.  The EER is obtained by linear interpolation between the
    two ROC operating points that bracket the crossing of the miss rate and
    the false-alarm rate.

    Args:
        user_scores: Sequence of scores for genuine samples.
        imposter_scores: Sequence of scores for imposter samples.

    Returns:
        The interpolated rate at which miss rate equals false-alarm rate.

    Raises:
        ValueError: If the ROC curve has no point on one side of the crossing
            (for example when one of the two classes is empty).
    """
    # Materialise as lists so that "+" concatenates even for array inputs.
    user_scores = list(user_scores)
    imposter_scores = list(imposter_scores)

    labels = [0] * len(user_scores) + [1] * len(imposter_scores)
    fpr, tpr, _ = roc_curve(labels, user_scores + imposter_scores)

    missrates = 1 - tpr
    farates = fpr
    dists = missrates - farates

    # argmin/argmax on a masked sub-array return positions relative to that
    # sub-array, so translate them back to indices into the full arrays
    # before using them on missrates/farates.
    non_negative = np.flatnonzero(dists >= 0)
    negative = np.flatnonzero(dists < 0)
    if non_negative.size == 0 or negative.size == 0:
        raise ValueError(
            "cannot locate the miss-rate/false-alarm crossing on the ROC curve"
        )
    idx1 = non_negative[np.argmin(dists[non_negative])]
    idx2 = negative[np.argmax(dists[negative])]

    # x: closest point with miss rate >= false-alarm rate;
    # y: closest point with miss rate < false-alarm rate.
    x = [missrates[idx1], farates[idx1]]
    y = [missrates[idx2], farates[idx2]]

    # Fraction of the way from x to y at which the two rates are equal.  The
    # denominator equals dists[idx1] - dists[idx2], which is strictly positive.
    a = (x[0] - x[1]) / (y[1] - x[1] - y[0] + x[0])
    eer = x[0] + a * (y[0] - x[0])
    return eer
def test(mean, std_dev, test, threshold=2.96):
    """Score each row by its number of outlying features.

    A feature counts as an outlier when its absolute z-score,
    ``abs(value - mean) / std_dev``, is strictly greater than ``threshold``.

    Args:
        mean: Per-feature means (sequence of length d).
        std_dev: Per-feature standard deviations; the first d entries are used.
        test: DataFrame (or other 2-D array-like) of samples, one per row;
            only its first d columns are scored.
        threshold: Z-score cut-off above which a feature is an outlier.

    Returns:
        A list with one float per row: the count of outlying features.
    """
    mean = np.asarray(mean, dtype=float)
    width = mean.shape[0]
    std_dev = np.asarray(std_dev, dtype=float)[:width]
    samples = np.asarray(test, dtype=float)[:, :width]

    # True division keeps the fractional part of the z-score; flooring it
    # would turn a fractional threshold into an integer one.
    z_scores = np.abs(samples - mean) / std_dev

    # Return plain Python floats in a list so callers can concatenate results.
    return (z_scores > threshold).sum(axis=1).astype(float).tolist()


# The name matches pytest's default test-function pattern; opt out of
# collection so importing it into a test module does not register it as a test.
test.__test__ = False
# Evaluate the detector once per subject: fit the per-feature mean and
# standard deviation on that subject's first 300 rows, score their last 100
# rows and every other subject's rows, and record the resulting EER.
for subject in subjects:
    genuine = data.loc[data.subject == subject, "H.period":"H.Return"]
    genuine_train = genuine.head(300)
    genuine_test = genuine.tail(100)

    # taking all others as imposter
    imposter = data.loc[data.subject != subject, "H.period":"H.Return"]

    mean_train = genuine_train.mean().values
    std_dev_train = genuine_train.std().values

    genuine_user_scores = test(mean_train, std_dev_train, genuine_test)
    imposter_scores = test(mean_train, std_dev_train, imposter)

    curr_eer = getEER(genuine_user_scores, imposter_scores)
    # Single parenthesised argument so the output is the same on Python 2 and 3.
    print("%s : %s" % (subject, curr_eer))
    EERS.append(curr_eer)

# EERS is a plain list, which has no .mean() method; average it with NumPy.
print(np.mean(EERS))