1
+ import scipy as sc
2
+ import numpy as np
3
+ import matplotlib .pyplot as plt
4
+ from collections import Counter
5
+ import math
6
+
7
def importLog(log):
    """Read a whitespace-separated access log into a list of field lists.

    Every colon in a line is turned into a field separator, except colons
    that are immediately followed by ``r`` (the ``:r`` marker is kept
    verbatim). The line is then split on single spaces and the first token
    is discarded.

    According to the original header comment, the remaining fields are
    expected to be, in order: startDate, startHour, startMin, startSec,
    durHour, durMin, durSec, serv, srcPort, destPort, srcIP, destIP.

    Lines that produce no fields at all (blank lines, or lines holding only
    the discarded leading token) are skipped, because an empty row would
    make every index-based consumer fail with IndexError.

    Args:
        log: path of the log file to read.

    Returns:
        A list with one list of string fields per non-empty log line.
    """
    rows = []
    with open(log) as f:
        for line in f:
            # Split around ":r" so those markers survive untouched, replace
            # the remaining colons, then stitch the pieces back together.
            # This avoids a placeholder token that could collide with text
            # already present in the line.
            spaced = ":r".join(
                piece.replace(":", " ") for piece in line.split(":r")
            )
            fields = spaced.strip().split(" ")[1:]
            if fields:
                rows.append(fields)
    return rows
11
+
12
def _chebyshevBound(variance, mean, observed):
    """Return variance / (mean - observed)**2, or +inf when observed == mean."""
    deviation = float(mean) - observed
    if deviation == 0:
        # The bound carries no information at the mean; report it as
        # infinitely "un-anomalous" instead of producing inf/nan through a
        # division by zero.
        return float("inf")
    return float(variance) / (deviation * deviation)


def findAccessAnomalies(data, limit=10):
    """Rank (interval, client) pairs by how unusual their access counts are.

    Accesses are grouped into 10-second intervals keyed by
    ``(int(row[1]), int(row[2]), second rounded down to a multiple of 10)``.
    Field 0 of the row is not part of the key, so rows that share the same
    hour/minute/second bucket are counted together regardless of field 0.

    Two counts are scored for every (interval, client) pair, where the
    client is field 10 of the row:

    * the total number of accesses in the interval, against the mean and
      population variance of all interval totals;
    * the number of accesses by that client in that interval, against the
      mean and population variance of all such per-client counts.

    Each score is ``variance / (mean - count)**2`` and the two are averaged.
    Lower values mark counts that lie further from the mean, so results are
    returned in ascending order.

    Args:
        data: iterable of parsed rows (sequences of strings) as produced by
            ``importLog``.
        limit: maximum number of entries to return (default 10). Fewer are
            returned when fewer (interval, client) pairs exist.

    Returns:
        A list of ``((hour, minute, second, client), score)`` tuples sorted by
        ascending score. Empty when ``data`` is empty.
    """
    if not data:
        return []

    # Bucket every access into its 10-second interval.
    intervalDict = {}
    for access in data:
        seconds = int(access[3])
        key = (int(access[1]), int(access[2]), seconds - seconds % 10)
        intervalDict.setdefault(key, []).append(access)

    totAccess = [len(rows) for rows in intervalDict.values()]
    # numpy is used directly: the scipy.mean / scipy.var aliases are
    # deprecated and absent from current SciPy releases.
    totAccessMean = np.mean(totAccess)
    totAccessVar = np.var(totAccess)

    # Per-interval, per-client access counts.
    clientDict = {}
    for key, rows in intervalDict.items():
        for client, hits in Counter(row[10] for row in rows).items():
            clientDict[key + (client,)] = hits

    clientAccess = list(clientDict.values())
    clientAccessMean = np.mean(clientAccess)
    clientAccessVar = np.var(clientAccess)

    clientAttackProb = {}
    for key, hits in clientDict.items():
        totProb = _chebyshevBound(
            totAccessVar, totAccessMean, len(intervalDict[key[:3]])
        )
        clientProb = _chebyshevBound(clientAccessVar, clientAccessMean, hits)
        clientAttackProb[key] = (totProb + clientProb) / 2

    # sorted() is stable, so ties keep their first-seen order, and slicing
    # copes with fewer than `limit` entries.
    ranked = sorted(clientAttackProb.items(), key=lambda item: item[1])
    return ranked[:max(limit, 0)]
59
+
60
def calcNormEntropy(data):
    """Return the normalized base-2 Shannon entropy of the values in ``data``.

    The entropy of the empirical distribution of ``data`` is divided by
    log2 of the number of distinct values. When the entropy is zero (empty
    input or a single distinct value) it is returned as-is without
    normalizing.

    Args:
        data: a sized collection of hashable values.

    Returns:
        The normalized entropy, or zero when the entropy itself is zero.
    """
    frequencies = Counter(data)
    size = len(data)

    entropy = 0
    for occurrences in frequencies.values():
        share = float(occurrences) / size
        entropy += -share * math.log(share, 2)

    if entropy == 0:
        # Nothing to normalize; this also avoids dividing by log(1) == 0.
        return entropy
    return entropy / math.log(len(frequencies), 2)
71
+
72
def getIntervals(data, n, entry):
    """Group one field of every row into ``n``-minute intervals.

    Each row is assigned to the key ``(int(row[1]), minute rounded down to a
    multiple of n)``, where the minute is ``int(row[2])``. The value stored
    for the row is ``row[entry]``.

    Args:
        data: iterable of parsed rows (indexable sequences of strings).
        n: interval length in minutes.
        entry: index of the field to collect from each row.

    Returns:
        A dict mapping ``(hour, intervalStartMinute)`` to the list of
        collected field values, in the order the rows were seen.
    """
    buckets = {}
    for row in data:
        minute = int(row[2])
        # Snap the minute down to the start of its n-minute window.
        bucketKey = (int(row[1]), minute - minute % n)
        buckets.setdefault(bucketKey, []).append(row[entry])
    return buckets
86
+
87
if __name__ == "__main__":
    # Script entry point: plot the normalized entropy of one log field over
    # time, one point per 5-minute interval.
    log_file = "server-log.txt"
    data = importLog(log_file)

    # 5-minute windows over field index 6 of each parsed row. The original
    # author noted that index 7 is also of interest and that this (5, 6)
    # combination reveals the first attack.
    inter = getIntervals(data, 5, 6)

    # One (hour, minute, entropy) tuple per interval, in chronological order.
    entropy = sorted(
        (hour, minute, calcNormEntropy(values))
        for (hour, minute), values in inter.items()
    )

    # The x axis encodes the interval start as HHMM (hour * 100 + minute).
    timeAxis = [hour * 100 + minute for hour, minute, _ in entropy]
    entropyAxis = [value for _, _, value in entropy]
    plt.plot(timeAxis, entropyAxis)
    plt.show()
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
0 commit comments