-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmms_waf_modeling_ml_algo.py
93 lines (72 loc) · 3.05 KB
/
mms_waf_modeling_ml_algo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# -*- coding: utf-8 -*-
"""mms waf modeling ml algo.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1h2OcDhgHiLVx6_u5HlfTz9roYKkPH9-K
# Modeling and evaluation of machine learning model
Import all dependencies
"""
# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import string
from IPython.display import display
#evaluations
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
# Load the payload feature table; the CSV's "index" column becomes the row
# index of the frame.
data = pd.read_csv(
    "/home/ubuntu/tools/mms_waf/final-allpayload.csv",
    index_col="index",
)

# Selecting dependent and independent variables:
#   X - every column from position 3 onwards (the model inputs),
#   Y - the 'is_malicious' column (the model target).
# An earlier, disabled variant built the variable list from data.columns
# after deleting the entry at position 1.
X = data.iloc[:, 3:]
Y = data['is_malicious']
"""# Modeling data with Xgboost Classifier"""
import xgboost as xgb

# Build an XGBoost classifier with its default settings and fit it on the
# selected inputs (X) and target (Y).
xgb_classifer = xgb.XGBClassifier()
xgb_classifer.fit(X, Y)

# Commented out IPython magic to ensure Python compatibility.
# %time xgb_classifer.fit(X,Y)

# Keep the model's predictions for the rows it was fitted on next to the
# 'is_malicious' column so the two can be compared in `data`.
data['predicted_is_malicious'] = xgb_classifer.predict(X)
"""# Integration with website"""
# Names of the ten payload features, in the same order in which
# calculate_features_and_predict fills its feature dict.
independent_variables = [
    'length',
    'non-printable',
    'punctuation',
    'min-byte',
    'max-byte',
    'mean-byte',
    'std-byte',
    'distinct-byte',
    'sql-keywords',
    'js-keywords',
]
# NOTE(review): the list above is replaced right away by the full column
# index of `data`; only a commented-out line in this file refers to the
# name afterwards -- confirm which value is intended.
independent_variables = data.columns

# Keyword tables used for the 'sql-keywords' / 'js-keywords' features; the
# feature code reads their 'Keyword' column.
sql_keywords = pd.read_csv(
    '/home/ubuntu/tools/mms_waf/SQLKeywords.txt',
    index_col=False,
)
js_keywords = pd.read_csv(
    "/home/ubuntu/tools/mms_waf/JavascriptKeywords.txt",
    index_col=False,
)
def calculate_features_and_predict(payload):
    """Compute the feature row for ``payload`` and classify it.

    The payload is converted with ``str()`` and described by ten features,
    in this order: ``length``, ``non-printable``, ``punctuation``,
    ``min-byte``, ``max-byte``, ``mean-byte``, ``std-byte``,
    ``distinct-byte``, ``sql-keywords`` and ``js-keywords``. The byte
    statistics are taken over the UTF-8 encoding of the payload; the keyword
    features count how many entries of the module-level ``sql_keywords`` /
    ``js_keywords`` tables (column ``'Keyword'``) occur in the payload,
    ignoring case.

    An empty payload has no bytes, so its byte statistics are reported as 0
    instead of raising ``ValueError`` from ``min()``/``max()``.

    Args:
        payload: The value to score. Any object is accepted; it is
            converted to ``str`` first.

    Returns:
        The first element of ``xgb_classifer.predict`` for the one-row
        feature frame. The interactive loop in this file treats a value
        greater than 0 as malicious.

    Side effects:
        Shows the one-row feature frame through ``display``.
    """
    payload = str(payload)
    # Encode and lower-case once instead of once per feature / per keyword.
    byte_values = bytearray(payload, 'utf-8')
    lowered = payload.lower()

    def count_keywords(keywords):
        """Return how many of ``keywords`` occur in the payload (case-insensitive)."""
        return sum(str(keyword).lower() in lowered for keyword in keywords)

    if byte_values:
        min_byte = min(byte_values)
        max_byte = max(byte_values)
        mean_byte = np.mean(byte_values)
        std_byte = np.std(byte_values)
    else:
        # min()/max() raise on an empty sequence and mean/std would be NaN;
        # use neutral zeros so an empty payload can still be scored.
        min_byte = max_byte = 0
        mean_byte = std_byte = 0.0

    # Insertion order fixes the column order of the frame given to the model.
    features = {
        'length': len(payload),
        'non-printable': sum(letter not in string.printable for letter in payload),
        'punctuation': sum(letter in string.punctuation for letter in payload),
        'min-byte': min_byte,
        'max-byte': max_byte,
        'mean-byte': mean_byte,
        'std-byte': std_byte,
        'distinct-byte': len(set(byte_values)),
        'sql-keywords': count_keywords(sql_keywords['Keyword']),
        'js-keywords': count_keywords(js_keywords['Keyword']),
    }

    # NOTE(review): assumes these column names/order match the columns the
    # model was fitted on (data.iloc[:, 3:]) -- TODO confirm against the CSV.
    payload_df = pd.DataFrame(features, index=[0])
    display(payload_df)
    result = xgb_classifer.predict(payload_df)
    return result[0]
# Run the pipeline once on a minimal payload before going interactive.
calculate_features_and_predict("<>")

# Interactive check: classify every entered payload until the user types
# "exit" (or stdin is closed).
payload = ''
while True:
    try:
        payload = input("Enter payload")
    except EOFError:
        # stdin ran out (piped input, Ctrl-D): leave the loop quietly
        # instead of ending with a traceback.
        break
    # Test the sentinel before scoring, so that "exit" itself is not
    # classified and reported as a payload.
    if payload == 'exit':
        break
    result = calculate_features_and_predict(payload)
    if result > 0:
        print(f"Your payload {payload} is malicious - 403 error\n")
    else:
        print(f"Your payload {payload} is safe 200 OK\n")