-
Notifications
You must be signed in to change notification settings - Fork 0
/
Task12.m
74 lines (59 loc) · 2.64 KB
/
Task12.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
% Task12: train a k-nearest-neighbour classifier on data_5.dat, evaluate it on
% a 20% hold-out split and append the class-1 accuracy to Results/results.csv.

% Import the dataset. fullfile builds the path with the platform's file
% separator, so the script is not tied to Windows-style backslashes.
data = readtable(fullfile('Datasets', 'data_5.dat'));
% No handling of missing values or categorical data is needed. Outlier
% detection and feature scaling are skipped as well, because there is no
% greater-or-lesser (ordinal) relation between system calls.
% Var6 is the label: 1 for malicious, 0 for benign.
% Hamming distance is used instead of Euclidean distance because the
% variables have no greater-or-lesser relation to each other.
%-----------Build classification model
classification_model = fitcknn(data, 'Var6', 'Distance', 'hamming');
%-----------Partitioning
% Hold out 20% of the observations for testing.
cv = cvpartition(classification_model.NumObservations, 'HoldOut', 0.2);
cross_validated_model = crossval(classification_model, 'cvpartition', cv);
%-----------Make predictions for the testing set
% NOTE(review): 1:end-1 assumes the response Var6 is the last table column -
% confirm against the dataset.
Predictions = predict(cross_validated_model.Trained{1}, data(test(cv), 1:end-1));
%-----------Analyzing the predictions
Results = confusionmat(cross_validated_model.Y(test(cv)), Predictions);
Evaluation_results = confusionmatStats(cross_validated_model.Y(test(cv)), Predictions);
%-----------Storing the results
% Append mode keeps results written by earlier runs or other scripts.
[fileID, openMessage] = fopen(fullfile('Results', 'results.csv'), 'a');
if fileID == -1
    % Fail here with the reason reported by fopen instead of letting fprintf
    % raise an unrelated "invalid file identifier" error.
    error('Task12:resultsFileOpenFailed', ...
        'Could not open the results file for appending: %s', openMessage);
end
try
    fprintf(fileID, 'KNN, %f \n', Evaluation_results.accuracy(1));
catch writeError
    % Do not leak the file handle when the write fails.
    fclose(fileID);
    rethrow(writeError);
end
fclose(fileID);
function stats = confusionmatStats(group,grouphat)
%CONFUSIONMATSTATS Per-class performance metrics derived from a confusion matrix.
%   STATS = CONFUSIONMATSTATS(GROUP, GROUPHAT) computes the confusion matrix
%   with confusionmat(GROUP, GROUPHAT), where GROUP holds the known labels and
%   GROUPHAT the predicted labels, and derives one-vs-rest metrics per class.
%
%   STATS = CONFUSIONMATSTATS(CM) treats the single argument as an already
%   computed square confusion matrix (rows are the known class, columns the
%   predicted class).
%
%   STATS is a scalar struct with the fields:
%     confusionMat - the confusion matrix that was used
%     accuracy     - (TP + TN) / total number of samples, per class
%     sensitivity  - TP / (TP + FN), per class
%     specificity  - TN / (FP + TN), per class
%     precision    - TP / (TP + FP), per class
%     recall       - same values as sensitivity
%     Fscore       - 2*TP / (2*TP + FP + FN), per class
%     groupOrder   - class order returned by confusionmat (two-argument form
%                    only)
%   Every metric is a column vector with one entry per class. A ratio whose
%   denominator is zero is not special-cased and evaluates to NaN (0/0).

hasGroupOrder = nargin >= 2;
if hasGroupOrder
    [confMat, gorder] = confusionmat(group, grouphat);
else
    confMat = group;
end

% A confusion matrix must be square; anything else would silently yield
% meaningless numbers.
if ~ismatrix(confMat) || size(confMat, 1) ~= size(confMat, 2)
    error('confusionmatStats:nonSquareMatrix', ...
        'The confusion matrix must be square, but its size is %s.', ...
        mat2str(size(confMat)));
end

totalSamples = sum(confMat(:));

% One-vs-rest counts for every class at once.
TP = diag(confMat);
TP = TP(:);                       % keep a column vector even for empty input
FP = sum(confMat, 1).' - TP;      % predicted as the class, but belongs elsewhere
FN = sum(confMat, 2) - TP;        % belongs to the class, but predicted elsewhere
TN = totalSamples - TP - FP - FN; % everything outside the class row and column

accuracy    = (TP + TN) ./ totalSamples;
sensitivity = TP ./ (TP + FN);
specificity = TN ./ (FP + TN);
precision   = TP ./ (TP + FP);
f_score     = 2 .* TP ./ (2 .* TP + FP + FN);

stats = struct( ...
    'confusionMat', confMat, ...
    'accuracy',     accuracy, ...
    'sensitivity',  sensitivity, ...
    'specificity',  specificity, ...
    'precision',    precision, ...
    'recall',       sensitivity, ...
    'Fscore',       f_score);

if hasGroupOrder
    % Assign the field directly instead of passing it to struct(): confusionmat
    % returns a cell array for cellstr labels, and struct() would expand a cell
    % value into a struct array with one element per class.
    stats.groupOrder = gorder;
end
end