forked from ashutoshism/Pima-Indians-Diabetes-Database
-
Notifications
You must be signed in to change notification settings - Fork 0
/
log_regression_diabetes.R
27 lines (22 loc) · 952 Bytes
/
log_regression_diabetes.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
library(readr)
library(caTools)
# Load data ----
# The data directory is named once and the CSV is read through its full path,
# so the script does not need setwd() and leaves the caller's working
# directory untouched.
data_dir <- "E:/ashutosh/Data science/projects/Pima Indians Diabetes Database"
data_main <- read_csv(file.path(data_dir, "diabetes.csv"))
# Train-test split ----
# sample.split() works on the vector of class labels, not on a whole data
# frame: handed a data frame it produces one flag per column, which cannot be
# used to select rows correctly. Splitting on Outcome gives one flag per row
# (TRUE = training row) for a 75/25 split.
set.seed(7)
is_train <- sample.split(data_main$Outcome, SplitRatio = 0.75)
train <- data_main[is_train, ]
test <- data_main[!is_train, ]
# Logistic regression model ----
# Predict Outcome from every column except Insulin and SkinThickness.
# `data` is passed by name: the second positional formal of glm() is `family`.
model <- glm(
  Outcome ~ . - Insulin - SkinThickness,
  data = train,
  family = "binomial"
)

# Predicted probabilities for the held-out rows.
test_prob <- predict(model, newdata = test, type = "response")

# Confusion matrix at the chosen probability cut-off. The predicted side is a
# factor with fixed levels so the table keeps both columns even when every
# prediction falls on the same side of the cut-off.
threshold <- 0.3
conf_matrix <- table(
  actual = test$Outcome,
  Predicted = factor(test_prob > threshold, levels = c(FALSE, TRUE))
)
print(conf_matrix)

# Accuracy: correctly classified cases (the diagonal) over all cases.
print(sum(diag(conf_matrix)) / sum(conf_matrix))
# Finding the optimum threshold ----
# ROC curve (true positive rate against false positive rate) computed from
# the model's predictions on the training data, with cut-off values printed
# along the curve.
library(ROCR)
train_prob <- predict(model, newdata = train, type = "response")
roc_pred <- prediction(train_prob, train$Outcome)
roc_perf <- performance(roc_pred, "tpr", "fpr")
# TRUE rather than T (T is an ordinary variable that can be reassigned), and
# the upper end of the cut-off grid is written out instead of relying on
# seq()'s default `to = 1`.
plot(roc_perf, colorize = TRUE, print.cutoffs.at = seq(0.1, 1, by = 0.1))