devcon_tune_SVM_libelinear_feature_selection.R
## DEVCON LIBLINEAR + RF (randomForest) TUNING SCRIPT
## 06/05/2020
setwd("/home/harpo/Dropbox/ongoing-work/git-repos/devcon/phase3/")
# Load RF models
#load("results_rf_devcon_bestmodels_fgdata_cps_less_feat.rdata")
load("results_rf_devcon_bestmodels_fgdata_cps_20000_feat_2.rdata")
# Load dataset with full set of features
load("deconv_cgdata_cps_new_feat.RData")
library(caret)
library(LiblineaR)
library(foreach)
library(tibble)
library(randomForest)
library(doMC)
library(dplyr)
library(optparse)
registerDoMC(cores=6)
# Hyperparameter grid for the SVR: epsilon (svr_eps) and cost
epsilon_range <- c(0.005, 0.05, 0.1, 0.00005, 0.0005)
cost_range <- c(0.0001, 0.001, 0.01, 0.1, 0.5, 1, 2, 4, 8, 16, 32)
#### MAIN
option_list <- list(
  make_option("--experimenttag", action = "store", type = "character", default = "default-experiment", help = "Set experiment tag id"),
  make_option("--nfeature", action = "store", type = "numeric", default = 500, help = "Set the maximum number of features")
)
opt <- parse_args(OptionParser(option_list=option_list))
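# Example invocation (a sketch; the tag and feature count below are illustrative values, not defaults from a real run):
#   Rscript devcon_tune_SVM_libelinear_feature_selection.R --experimenttag my-run --nfeature 500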
#epsilon_range <- c(0.005)
#cost_range <- c(0.0001)
# Keep a handle on the RF models loaded above; the name results_final_models is reused below for the SVR models
rf_models <- results_final_models
# Select the top-n features for a given label, ranked by RF variable importance (IncNodePurity)
select_features <- function(data, label) {
  varimp <- randomForest::importance(rf_models[[label]])
  varimp <- varimp %>% as.data.frame() %>% tibble::add_column(feature = rownames(varimp))
  best_features <- (varimp %>% arrange(desc(IncNodePurity)))[1:opt$nfeature, ] %>%
    select(feature) %>% unlist() %>% unname()
  data[rownames(data) %in% best_features, ]
}
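# Usage sketch (assumes `train` and the label names in `trainprop` are loaded, as in the loop below):
# select the top-n rows of the expression matrix for one label, then transpose for LiblineaR.
#   trainset_example <- t(select_features(train, rownames(trainprop)[1]))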
parms <- expand.grid(cost = cost_range, epsilon = epsilon_range)
results_final <- c()
results_final_models <- list()
# Starting training ---------
for (label_number in rownames(trainprop)) {
  # Create datasets ----------
  labels <- trainprop[label_number, ]
  trainset <- t(select_features(train, label_number))
  #trainset <- scale(trainset, center = TRUE, scale = TRUE)
  #data_train <- cbind(label = labels, trainset)
  labels_test <- testprop[label_number, ]
  testset <- t(select_features(test, label_number))
  #data_test <- cbind(label = labels_test, testset)
  # Start parallel fine tuning -----------
  results <- foreach(i = 1:nrow(parms), .combine = rbind) %dopar% {
    c <- parms[i, ]$cost
    e <- parms[i, ]$epsilon
    # type 13: L2-regularized L1-loss support vector regression (dual solver)
    model <- LiblineaR(
      target = labels,
      data = trainset,
      type = 13,
      cost = c,
      svr_eps = e
    )
    preds <- predict(model, testset)
    spear <- cor(x = preds$predictions, y = labels_test, method = "spearman")
    pears <- cor(x = preds$predictions, y = labels_test, method = "pearson")
    partial_results <- data.frame(label_number, parms[i, ], pearson = pears, spearman = spear)
    partial_results
  }
  # Select best model ---------
  best_model <- results %>% arrange(desc(pearson)) %>% filter(row_number() == 1)
  print(paste("selecting best model for ", best_model$label_number, " : ", best_model$cost, ", ",
              best_model$epsilon, " Pearson value : ", best_model$pearson, sep = ""))
  # Refit on the training set with the best hyperparameters
  model <- LiblineaR(
    target = labels,
    data = trainset,
    type = 13,
    cost = best_model$cost,
    svr_eps = best_model$epsilon
  )
  results_final_models[[label_number]] <- model
  save(results_final_models,
       file = paste0("results_liblinear_devcon_bestmodels_fgdata_cps_20000_2", opt$experimenttag, ".rdata"),
       compress = "gzip")
  results_final <- rbind(results_final, results)
  readr::write_csv(results_final, path = paste0("results_liblinear_devcon_fgdata_cps_20000_2", opt$experimenttag, ".csv"))
}
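# Post-hoc usage sketch (assumption: the run used the default --experimenttag, so the file
# name below is illustrative): reload the saved per-label SVR models and predict on data
# restricted to the same top-n features that were selected at training time.
#   load("results_liblinear_devcon_bestmodels_fgdata_cps_20000_2default-experiment.rdata")
#   lbl <- rownames(trainprop)[1]
#   m <- results_final_models[[lbl]]
#   preds <- predict(m, t(select_features(test, lbl)))
#   head(preds$predictions)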