-
Notifications
You must be signed in to change notification settings - Fork 0
/
optimize_support2_cox.py
125 lines (110 loc) · 4.66 KB
/
optimize_support2_cox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
tf.compat.v1.random.set_random_seed(42)
from tensorflow_probability import distributions as tfd
from tensorflow.keras.layers import Input, Dense, Activation, Concatenate, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.svm import FastSurvivalSVM
from sksurv.tree import SurvivalTree
from sksurv.ensemble import RandomSurvivalForest
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
from sksurv.metrics import concordance_index_ipcw, integrated_brier_score, cumulative_dynamic_auc
from sksurv.util import Surv
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from model import MDN
from utils import NLLLoss,CensoredNLLLoss,AlternativeNLLLoss
from utils import concordance_index_censored_scorer,concordance_index_ipcw_scorer,integrated_brier_scorer,cumulative_dynamic_auc_scorer,root_mean_squared_error_scorer
import optuna
from optimizer import Optimizer,ML_Optimizer
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import json
from sksurv.datasets import load_veterans_lung_cancer
from sksurv.column import encode_categorical
import sys
# Ordered categories -> increasing numeric codes.  When applied with
# ``Series.map`` any value missing from a table (including NaN) becomes NaN.
INCOME_LEVELS = {
    "under $11k": 0.,
    "$11-$25k": 1.,
    "$25-$50k": 2.,
    ">$50k": 3.,
}
SFDM2_LEVELS = {
    "no(M2 and SIP pres)": 0.,
    "adl>=4 (>=5 if sur)": 1.,
    "SIP>=30": 2.,
    "Coma or Intub": 3.,
    "<2 mo. follow-up": 4.,
}

# "Normal" fill-in values used for missing measurements, whenever available.
NORMAL_VALUES = {
    "alb": 3.5,
    "pafi": 333.3,
    "bili": 1.01,
    "crea": 1.01,
    "bun": 6.51,
    "wblc": 9,
    "urine": 2502,
}

# Columns removed before modelling (flagged by the original author as
# "predictions").
DROP_COLUMNS = ["sps", "aps", "surv2m", "surv6m", "prg2m", "prg6m", "dnr", "dnrday"]

CAT_FEATS = ["sex", "hospdead", "dzgroup", "dzclass",
             "race", "diabetes", "dementia", "ca"]
NUM_FEATS = ["age", "slos", "num.co", "edu", "income",
             "scoma", "charges", "avtisst", "hday",
             "meanbp", "wblc", "hrt", "resp", "temp",
             "pafi", "alb", "bili", "crea", "sod",
             "bun", "urine", "sfdm2", "adlsc"]


def parse_cli_kwargs(argv):
    """Collect ``key=value`` command-line tokens into a dict.

    Args:
        argv: iterable of command-line tokens (without the program name).

    Returns:
        Dict mapping each key to its value.  The token is split on the FIRST
        ``=`` only, so a value may itself contain ``=``.  Tokens without ``=``
        and tokens that start with ``=`` (empty key) are ignored.  When a key
        is repeated the last occurrence wins.
    """
    options = {}
    for token in argv:
        key, sep, value = token.partition("=")
        if sep and key:
            options[key] = value
    return options


def main(argv=None):
    """Tune a survival model on the SUPPORT2 data and log the best parameters.

    Reads ``Data/support2.csv``, encodes and imputes the features, runs
    ``ML_Optimizer`` with k-fold evaluation and without LogRank scoring, and
    writes the best parameters plus score to
    ``Logs/<model>_support2_data_no_logrank.json``.

    Args:
        argv: command-line tokens; defaults to ``sys.argv[1:]``.  The only
            option read is ``model=<name>`` (default ``"CoxPH"``).
    """
    import os
    from preprocessing import Preprocessor

    if argv is None:
        argv = sys.argv[1:]
    kwargs = parse_cli_kwargs(argv)
    model = kwargs.get("model", "CoxPH")
    print(model)
    name = f"{model}_support2_data"

    # Create the output directory up front so a missing folder cannot discard
    # the result of the (long) optimisation run at the very end.
    os.makedirs("Logs", exist_ok=True)

    # Load and Preprocess
    df = pd.read_csv("Data/support2.csv", index_col="id")
    df = df.drop(DROP_COLUMNS, axis=1)

    # Rescale follow-up time to [0, 1]; the scaler returns an (n, 1) array, so
    # flatten it before writing it back as a single column.
    time_scaler = MinMaxScaler()
    scaled_time = time_scaler.fit_transform(df["d.time"].to_numpy().reshape(-1, 1))
    df["d.time"] = scaled_time.ravel()
    t = df["d.time"].to_numpy().astype(np.float32)
    delta = df["death"].to_numpy().astype(np.float32)

    # Income and sfdm2 are ordered categories: make them numeric.
    df["income"] = df["income"].map(INCOME_LEVELS)
    df["sfdm2"] = df["sfdm2"].map(SFDM2_LEVELS)

    # Fix normal values, whenever available.
    df = df.fillna(NORMAL_VALUES)

    # NOTE(review): df still holds the targets "d.time" and "death" here, and
    # "hospdead" is listed as a feature.  Whether Preprocessor keeps columns
    # outside cat_feats/num_feats is not visible from this file -- confirm the
    # targets do not leak into X.
    preprocessor = Preprocessor(
        cat_feat_strat="mode",
        num_feat_strat="knn",
        scaling_strategy="minmax",
    )
    df = preprocessor.fit_transform(df, cat_feats=CAT_FEATS, num_feats=NUM_FEATS)
    X = df.to_numpy().astype(np.float32)
    y = np.stack([t, delta], axis=1)

    # Run Optimizer
    # A LogRank-scored variant (use_logrank=True, result stored under key
    # 'UnoC_LR' in Logs/{name}.json) used to run here and is currently disabled.
    print("Running Optimizer normaly")
    opt = ML_Optimizer(X, y, model=model, name=name, use_kfold=True, use_logrank=False)
    best_val, best_params = opt(1000)  # presumably the number of trials -- verify in optimizer.py
    best_params['UnoC'] = best_val
    print(best_params)
    with open(f"Logs/{name}_no_logrank.json", "w") as write_file:
        json.dump(best_params, write_file)


if __name__ == '__main__':
    main()