File tree Expand file tree Collapse file tree 4 files changed +13
-13
lines changed Expand file tree Collapse file tree 4 files changed +13
-13
lines changed Original file line number Diff line number Diff line change @@ -55,18 +55,21 @@ def encode_data(config: DataConfig):
55
55
# before preprocessing:
56
56
fig = plot_value_distributions (values )
57
57
fig_path = str (
58
- output_path / "Value_distribution_{}_unprocessed.png" . format ( dataset_name )
58
+ output_path / f "Value_distribution_{ dataset_name } _unprocessed.png"
59
59
)
60
60
fig .savefig (fig_path )
61
61
62
- # Plotting the value distribution for all continuous datasets:
63
- fig = plot_value_distributions (values )
64
- fig_path = str (output_path / f"Value_distribution_{ dataset_name } .png" )
65
- fig .savefig (fig_path )
66
-
67
62
if scale :
63
+ logger .debug (
64
+ f"Scaling dataset: { dataset_name } , log2 transform: { input_config .log2 } "
65
+ )
68
66
values , mask_1d = preprocessing .scale (values , input_config .log2 )
69
67
names = names [mask_1d ]
70
68
logger .debug (f"Columns with zero variance: { np .sum (~ mask_1d )} " )
69
+ # Plotting the value distribution for all continuous datasets:
70
+ fig = plot_value_distributions (values )
71
+ fig_path = str (output_path / f"Value_distribution_{ dataset_name } .png" )
72
+ fig .savefig (fig_path )
73
+
71
74
io .dump_names (interim_data_path / f"{ dataset_name } .txt" , names )
72
75
np .save (interim_data_path / f"{ dataset_name } .npy" , values )
Original file line number Diff line number Diff line change @@ -72,13 +72,10 @@ def training_loop(
72
72
counter = 0
73
73
74
74
kld_weight = 0.0
75
- kld_rate = 20 / len (kld_warmup_steps )
76
- kld_multiplier = 1 + kld_rate
77
75
78
76
for epoch in range (1 , num_epochs + 1 ):
79
77
if epoch in kld_warmup_steps :
80
- kld_weight = 0.05 * kld_multiplier
81
- kld_multiplier += kld_rate
78
+ kld_weight += 1 / len (kld_warmup_steps )
82
79
83
80
if epoch in batch_dilation_steps :
84
81
train_dataloader = dilate_batch (train_dataloader )
Original file line number Diff line number Diff line change @@ -22,4 +22,4 @@ continuous_inputs: # a list of continuous datasets
22
22
- name : random.continuous.metagenomics # filename in raw_data_path
23
23
log2 : true # log2 transform data
24
24
scale : true # scale data
25
-
25
+
Original file line number Diff line number Diff line change @@ -18,8 +18,8 @@ categorical_inputs: # a list of categorical datasets
18
18
19
19
continuous_inputs : # a list of continuous datasets
20
20
- name : random.small.proteomics # filename in raw_data_path
21
- scale : true # scale data
22
- log2 : true # log2 transform data
21
+ log2 : true # apply log2 before scaling
22
+ scale : true # scale data (z-score normalize)
23
23
- name : random.small.metagenomics # filename in raw_data_path
24
24
scale : true # scale data
25
25
log2 : true # log2 transform data
You can’t perform that action at this time.
0 commit comments