Skip to content

Commit fe8c48b

Browse files
authored
Merge branch 'developer' into main
2 parents c5002cd + 8d0587a commit fe8c48b

File tree

4 files changed

+13
-13
lines changed

4 files changed

+13
-13
lines changed

src/move/tasks/encode_data.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,21 @@ def encode_data(config: DataConfig):
5555
# before preprocessing:
5656
fig = plot_value_distributions(values)
5757
fig_path = str(
58-
output_path / "Value_distribution_{}_unprocessed.png".format(dataset_name)
58+
output_path / f"Value_distribution_{dataset_name}_unprocessed.png"
5959
)
6060
fig.savefig(fig_path)
6161

62-
# Plotting the value distribution for all continuous datasets:
63-
fig = plot_value_distributions(values)
64-
fig_path = str(output_path / f"Value_distribution_{dataset_name}.png")
65-
fig.savefig(fig_path)
66-
6762
if scale:
63+
logger.debug(
64+
f"Scaling dataset: {dataset_name}, log2 transform: {input_config.log2}"
65+
)
6866
values, mask_1d = preprocessing.scale(values, input_config.log2)
6967
names = names[mask_1d]
7068
logger.debug(f"Columns with zero variance: {np.sum(~mask_1d)}")
69+
# Plotting the value distribution for all continuous datasets:
70+
fig = plot_value_distributions(values)
71+
fig_path = str(output_path / f"Value_distribution_{dataset_name}.png")
72+
fig.savefig(fig_path)
73+
7174
io.dump_names(interim_data_path / f"{dataset_name}.txt", names)
7275
np.save(interim_data_path / f"{dataset_name}.npy", values)

src/move/training/training_loop.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,10 @@ def training_loop(
7272
counter = 0
7373

7474
kld_weight = 0.0
75-
kld_rate = 20 / len(kld_warmup_steps)
76-
kld_multiplier = 1 + kld_rate
7775

7876
for epoch in range(1, num_epochs + 1):
7977
if epoch in kld_warmup_steps:
80-
kld_weight = 0.05 * kld_multiplier
81-
kld_multiplier += kld_rate
78+
kld_weight += 1 / len(kld_warmup_steps)
8279

8380
if epoch in batch_dilation_steps:
8481
train_dataloader = dilate_batch(train_dataloader)

tutorial/config/data/random_continuous.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@ continuous_inputs: # a list of continuous datasets
2222
- name: random.continuous.metagenomics # filename in raw_data_path
2323
log2: true # log2 transform data
2424
scale: true # scale data
25-
25+

tutorial/config/data/random_small.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ categorical_inputs: # a list of categorical datasets
1818

1919
continuous_inputs: # a list of continuous datasets
2020
- name: random.small.proteomics # filename in raw_data_path
21-
scale: true # scale data
22-
log2: true # log2 transform data
21+
log2: true # apply log2 before scaling
22+
scale: true # scale data (z-score normalize)
2323
- name: random.small.metagenomics # filename in raw_data_path
2424
scale: true # scale data
2525
log2: true # log2 transform data

0 commit comments

Comments (0)