exp_gen.py
#!/usr/bin/env python3
"""
Lists the Experiment baselines and training
"""
from classes import Experiment, Baseline, ModelConfig, TrainingConfig, TestingConfig
from constants import *
datasets = ['POP909-merged', 'GiantMIDI']
tokenizations = ['TSD', 'REMI']
bpe_factors = [0, 4, 10, 20, 50, 100]
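
# Model and training configurations. Going by the constant names, gen_* are
# used for the generators and cla_* for the classifiers (pre-training and
# fine-tuning).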
gen_model_conf = ModelConfig()
cla_model_conf = ModelConfig(dim=CLA_DIM, nb_heads=CLA_NB_HEADS, d_ffwd=CLA_D_FFWD, nb_layers=CLA_NB_LAYERS,
                             embed_sizes=[CLA_DIM] * 5)
cla_model_conf_large = ModelConfig(dim=CLA_LARGE_DIM, nb_heads=CLA_LARGE_NB_HEADS, d_ffwd=CLA_LARGE_D_FFWD,
                                   nb_layers=CLA_LARGE_NB_LAYERS, embed_sizes=[CLA_LARGE_DIM] * 5)
gen_train_conf = TrainingConfig()
cla_pre_train_conf = TrainingConfig(training_steps=CLA_PRE_TRAINING_STEPS, learning_rate=CLA_PT_LEARNING_RATE,
                                    batch_size=CLA_BATCH_SIZE, iterator_kwargs={'early_stop_steps': CLA_EARLY_STOP})
cla_fine_tune_conf = TrainingConfig(training_steps=CLA_TRAINING_STEPS, learning_rate=CLA_FT_LEARNING_RATE,
                                    batch_size=CLA_BATCH_SIZE, iterator_kwargs={'early_stop_steps': CLA_EARLY_STOP})
gen_test_conf = TestingConfig()
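
# One Experiment per (dataset, tokenization) pair; its Baselines cover the
# BPE factors plus the alternative token combinations added below.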
experiments = []
for dataset in datasets:
    for tokenization in tokenizations:
        exp_name = f'{dataset}_{tokenization}'
        baselines = [Baseline('noBPE' if bf == 0 else f'bpe{bf}', exp_name, dataset, bi, tokenization, bf,
                              gen_model_conf) for bi, bf in enumerate(bpe_factors)]
        for token_combination in ['PVm', 'PVDm']:
            baselines += [Baseline(token_combination, exp_name, dataset, len(baselines),
                                   f'{tokenization}{token_combination}', 0, gen_model_conf)]
        if tokenization == 'REMI':  # adds CPWord and Octuple for comparison
            for tok, embed_sizes in [('CPWord', EMBED_SIZES_CP), ('OctupleMono', EMBED_SIZES_OCTUPLE)]:
                baselines += [Baseline(tok, exp_name, f'{dataset}-short' if tok == 'OctupleMono' else dataset,
                                       len(baselines), tok, 0, ModelConfig(embed_sizes=embed_sizes))]
        experiments.append(Experiment(baselines, dataset, tokenization, SEED, cla_model_conf, gen_train_conf,
                                      cla_pre_train_conf, cla_fine_tune_conf, gen_test_conf))
if __name__ == '__main__':
    from training import train_all_gen

    for exp_ in experiments:
        for baseline in exp_.baselines:
            baseline.load_tokenizer()
    for exp_ in experiments:
        train_all_gen(exp_)
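
A minimal sketch for checking the grid this script builds without launching any training. `exp_.baselines` is used by the script itself; exposing `dataset` and `tokenization` as attributes of `Experiment` is an assumption, not confirmed by this file.

from exp_gen import experiments  # importing only builds the grid; training is behind the __main__ guard

for exp_ in experiments:
    # dataset/tokenization as attributes are assumed; baselines is used above
    print(f'{exp_.dataset} / {exp_.tokenization}: {len(exp_.baselines)} baselines')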