-
Notifications
You must be signed in to change notification settings - Fork 0
/
finetune_best2010lm_tfbase12_enc_s2s_wisesight.json
84 lines (81 loc) · 1.92 KB
/
finetune_best2010lm_tfbase12_enc_s2s_wisesight.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
{
"model": {
"class": "SequenceTransferLearningWrapper",
"config": {
"output_class_num": 4,
"encoder_checkpoint": "_outputs_/best2010lm_tfbase12_enc_s2s_lrscale0.25/checkpoint/best_weight.h5",
"train_encoder": true,
"max_input_length": 256,
"drop_out": 0.4,
"cached_data_dir": "_cache_",
"encoder_model": {
"class": "TransformerEncoderOnlyWrapper",
"config": {
"len_limit": 256,
"d_model": 512,
"d_inner_hid": 2048,
"n_head": 8,
"d_k": 512,
"d_v": 512,
"layers": 12,
"dropout": 0.1,
"share_word_emb": true,
"max_input_length": 256,
"train_mask_only": true,
"cached_data_dir": "_cache_"
}
},
"encoder_dict_dataset": {
"class": "BEST2010LMDatasetWrapper",
"config": {
"base_data_dir": "_tmp_"
}
}
}
},
"dataset": {
"class": "WisesightDatasetWrapper",
"config": {
"base_data_dir": "_tmp_"
}
},
"input_transform": {
"class": "FullWordRandomMaskWrapper",
"config": {
"column_id": 0,
"percent_mask": 0,
"percent_mask_correct": 0,
"percent_mask_incorrect": 0,
"clf_pos_offset": 0,
"clf_id": 3
}
},
"output_transform": {
"class": "SingleClassTransformWrapper",
"config": {
"column_id": 1
}
},
"callbacks": [
{
"class": "DynamicLearningRateWrapper",
"config": {
"d_model": 512,
"warmup": 50000,
"scale": 0.5
}
}
],
"execution": {
"config": {
"optimizer": "adam",
"optimizer_params": [0.0001, 0.9, 0.98, 1e-9],
"batch_size": 32,
"epochs": 100,
"watch_metric": "val_acc",
"output_dir": "_outputs_/finetune_best2010lm_tfbase12_enc_s2s_wisesight",
"save_weight_history": false,
"resume_if_possible": true
}
}
}