1 file changed: +4 -2 lines changed
Original file line number | Diff line number | Diff line change
6
6
7
7
from olmo_core .config import DType
8
8
from olmo_core .distributed .parallel import DataParallelType
9
+ from olmo_core .float8 import Float8Config
9
10
from olmo_core .internal .experiment import CommonComponents , main
10
11
from olmo_core .nn .transformer import (
11
12
TransformerActivationCheckpointingConfig ,
@@ -30,13 +31,14 @@ def build_model_config(common: CommonComponents) -> TransformerConfig:
30
31
ac_config = TransformerActivationCheckpointingConfig (
31
32
mode = TransformerActivationCheckpointingMode .full
32
33
),
34
+ float8_config = Float8Config (compile = True ),
33
35
)
34
36
35
37
36
38
def build_optim_config (common : CommonComponents ) -> AdamWConfig :
37
39
del common
38
40
return AdamWConfig (
39
- lr = 3e -4 ,
41
+ lr = 6e -4 ,
40
42
weight_decay = 0.1 ,
41
43
betas = (0.9 , 0.95 ),
42
44
group_overrides = [
@@ -50,7 +52,7 @@ def build_trainer_config(common: CommonComponents) -> TrainerConfig:
50
52
return (
51
53
TrainerConfig (
52
54
save_folder = common .save_folder ,
53
- rank_microbatch_size = 1 * 4096 ,
55
+ rank_microbatch_size = 4 * 4096 ,
54
56
save_overwrite = True ,
55
57
metrics_collect_interval = 10 ,
56
58
cancel_check_interval = 1 ,
You can’t perform that action at this time.
0 commit comments