Commit 31136c4

cleaner olmo3 script
1 parent 975323f commit 31136c4

File tree: 1 file changed (+9 −42 lines)

scripts/train/rlvr/grpo_olmo3.sh

Lines changed: 9 additions & 42 deletions
@@ -6,43 +6,10 @@ dataset_mix="hamishivi/rlvr_orz_math_57k_collected 56878"
 # all evals
 # evals="minerva_math::hamish_zs_reasoning,gsm8k::zs_cot_latex,gsm8k::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,zebralogic::hamish_zs_reasoning,aime::hamish_zs_reasoning,agi_eval_english:0shot_cot::hamish_zs_reasoning,gpqa:0shot_cot::hamish_zs_reasoning,ifeval::hamish_zs_reasoning,popqa::hamish_zs_reasoning,mmlu:cot::hamish_zs_reasoning,alpaca_eval_v3::hamish_zs_reasoning,bbh:cot::hamish_zs_reasoning,mbppplus:0-shot-chat::tulu-thinker,codex_humanevalplus:0-shot-chat-v1::tulu-thinker"
 # math evals
-evals="minerva_math::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,aime::hamish_zs_reasoning"
+evals="minerva_math::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,aime:zs_cot_r1::pass_at_32_2024_temp1,aime:zs_cot_r1::pass_at_32_2025_temp1"

-# all I've changed with the checkpoints is the config.json, model_type=olmo3 and architectures is OLMo3ForCausalLM
-# jacob tulu sft
-# model_name_or_path="/weka/oe-adapt-default/michaeln/olmo3/olmo3_reasoning-anneal-tulu3sft-olmo2hparams__8__1751523764/"
-# midtraining no reasoning
-# model_name_or_path="/weka/oe-adapt-default/michaeln/olmo3/anneal-round1-100B-olmo3_7b_no-reasoning-anneal-3c193128_step47684"
-# midtraining with reasoning
-# model_name_or_path="/weka/oe-adapt-default/michaeln/olmo3/anneal-round1-100B-olmo3_7b_with-reasoning-anneal-9d6f76b0_step47684"
-# micro anneals
-# model_name_or_path="/weka/oe-adapt-default/allennlp/deletable_checkpoint/michaeln/olmo3_microanneal-finemath-643cecc4_step4769-hf"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/kylel/baseline-olmo2_7b-928646-anneal-100B-dolma2-round1-alldressed-17b22b3a/step47684-hf"
-# gs_model_name="olmo2-alldressed-midtraingin"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round1-100B-olmo3_7b_with-reasoning-anneal-12T-3d39e871/step47684-hf"
-# gs_model_name="olmo3-midtraining-round1"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round2-100B-olmo3_7b_with-reasoning-anneal-12T-53f443c7/step47684-hf"
-#
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round3-webround2-100B-olmo3_7b_with-reasoning-anneal-12T-302b1ae8/step47684-hf"
-# gs_model_name="olmo3-midtraining-round3"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/ianm/decon-anneal-round3-webround2-100B-olmo3_7b_with-reasoning-anneal-12T-fc803782/step47684-hf"
-# gs_model_name="olmo3-midtraining-round3-decon"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round4-100B-olmo3_7b-anneal-decon-12T-081e9449/step47684-hf"
-# gs_model_name="olmo3-midtraining-round4"
-
-# model_name_or_path="/weka/oe-adapt-default/jacobm/checkpoints/olmo2-7B-sft/olmo3-hparam-search/olmo3-12t-r3-1e-4-2_epochs-olmo2-tulu3-mix-num_3"
-# gs_model_name="olmo3-midtraining-round3-jacobsft-num3"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round5-100B-olmo3_7b-anneal-decon-12T-00bb6023/step47684-hf"
-# gs_model_name="olmo3-midtraining-round5"
-
-model_name_or_path="/weka/oe-adapt-default/jacobm/checkpoints/olmo2-7B-sft/olmo3-hparam-search/olmo3-12t-r5-100b-olmo2-tulu3-mix-num_3/"
-gs_model_name="olmo3-midtraining-round5-jacobsft-mix3"
+model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round5-100B-olmo3_7b-anneal-decon-12T-00bb6023/step47684-hf"
+gs_model_name="olmo3-midtraining-round5"

 exp_name="grpo_mathonly_1m_${gs_model_name}"
 EXP_NAME=${EXP_NAME:-${exp_name}}
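
For context, the unchanged line EXP_NAME=${EXP_NAME:-${exp_name}} is standard bash default expansion: the generated name is only a fallback, and callers can override it from the environment. A minimal usage sketch (the invocations and the experiment name grpo_math_debug_run below are illustrative placeholders, not part of this commit):

    # Override the experiment name from the environment:
    EXP_NAME=grpo_math_debug_run bash scripts/train/rlvr/grpo_olmo3.sh
    # Otherwise EXP_NAME falls back to grpo_mathonly_1m_${gs_model_name}:
    bash scripts/train/rlvr/grpo_olmo3.sh
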
@@ -60,7 +27,7 @@ python mason.py \
     --workspace ai2/tulu-thinker \
     --priority high \
     --pure_docker_mode \
-    --image michaeln/open_instruct_dev_uv_olmo3 \
+    --image michaeln/open_instruct_olmo3 \
     --preemptible \
     --num_nodes 2 \
     --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
@@ -75,10 +42,10 @@ python open_instruct/grpo_fast.py \
     --exp_name ${EXP_NAME} \
     --beta 0.0 \
     --num_samples_per_prompt_rollout 16 \
-    --num_unique_prompts_rollout 64 \
+    --num_unique_prompts_rollout 128 \
     --num_mini_batches 4 \
     --num_epochs 1 \
-    --learning_rate 5e-7 \
+    --learning_rate 1e-6 \
     --per_device_train_batch_size 1 \
     --kl_estimator kl3 \
     --dataset_mixer_list ${dataset_mix} \
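
The num_unique_prompts_rollout bump doubles the data gathered per step. A back-of-the-envelope sketch, assuming the usual reading of these flags (unique prompts times samples per prompt, split across mini-batches), which the diff itself does not spell out:

    # 128 unique prompts x 16 samples per prompt = 2048 rollouts per step
    # (previously 64 x 16 = 1024), split into 4 mini-batches:
    echo $((128 * 16))        # 2048 rollouts per training step
    echo $((128 * 16 / 4))    # 512 rollouts per mini-batch
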
@@ -101,9 +68,9 @@ python open_instruct/grpo_fast.py \
     --lr_scheduler_type constant \
     --apply_verifiable_reward true \
     --seed 1 \
-    --num_evals 5 \
-    --save_freq 100 \
-    --checkpoint_state_freq 50 \
+    --local_eval_every 25 \
+    --save_freq 25 \
+    --checkpoint_state_freq 25 \
     --gradient_checkpointing \
     --with_tracking \
     --vllm_enable_prefix_caching \
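
The last hunk replaces the count-based --num_evals 5 with a step-based --local_eval_every 25 and puts weight saving and optimizer-state checkpointing on the same interval. A hedged sketch of the resulting cadence, assuming all three flags count training steps (the diff itself does not say so):

    # Every 25 steps, local eval, weight save, and state checkpoint coincide,
    # so each saved checkpoint has a matching eval.
    step=75
    if (( step % 25 == 0 )); then
      echo "step ${step}: local eval + save weights + checkpoint optimizer state"
    fi
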
