@@ -6,43 +6,10 @@ dataset_mix="hamishivi/rlvr_orz_math_57k_collected 56878"
 # all evals
 # evals="minerva_math::hamish_zs_reasoning,gsm8k::zs_cot_latex,gsm8k::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,zebralogic::hamish_zs_reasoning,aime::hamish_zs_reasoning,agi_eval_english:0shot_cot::hamish_zs_reasoning,gpqa:0shot_cot::hamish_zs_reasoning,ifeval::hamish_zs_reasoning,popqa::hamish_zs_reasoning,mmlu:cot::hamish_zs_reasoning,alpaca_eval_v3::hamish_zs_reasoning,bbh:cot::hamish_zs_reasoning,mbppplus:0-shot-chat::tulu-thinker,codex_humanevalplus:0-shot-chat-v1::tulu-thinker"
 # math evals
-evals="minerva_math::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,aime::hamish_zs_reasoning"
+evals="minerva_math::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,aime:zs_cot_r1::pass_at_32_2024_temp1,aime:zs_cot_r1::pass_at_32_2025_temp1"
 
-# all I've changed with the checkpoints is the config.json, model_type=olmo3 and architectures is OLMo3ForCausalLM
-# jacob tulu sft
-# model_name_or_path="/weka/oe-adapt-default/michaeln/olmo3/olmo3_reasoning-anneal-tulu3sft-olmo2hparams__8__1751523764/"
-# midtraining no reasoning
-# model_name_or_path="/weka/oe-adapt-default/michaeln/olmo3/anneal-round1-100B-olmo3_7b_no-reasoning-anneal-3c193128_step47684"
-# midtraining with reasoning
-# model_name_or_path="/weka/oe-adapt-default/michaeln/olmo3/anneal-round1-100B-olmo3_7b_with-reasoning-anneal-9d6f76b0_step47684"
-# micro anneals
-# model_name_or_path="/weka/oe-adapt-default/allennlp/deletable_checkpoint/michaeln/olmo3_microanneal-finemath-643cecc4_step4769-hf"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/kylel/baseline-olmo2_7b-928646-anneal-100B-dolma2-round1-alldressed-17b22b3a/step47684-hf"
-# gs_model_name="olmo2-alldressed-midtraingin"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round1-100B-olmo3_7b_with-reasoning-anneal-12T-3d39e871/step47684-hf"
-# gs_model_name="olmo3-midtraining-round1"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round2-100B-olmo3_7b_with-reasoning-anneal-12T-53f443c7/step47684-hf"
-#
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round3-webround2-100B-olmo3_7b_with-reasoning-anneal-12T-302b1ae8/step47684-hf"
-# gs_model_name="olmo3-midtraining-round3"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/ianm/decon-anneal-round3-webround2-100B-olmo3_7b_with-reasoning-anneal-12T-fc803782/step47684-hf"
-# gs_model_name="olmo3-midtraining-round3-decon"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round4-100B-olmo3_7b-anneal-decon-12T-081e9449/step47684-hf"
-# gs_model_name="olmo3-midtraining-round4"
-
-# model_name_or_path="/weka/oe-adapt-default/jacobm/checkpoints/olmo2-7B-sft/olmo3-hparam-search/olmo3-12t-r3-1e-4-2_epochs-olmo2-tulu3-mix-num_3"
-# gs_model_name="olmo3-midtraining-round3-jacobsft-num3"
-
-# model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round5-100B-olmo3_7b-anneal-decon-12T-00bb6023/step47684-hf"
-# gs_model_name="olmo3-midtraining-round5"
-
-model_name_or_path="/weka/oe-adapt-default/jacobm/checkpoints/olmo2-7B-sft/olmo3-hparam-search/olmo3-12t-r5-100b-olmo2-tulu3-mix-num_3/"
-gs_model_name="olmo3-midtraining-round5-jacobsft-mix3"
+model_name_or_path="/weka/oe-training-default/ai2-llm/checkpoints/OLMo3-midtraining/anneal-round5-100B-olmo3_7b-anneal-decon-12T-00bb6023/step47684-hf"
+gs_model_name="olmo3-midtraining-round5"
 
 exp_name="grpo_mathonly_1m_${gs_model_name}"
 EXP_NAME=${EXP_NAME:-${exp_name}}
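
Note on this hunk: AIME is now scored with 32 samples per problem at temperature 1 on both the 2024 and 2025 sets (as the pass_at_32_{2024,2025}_temp1 eval names suggest), the stack of commented-out checkpoint candidates is cleared out, and the base model moves to the round-5 midtraining anneal checkpoint. The last line uses bash default-value expansion, so a run name can be supplied at launch without editing the script; a minimal sketch, with a hypothetical script name:

    EXP_NAME=grpo_mathonly_debug bash grpo_mathonly.sh   # environment override wins
    bash grpo_mathonly.sh                                # falls back to grpo_mathonly_1m_${gs_model_name}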
@@ -60,7 +27,7 @@ python mason.py \
     --workspace ai2/tulu-thinker \
     --priority high \
     --pure_docker_mode \
-    --image michaeln/open_instruct_dev_uv_olmo3 \
+    --image michaeln/open_instruct_olmo3 \
     --preemptible \
     --num_nodes 2 \
     --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
@@ -75,10 +42,10 @@ python open_instruct/grpo_fast.py \
     --exp_name ${EXP_NAME} \
     --beta 0.0 \
     --num_samples_per_prompt_rollout 16 \
-    --num_unique_prompts_rollout 64 \
+    --num_unique_prompts_rollout 128 \
     --num_mini_batches 4 \
     --num_epochs 1 \
-    --learning_rate 5e-7 \
+    --learning_rate 1e-6 \
     --per_device_train_batch_size 1 \
    --kl_estimator kl3 \
    --dataset_mixer_list ${dataset_mix} \
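
Note on this hunk: unique prompts per rollout double from 64 to 128 at a fixed 16 samples per prompt, so each rollout grows from 1024 to 2048 completions, and the learning rate is doubled from 5e-7 to 1e-6 in step with the larger batch. A quick sanity check of the sizing, assuming --num_mini_batches splits each rollout evenly (as the flag name suggests):

    num_unique_prompts_rollout=128
    num_samples_per_prompt_rollout=16
    num_mini_batches=4
    rollout_size=$((num_unique_prompts_rollout * num_samples_per_prompt_rollout))
    echo "${rollout_size} completions per rollout"              # 2048 (was 64 * 16 = 1024)
    echo "$((rollout_size / num_mini_batches)) per mini-batch"  # 512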
@@ -101,9 +68,9 @@ python open_instruct/grpo_fast.py \
     --lr_scheduler_type constant \
     --apply_verifiable_reward true \
     --seed 1 \
-    --num_evals 5 \
-    --save_freq 100 \
-    --checkpoint_state_freq 50 \
+    --local_eval_every 25 \
+    --save_freq 25 \
+    --checkpoint_state_freq 25 \
     --gradient_checkpointing \
     --with_tracking \
     --vllm_enable_prefix_caching \
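
Note on this hunk: evaluation moves from a fixed budget over the whole run (--num_evals 5) to a fixed cadence (--local_eval_every 25 steps), and weight saves plus resumable training state are aligned to the same 25-step interval, so evals and checkpoints land on the same steps. A rough feel for the new cadence, assuming a hypothetical 500-step run for illustration:

    total_steps=500   # hypothetical run length, illustration only
    echo "$((total_steps / 25)) eval/save points"   # 20 points, versus 5 evals before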