File tree
197 files changed
+5140
-79
lines changed- .gitlab/stages
- tests
- functional_tests
- jet_recipes
- python_test_utils/jet
- shell_test_utils
- test_cases
- bert
- bert_mr_mcore_tp2_pp2_dgx_a100_1N8G
- bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G
- bert_mr_tp1_pp4_vp2_dgx_a100_1N8G
- bert_mr_tp2_pp2_dgx_a100_1N8G
- bert_nightly_dgx_a100_1N8G_mcore_tp1_pp2
- bert_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2
- bert_nightly_dgx_a100_1N8G_mcore_tp4_pp1
- bert_nightly_dgx_a100_1N8G_tp1_pp2
- bert_nightly_dgx_a100_1N8G_tp4_pp1
- gpt
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts
- gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts
- gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch
- gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_nondet_tp1_pp1_fp8_no_model_parallel
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_bf16_baseline
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp1_fp8_no_model_parallel
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp1_pp2_fp8_pp
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp_sp
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_fp8_tp_pp
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp2_pp2_native_fp8_tp_pp_sp
- gpt3_345m_weekly_dgx_h100_1N8G_mcore_tp4_pp2_fp8_tp_pp
- gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp1_pp4_vp1_uneven_pipeline_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G
- gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G
- gpt3_mr_te_tp2_pp2_dgx_a100_1N8G
- gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G
- gpt3_mr_tp2_pp2_dgx_a100_1N8G
- multimodal-llava
- multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G
- multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G
- t5
- t5_220m_mr_mcore_te_tp2_pp2_dgx_a100_1N8G
- t5_220m_mr_mcore_te_tp4_pp1_dgx_a100_1N8G
- t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G
- t5_220m_mr_mcore_tp4_pp1_dgx_a100_1N8G
- t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1_sequence_parallel
- t5_220m_weekly_dgx_a100_1N8G_mcore_te_tp2_pp1_vp1
- t5_220m_weekly_dgx_a100_1N8G_mcore_tp1_pp1_vp1
- t5_220m_weekly_dgx_a100_1N8G_mcore_tp2_pp1_vp1
- unit_tests
- dist_checkpointing
- models
- models
- transformer
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
197 files changed
+5140
-79
lines changedLines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
23 | 23 |
| |
24 | 24 |
| |
25 | 25 |
| |
26 |
| - | |
| 26 | + | |
27 | 27 |
| |
28 | 28 |
| |
29 | 29 |
| |
| |||
92 | 92 |
| |
93 | 93 |
| |
94 | 94 |
| |
95 |
| - | |
96 |
| - | |
| 95 | + | |
| 96 | + | |
97 | 97 |
| |
98 | 98 |
| |
| 99 | + | |
| 100 | + | |
99 | 101 |
| |
100 | 102 |
| |
101 | 103 |
| |
| |||
109 | 111 |
| |
110 | 112 |
| |
111 | 113 |
| |
| 114 | + | |
| 115 | + | |
| 116 | + | |
112 | 117 |
| |
113 | 118 |
| |
114 | 119 |
| |
| |||
118 | 123 |
| |
119 | 124 |
| |
120 | 125 |
| |
121 |
| - | |
| 126 | + | |
122 | 127 |
| |
123 |
| - | |
124 |
| - | |
125 |
| - | |
126 |
| - | |
| 128 | + | |
127 | 129 |
| |
128 | 130 |
| |
129 | 131 |
| |
|
Lines changed: 63 additions & 8 deletions
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
51 | 51 |
| |
52 | 52 |
| |
53 | 53 |
| |
| 54 | + | |
54 | 55 |
| |
55 | 56 |
| |
| 57 | + | |
56 | 58 |
| |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
57 | 69 |
| |
58 |
| - | |
59 |
| - | |
| 70 | + | |
| 71 | + | |
60 | 72 |
| |
61 | 73 |
| |
62 | 74 |
| |
63 |
| - | |
| 75 | + | |
| 76 | + | |
64 | 77 |
| |
65 | 78 |
| |
66 |
| - | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
67 | 98 |
| |
68 | 99 |
| |
69 | 100 |
| |
70 | 101 |
| |
71 | 102 |
| |
72 |
| - | |
| 103 | + | |
73 | 104 |
| |
74 | 105 |
| |
75 | 106 |
| |
| |||
81 | 112 |
| |
82 | 113 |
| |
83 | 114 |
| |
84 |
| - | |
| 115 | + | |
85 | 116 |
| |
86 | 117 |
| |
87 |
| - | |
| 118 | + | |
88 | 119 |
| |
89 | 120 |
| |
90 | 121 |
| |
| |||
96 | 127 |
| |
97 | 128 |
| |
98 | 129 |
| |
99 |
| - | |
| 130 | + | |
100 | 131 |
| |
101 | 132 |
| |
102 | 133 |
| |
| |||
106 | 137 |
| |
107 | 138 |
| |
108 | 139 |
| |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + |
Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change | |
---|---|---|---|
| |||
11 | 11 |
| |
12 | 12 |
| |
13 | 13 |
| |
14 |
| - | |
| 14 | + | |
15 | 15 |
| |
16 | 16 |
| |
17 | 17 |
| |
| |||
23 | 23 |
| |
24 | 24 |
| |
25 | 25 |
| |
26 |
| - | |
27 |
| - | |
28 |
| - | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
29 | 29 |
| |
30 | 30 |
| |
31 | 31 |
| |
| |||
40 | 40 |
| |
41 | 41 |
| |
42 | 42 |
| |
43 |
| - | |
44 |
| - | |
45 |
| - | |
46 |
| - | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
47 | 46 |
| |
48 | 47 |
| |
49 | 48 |
| |
|
0 commit comments