diff --git a/tests/functional_tests/jet_recipes/bert.yaml b/tests/functional_tests/jet_recipes/bert.yaml
index 99bcb4e2e1..75aac2faab 100644
--- a/tests/functional_tests/jet_recipes/bert.yaml
+++ b/tests/functional_tests/jet_recipes/bert.yaml
@@ -29,7 +29,7 @@ spec:
 
 products:
   - scope: [mr]
-    time_limit: [1200]
+    time_limit: [12000]
     test_case: 
     - bert_mr_mcore_tp2_pp2_dgx_a100_1N8G
     - bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G
diff --git a/tests/functional_tests/jet_recipes/gpt-nemo.yaml b/tests/functional_tests/jet_recipes/gpt-nemo.yaml
index 9f5650842e..87a6fb2c23 100644
--- a/tests/functional_tests/jet_recipes/gpt-nemo.yaml
+++ b/tests/functional_tests/jet_recipes/gpt-nemo.yaml
@@ -9,7 +9,7 @@ spec:
   nodes: 1
   gpus: 8
   platforms: dgx_a100
-  time_limit: 1200
+  time_limit: 12000
   scope: null
   script: |-
     ls
diff --git a/tests/functional_tests/jet_recipes/gpt.yaml b/tests/functional_tests/jet_recipes/gpt.yaml
index e7098277a1..abaef86b81 100644
--- a/tests/functional_tests/jet_recipes/gpt.yaml
+++ b/tests/functional_tests/jet_recipes/gpt.yaml
@@ -29,7 +29,7 @@ spec:
 products:
   - scope: [mr]
     platforms: [dgx_a100]
-    time_limit: [1200]
+    time_limit: [12000]
     test_case:
     - gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G
     - gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G
diff --git a/tests/functional_tests/jet_recipes/multimodal-llava.yaml b/tests/functional_tests/jet_recipes/multimodal-llava.yaml
index 6b8302b03a..7a20b1145a 100644
--- a/tests/functional_tests/jet_recipes/multimodal-llava.yaml
+++ b/tests/functional_tests/jet_recipes/multimodal-llava.yaml
@@ -9,7 +9,7 @@ spec:
   nodes: 1
   gpus: 8
   platforms: dgx_a100
-  time_limit: 1200
+  time_limit: 12000
   scope: null
   script: |-
     ls
diff --git a/tests/functional_tests/jet_recipes/t5.yaml b/tests/functional_tests/jet_recipes/t5.yaml
index 87d2a476ac..947023b0eb 100644
--- a/tests/functional_tests/jet_recipes/t5.yaml
+++ b/tests/functional_tests/jet_recipes/t5.yaml
@@ -29,7 +29,7 @@ spec:
 
 products:
   - scope: [mr]
-    time_limit: [1200]
+    time_limit: [12000]
     test_case:
     - t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G
     - t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G
diff --git a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml
index 073585dee6..26d377fd02 100644
--- a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
diff --git a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml
index eb64af65e3..3a6d46e00d 100644
--- a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_vp2/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
diff --git a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml
index 598aa59793..24b9147500 100644
--- a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
diff --git a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml
index 4cdfc1c44b..f372ca18ce 100644
--- a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml
@@ -4,7 +4,7 @@ ENV_VARS:
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
   NVTE_APPLY_QK_LAYER_SCALING: 1
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
diff --git a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml
index 70846159d3..476366af7d 100644
--- a/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml
@@ -4,7 +4,7 @@ ENV_VARS:
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
   NVTE_APPLY_QK_LAYER_SCALING: 1
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml
index da970b1b3e..b6b7359e5a 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -40,4 +41,4 @@ MODEL_ARGS:
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
   --ckpt-format: torch
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml
index f30342bb1c..9f5de1eb86 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_local_spec_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -35,10 +36,10 @@ MODEL_ARGS:
   --eval-iters: 10
   --tensor-model-parallel-size: 2
   --pipeline-model-parallel-size: 2
-  --spec: local  
-  --deterministic-mode: true 
+  --spec: local
+  --deterministic-mode: true
   --no-gradient-accumulation-fusion: true
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
   --ckpt-format: torch
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
index d71d2d5b87..1f3c1cf607 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -35,11 +36,11 @@ MODEL_ARGS:
   --eval-iters: 10
   --tensor-model-parallel-size: 2
   --pipeline-model-parallel-size: 2
-  --deterministic-mode: true  
-  --use-checkpoint-args: true  
+  --deterministic-mode: true
+  --use-checkpoint-args: true
   --use-checkpoint-opt_param-scheduler: true
   --no-gradient-accumulation-fusion: true
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
   --ckpt-format: torch
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml
index 9ffd3f164f..ade42ea6f4 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_mcore_tp2_pp2_resume_torch_dist_local_spec_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -35,12 +36,12 @@ MODEL_ARGS:
   --eval-iters: 10
   --tensor-model-parallel-size: 2
   --pipeline-model-parallel-size: 2
-  --spec: local  
-  --deterministic-mode: true  
-  --use-checkpoint-args: true  
+  --spec: local
+  --deterministic-mode: true
+  --use-checkpoint-args: true
   --use-checkpoint-opt_param-scheduler: true
   --no-gradient-accumulation-fusion: true
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
   --ckpt-format: torch
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_dgx_a100_1N8G/model_config.yaml
index cd18e14d0e..38fd703ccf 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -44,4 +45,4 @@ MODEL_ARGS:
   --fp16: true
   --apply-query-key-layer-scaling: true
   --ckpt-format: torch
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G/model_config.yaml
index b7377a2397..041d95f9ba 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_tp1_pp4_vp2_resume_torch_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -37,13 +38,13 @@ MODEL_ARGS:
   --pipeline-model-parallel-size: 4
   --num-layers-per-virtual-pipeline-stage: 2
   --use-legacy-models: true
-  --transformer-impl: local  
-  --deterministic-mode: true  
-  --use-checkpoint-args: true  
+  --transformer-impl: local
+  --deterministic-mode: true
+  --use-checkpoint-args: true
   --use-checkpoint-opt_param-scheduler: true
   --no-gradient-accumulation-fusion: true
   --data-cache-path: ${DATA_CACHE_PATH}
-  --fp16: true  
+  --fp16: true
   --apply-query-key-layer-scaling: true
   --ckpt-format: torch
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml
index 4d85d383ed..a2a39e49a3 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -36,11 +37,11 @@ MODEL_ARGS:
   --tensor-model-parallel-size: 2
   --pipeline-model-parallel-size: 2
   --use-legacy-models: true
-  --transformer-impl: local  
+  --transformer-impl: local
   --deterministic-mode: true
   --no-gradient-accumulation-fusion: true
   --data-cache-path: ${DATA_CACHE_PATH}
-  --fp16: true  
+  --fp16: true
   --apply-query-key-layer-scaling: true
   --ckpt-format: torch
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
index aa37109915..e65fc9cc0d 100644
--- a/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/bert/bert_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 24
   --hidden-size: 1024
@@ -36,13 +37,13 @@ MODEL_ARGS:
   --tensor-model-parallel-size: 2
   --pipeline-model-parallel-size: 2
   --use-legacy-models: true
-  --transformer-impl: local  
-  --deterministic-mode: true  
-  --use-checkpoint-args: true  
+  --transformer-impl: local
+  --deterministic-mode: true
+  --use-checkpoint-args: true
   --use-checkpoint-opt_param-scheduler: true
   --no-gradient-accumulation-fusion: true
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
   --ckpt-format: torch
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_tp2_pp4_vp3_seq_par_overlap_p2p_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_tp2_pp4_vp3_seq_par_overlap_p2p_dgx_a100_1N8G/model_config.yaml
index 9dfedbcd0a..89c71f6291 100644
--- a/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_tp2_pp4_vp3_seq_par_overlap_p2p_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs1_gbs8_mcore_te_tp2_pp4_vp3_seq_par_overlap_p2p_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   SKIP_PYTEST: 1
+  N_REPEATS: 1
 MODEL_ARGS:
   trainer.num_nodes: 1
   trainer.devices: 8
@@ -32,4 +33,4 @@ MODEL_ARGS:
   model.sequence_parallel: 'True'
   model.overlap_p2p_comm: 'True'
   model.batch_p2p_comm: 'False'
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs4_gbs64_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs4_gbs64_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml
index dd9d35ef86..d7e926e96e 100644
--- a/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs4_gbs64_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt-nemo/gpt3-nemo_126m_mr_mbs4_gbs64_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   SKIP_PYTEST: 1
+  N_REPEATS: 1
 MODEL_ARGS:
   trainer.num_nodes: 1
   trainer.devices: 8
@@ -29,4 +30,4 @@ MODEL_ARGS:
   model.optim.name: distributed_fused_adam
   model.optim.weight_decay: 0.1
   exp_manager.create_checkpoint_callback: 'False'
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
index 62bc1cba5d..459270a1b2 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
index e780aed0e1..dcb80dc007 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml
index b2658b6a07..d94f5277d4 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml
index 69e9eeed24..9f210d838f 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_fp16/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml
index e2d3762795..b943bfec0f 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp2_resume_torch_dist/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml
index 7b98858b84..108cb6b1a4 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml
index d5a6a9a130..1c2a42eaaa 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp1_pp4_resume_torch_dist/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml
index fc589f94fa..cb0214f264 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml
index 08f556c1e2..97d3d8c5f0 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml
index 5dc534753c..1a15825731 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts/model_config.yaml
index 34dd7657f0..c6728722e2 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp2_pp2_te_2experts/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml
index 3039779e57..37cc4615a5 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml
index 56dc883536..528b691a28 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml
index 32ad67e2a4..4f5e8d93b7 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_mcore_tp4_pp1_resume_torch_dist/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml
index 93f704b7d8..64d504bf29 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce/model_config.yaml
index f115e94c06..190e5777f2 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp1_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml
index 488589f9f2..99d0ac8f6b 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch/model_config.yaml
index 7afec20da2..6242b2ebbc 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp2_resume_torch/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4/model_config.yaml
index 668241061c..81727e052d 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce/model_config.yaml
index 75d0037f4f..525d0f2c90 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch/model_config.yaml
index 176cd5d6de..516e1dd517 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_resume_torch/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce/model_config.yaml
index a683015714..10fc8c2f23 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp1_pp4_vp1_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts/model_config.yaml
index a995f9390f..ba219d4445 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_4experts/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce/model_config.yaml
index 460746e283..c547f47970 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts/model_config.yaml
index c80b1c225c..72c98e80be 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_4experts/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce/model_config.yaml
index 99fac43c7f..03ddd8a7ca 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp2_pp2_resume_torch_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml
index 3b61ee4ea1..84128fa780 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce/model_config.yaml
index f25579efe1..b664115f27 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_overlap_grad_reduce/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch/model_config.yaml
index 8d61af2bb5..0ec5d88ad9 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_345m_nightly_dgx_a100_1N8G_tp4_pp1_resume_torch/model_config.yaml
@@ -3,7 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
-  N_REPEATS: 10
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
index 80f727609f..ee84d93de2 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml
index c4dd031c19..ffdaec80ad 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
index 0af105d39d..9dd9e9ecd0 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml
index 6782b694cd..470ba6f926 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml
index fa5ce41aaa..fb07f9d30c 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp1_uniform_full_recompute_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
index 85941e4c7b..7cdb56dd00 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -52,4 +53,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml
index dc520751f8..7bdd0c46e2 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml
index f0070af373..b014fdabc0 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml
index b86c2fcb0d..b2a1643ec8 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml
index b8c0b09668..6c2c9e51ab 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml
index 309398f123..2e0188551a 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_disable_bias_linear_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml
index 995270875f..8fa10f4b9d 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml
index 539e4312f0..c64a4ef5e7 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml
index f0e0581593..dda1876e1a 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_swiglu_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml
index 4cf91fb542..df7ba9fb3b 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml
index c7c33314c3..479916c654 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_sequence_parallel_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml
index ae50df1ce8..20c57f0c95 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_swiglu_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml
index a95d943f21..f7c52c997f 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_untie_embeddings_and_outputs_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml
index 4c2ef387c8..210febf448 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml
index 7725cd9caa..fd67df60ca 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_decoupled_lr_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml
index f743e0943f..0c0bc85f61 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
index beae881c77..7a92bfd8cd 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
index cdff5e00b7..ef5b64d284 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -52,4 +53,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml
index 588c8a16f0..ca1de0ad37 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -53,4 +54,4 @@ MODEL_ARGS:
   --ckpt-format: torch
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml
index d373d7ccf3..30137a040d 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml
index 4e1ad296ed..1513a18192 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml
index 4e9cda0a24..077c9a36e8 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
index b4b28e9308..1ccbe1ae31 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
index ec4a2338a8..b9ca819495 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
index 18dde2b9cb..25ea6c933b 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -53,4 +54,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml
index a125bbe7a6..7b7bc27f4b 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -51,4 +52,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
index 75791d64f3..7da0cc5ddd 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
index 46d36da379..476a1b6b93 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml
index ba993c319d..613559a96e 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -53,4 +54,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
index af724f5eb0..a1f86a64c7 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -54,4 +55,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
index 688edd5164..fb5ed74f79 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -55,4 +56,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
index 32b1dd0ef4..cf4a90e410 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -58,4 +59,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
index 59ae9ff1e1..793bfb21d4 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -55,4 +56,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml
index 30b994493e..29b87e9073 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -52,4 +53,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
index 322fc34b1d..c4b791a9d4 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -53,4 +54,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
index 191ca9c652..c2631e84e0 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_groupedGEMM_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -54,4 +55,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
index 661775605d..bc5da0c312 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -57,4 +58,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
index 5043699d49..7c437e0b10 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_te_8experts2parallel_top2router_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -54,4 +55,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
index 2fd4614dd8..dde8a620d3 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml
index c28031708a..303182bcaf 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -44,4 +45,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
index 49530a366f..c08ce2e01c 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml
index 3bb836d36b..959c286a50 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
index 0dd40795b5..c9938b5ee1 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -46,4 +47,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml
index dfe5b75e8e..23060e55e4 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
index 9827106b20..32bd642deb 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
index b8e763eaf6..7d64cf477f 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml
index 63f5bc56a0..6014052dd6 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -45,4 +46,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
index bcf5398612..6d8a590974 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml
index 9a763b34ad..c304692d62 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
index 9074e6ce44..d8f1585ae2 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml
index 7d1fff5f28..c02d1fdc67 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
index ab30aa8110..7d5b13b753 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml
index 4276fcf6cb..cff824669b 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -44,4 +45,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
index 104b69873c..8846dacb40 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
index 9f836b80b6..9295cdc580 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml
index 42e81f7bcc..b8f1667cdb 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
index d17ae7a89e..d2888f767c 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
index fd13e7a0a2..27acfbee86 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml
index 8e205a2636..1ea30bae73 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
index 9916411c90..f3348d608d 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,7 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml
index 282c7e07a5..fbb767cb14 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_resume_torch_dist_uninstall_te_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml
index e2a87210ea..cf65df920f 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_mcore_tp2_pp2_uninstall_te_dgx_a100_1N8G/model_config.yaml
@@ -4,9 +4,8 @@ ENV_VARS:
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
   SKIP_PYTEST: 1
-BEFORE_SCRIPT:
-  pip uninstall -y transformer_engine
-  pip uninstall -y Apex ## TODO: remove once Apex dependency has been removed completely
+  N_REPEATS: 1
+BEFORE_SCRIPT: pip uninstall -y transformer_engine Apex ## TODO: remove once Apex dependency has been removed completely
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,8 +47,8 @@ MODEL_ARGS:
   --deterministic-mode: true
   --no-gradient-accumulation-fusion: true
   --use-mcore-models: true
-  --ckpt-format: torch_dist 
+  --ckpt-format: torch_dist
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
index 7d2cada241..af105662a9 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -46,4 +47,4 @@ MODEL_ARGS:
   --use-legacy-models: true
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
index 6735a087b1..3d27f95aa6 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_te_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --use-legacy-models: true
   --data-cache-path: ${DATA_CACHE_PATH}
   --bf16: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml
index e4c082290e..1e6b07a429 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G/model_config.yaml
index bbb14c899c..2ff5fc2224 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp1_pp4_vp1_resume_torch_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -48,4 +49,4 @@ MODEL_ARGS:
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml
index b5881f04d2..4e4a963417 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -46,4 +47,4 @@ MODEL_ARGS:
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
index fca698dc0f..8d11e207e7 100644
--- a/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/gpt/gpt3_mr_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512
@@ -47,4 +48,4 @@ MODEL_ARGS:
   --data-cache-path: ${DATA_CACHE_PATH}
   --fp16: true
   --apply-query-key-layer-scaling: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml
index 496cedad25..6da0c3a85a 100644
--- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp1_pp1_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 624
@@ -49,4 +50,4 @@ MODEL_ARGS:
   --img-w: 336
   --patch-dim: 14
   --mock-data: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G/model_config.yaml
index 7574866666..816aa8bf1f 100644
--- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp2_pp3_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 624
@@ -50,4 +51,4 @@ MODEL_ARGS:
   --img-w: 336
   --patch-dim: 14
   --mock-data: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G/model_config.yaml
index eb82bff8a5..180e6beedd 100644
--- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G/model_config.yaml
+++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_etp3_dgx_a100_1N7G/model_config.yaml
@@ -4,6 +4,7 @@ ENV_VARS:
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
   GPUS_PER_NODE: 7
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 624
@@ -52,4 +53,4 @@ MODEL_ARGS:
   --img-w: 336
   --patch-dim: 14
   --mock-data: true
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G/model_config.yaml b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G/model_config.yaml
index a56ded5f84..1fade8fd4e 100644
--- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G/model_config.yaml
+++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mr_mcore_te_tp4_pp1_resume_torch_etp3_dgx_a100_1N7G/model_config.yaml
@@ -4,6 +4,7 @@ ENV_VARS:
   NCCL_ALGO: Tree
   CUBLAS_WORKSPACE_CONFIG: :4096:8
   GPUS_PER_NODE: 7
+  N_REPEATS: 5
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 624
@@ -53,4 +54,4 @@ MODEL_ARGS:
   --img-w: 336
   --patch-dim: 14
   --mock-data: true
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume
diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml
index 3a0a741e7a..8abace27d3 100644
--- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: ^NVLS
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --encoder-num-layers: 12
   --decoder-num-layers: 12
@@ -51,4 +52,4 @@ MODEL_ARGS:
   --encoder-pipeline-model-parallel-size: 2
   --deterministic-mode: true
   --ckpt-format: torch
-TEST_TYPE: regular
\ No newline at end of file
+TEST_TYPE: regular
diff --git a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
index 2e06641f34..c1a6d51bf1 100644
--- a/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
+++ b/tests/functional_tests/test_cases/t5/t5_220m_mr_mcore_tp2_pp2_resume_torch_dgx_a100_1N8G/model_config.yaml
@@ -3,6 +3,7 @@ ENV_VARS:
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
   NCCL_ALGO: ^NVLS
   CUBLAS_WORKSPACE_CONFIG: :4096:8
+  N_REPEATS: 5
 MODEL_ARGS:
   --encoder-num-layers: 12
   --decoder-num-layers: 12
@@ -51,4 +52,4 @@ MODEL_ARGS:
   --encoder-pipeline-model-parallel-size: 2
   --deterministic-mode: true
   --ckpt-format: torch
-TEST_TYPE: ckpt-resume
\ No newline at end of file
+TEST_TYPE: ckpt-resume