ADLR/megatron-lm!2310 - tests: Re-enable CP tests

NVIDIA · Nov 6, 2024 · 358fbcf · 358fbcf
1 parent f39c48d
commit 358fbcf
Show file tree

Hide file tree

Showing 5 changed files with 12 additions and 4 deletions.
diff --git a/tests/functional_tests/jet_recipes/gpt.yaml b/tests/functional_tests/jet_recipes/gpt.yaml
@@ -101,11 +101,11 @@ products:
     - gpt3_mr_tp2_pp2_dgx_a100_1N8G
     - gpt3_mr_mcore_te_tp2_pp2_defer_embedding_wgrad_compute_dgx_a100_1N8G
     - gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_dgx_a100_1N8G
-    # - gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
-    # - gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
-    # - gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
-    # - gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
     - gpt3_mr_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_dgx_a100_1N8G
+    - gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
+    - gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
+    - gpt3_mr_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention
+    - gpt3_mr_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G  # cp and attention    
   - environment: [lts, dev]
     scope: [nightly]
     platforms: [dgx_a100]

diff --git a/...t_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/...t_cases/gpt/gpt3_mr_mcore_te_tp2_pp1_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,8 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  NVTE_FUSED_ATTN: 0
+  NVTE_FLASH_ATTN: 1
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512

diff --git a/...r_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/...r_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,8 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  NVTE_FUSED_ATTN: 0
+  NVTE_FLASH_ATTN: 1
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512

diff --git a/...t_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/...t_cases/gpt/gpt3_mr_mcore_te_tp2_pp2_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,8 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  NVTE_FUSED_ATTN: 0
+  NVTE_FLASH_ATTN: 1
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512

diff --git a/...r_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml b/...r_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic_dgx_a100_1N8G/model_config.yaml
@@ -1,6 +1,8 @@
 ENV_VARS:
   CUDA_DEVICE_MAX_CONNECTIONS: 1
   NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
+  NVTE_FUSED_ATTN: 0
+  NVTE_FLASH_ATTN: 1
 MODEL_ARGS:
   --num-layers: 12
   --hidden-size: 512