Disable UB bulk wgrad when weights are frozen (#702)
Signed-off-by: Jaemin Choi <[email protected]>
Co-authored-by: Jaemin Choi <[email protected]>
minitu and Jaemin Choi authored Mar 5, 2024
1 parent 3f8baf9 commit b0f6535
Showing 2 changed files with 2 additions and 2 deletions.
transformer_engine/pytorch/module/layernorm_linear.py (1 addition, 1 deletion)
@@ -355,7 +355,7 @@ def backward(

 if ctx.ub_bulk_wgrad:
     tp_world_size = get_distributed_world_size(ctx.tp_group)
-    if tp_world_size == 1:
+    if tp_world_size == 1 or not weight.requires_grad:
         ctx.ub_bulk_wgrad = False
 
 # Column Parallel Linear
transformer_engine/pytorch/module/layernorm_mlp.py (1 addition, 1 deletion)
@@ -604,7 +604,7 @@ def backward(

 if ctx.ub_bulk_wgrad:
     tp_world_size = get_distributed_world_size(ctx.tp_group)
-    if tp_world_size == 1:
+    if tp_world_size == 1 or not fc1_weight.requires_grad:
         ctx.ub_bulk_wgrad = False
 # Column Parallel Linear
 # Overlap input AG with dgrad
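
Both hunks add the same guard: the UB (userbuffers) bulk wgrad path is skipped not only when there is no tensor parallelism, but also when the relevant weight is frozen (requires_grad is False), since no weight-gradient GEMM needs to run for a frozen weight and there is nothing for the bulk communication to overlap with. A minimal sketch of that condition, using a hypothetical helper name rather than the Transformer Engine code itself:

import torch

def use_bulk_wgrad_overlap(weight: torch.Tensor, tp_world_size: int) -> bool:
    # Hypothetical helper mirroring the check added in backward():
    # - tp_world_size == 1: no tensor-parallel communication to overlap.
    # - weight frozen: autograd skips the wgrad GEMM, so there is no
    #   compute to hide the bulk communication behind.
    return tp_world_size > 1 and weight.requires_grad

weight = torch.nn.Parameter(torch.empty(16, 16))
assert use_bulk_wgrad_overlap(weight, tp_world_size=8)        # overlap stays on
weight.requires_grad_(False)                                  # freeze the weight
assert not use_bulk_wgrad_overlap(weight, tp_world_size=8)    # disabled, as after this commit

The check appears twice because LayerNormLinear and LayerNormMLP each carry their own backward implementation; the MLP variant keys off fc1_weight.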
