Upgrade pylint to 3.3.1 (#1257)

* Upgrade pylint and first round formatting

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* round 2

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* round 3

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Format and fixes

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Paddle lint

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Reviews

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Fixes

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* More linting

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Run formatter

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Paddle lint

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

* Fixes

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>

---------

Signed-off-by: Kirthi Shankar Sivamani <[email protected]>
ksivaman authored Oct 16, 2024
1 parent 161b1d9 commit 6e90fcb
Showing 36 changed files with 306 additions and 160 deletions.
2 changes: 2 additions & 0 deletions pylintrc
@@ -8,7 +8,9 @@ extension-pkg-whitelist=flash_attn_2_cuda,
 extension-pkg-allow-list=transformer_engine.transformer_engine_jax
 
 disable=too-many-locals,
        too-few-public-methods,
+       too-many-public-methods,
+       too-many-positional-arguments,
        invalid-name,
        too-many-arguments,
        abstract-method,
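
Note: too-many-positional-arguments (R0917) is a new check introduced in pylint 3.3, which is why it must now be disabled here. A minimal sketch of what it flags, using a hypothetical function and pylint's default limit of five positional parameters:

    # Hypothetical example: with pylint >= 3.3, R0917 (too-many-positional-arguments)
    # fires because this function takes six positional parameters (default max is 5).
    def fused_scale(a, b, c, d, e, f):
        return (a + b + c + d + e) * f

    # Declaring trailing parameters keyword-only is one way to satisfy the check,
    # since keyword-only parameters do not count toward the positional limit.
    def fused_scale_ok(a, b, c, d, e, *, f):
        return (a + b + c + d + e) * f
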
2 changes: 1 addition & 1 deletion qa/L0_jax_lint/test.sh
@@ -6,7 +6,7 @@ set -e
 
 : "${TE_PATH:=/opt/transformerengine}"
 
-pip install cpplint==1.6.0 pylint==2.13.5
+pip install cpplint==1.6.0 pylint==3.3.1
 if [ -z "${PYTHON_ONLY}" ]
 then
 cd $TE_PATH
2 changes: 1 addition & 1 deletion qa/L0_paddle_lint/test.sh
@@ -6,7 +6,7 @@ set -e
 
 : "${TE_PATH:=/opt/transformerengine}"
 
-pip install cpplint==1.6.0 pylint==2.13.5
+pip install cpplint==1.6.0 pylint==3.3.1
 if [ -z "${PYTHON_ONLY}" ]
 then
 cd $TE_PATH
2 changes: 1 addition & 1 deletion qa/L0_pytorch_lint/test.sh
@@ -6,7 +6,7 @@ set -e
 
 : "${TE_PATH:=/opt/transformerengine}"
 
-pip install cpplint==1.6.0 pylint==2.13.5
+pip install cpplint==1.6.0 pylint==3.3.1
 if [ -z "${PYTHON_ONLY}" ]
 then
 cd $TE_PATH
3 changes: 3 additions & 0 deletions transformer_engine/paddle/cpp_extensions.py
@@ -583,6 +583,7 @@ def fused_attn_fwd_qkvpacked(
         fused_attention_backend != FusedAttnBackend["No_Backend"]
     ), "Fused attention does not support this input combination."
 
+    rng_elts_per_thread = None
     # BF16/FP16 fused attention API from fmha_v1 apex
     if fused_attention_backend == FusedAttnBackend["F16_max512_seqlen"]:
         rng_elts_per_thread = (
@@ -773,6 +774,7 @@ def fused_attn_fwd_kvpacked(
         fused_attention_backend != FusedAttnBackend["No_Backend"]
     ), "Fused attention does not support this input combination."
 
+    rng_elts_per_thread = None
     # BF16/FP16 fused attention API from fmha_v1 apex
     if fused_attention_backend == FusedAttnBackend["F16_max512_seqlen"]:
         rng_elts_per_thread = (
@@ -982,6 +984,7 @@ def fused_attn_fwd(
         fused_attention_backend != FusedAttnBackend["No_Backend"]
     ), "Fused attention does not support this input combination."
 
+    rng_elts_per_thread = None
     # BF16/FP16 fused attention API from fmha_v1 apex
     if fused_attention_backend == FusedAttnBackend["F16_max512_seqlen"]:
         rng_elts_per_thread = (
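
Note: the rng_elts_per_thread = None initializations above address pylint's possibly-used-before-assignment check (E0606, introduced in pylint 3.2), which flags variables that may be read before any branch has assigned them; the same pattern recurs in the attention and layernorm_mlp changes below. A minimal sketch of the pattern, with hypothetical names and values:

    # Hypothetical sketch: without the upfront default, pylint reports E0606
    # (possibly-used-before-assignment), because no branch is guaranteed to
    # bind the variable before it is read at the return statement.
    def rng_elts_for(backend: str):
        rng_elts_per_thread = None  # default so every code path binds the name
        if backend == "F16_max512_seqlen":
            rng_elts_per_thread = 512 * 512 // 256
        elif backend == "F16_arbitrary_seqlen":
            rng_elts_per_thread = 16
        return rng_elts_per_thread
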
1 change: 1 addition & 0 deletions transformer_engine/paddle/fp8_buffer.py
@@ -100,6 +100,7 @@ def _reduce_tensor_across_group_op_max(tensor, group, sync_op):
         self._dp_amax_reduce_interval = int(os.getenv("NVTE_DP_AMAX_REDUCE_INTERVAL", "1"))
 
         tp_amax_reduce = False
+        reduce_group = -1  # Set value that will raise error if not set. `None` is a valid group.
         if self._dp_amax_reduce_idx == 0:
             reduce_group = fp8_meta["fp8_group"]
         else:
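
Note: in this case None cannot serve as the "unset" default, since it is a valid process group, so the fix uses -1 as a sentinel that downstream code would trip over if neither branch assigned reduce_group. A sketch of this sentinel variant, with hypothetical names:

    # Hypothetical sketch: None is a legal group value here, so an int sentinel
    # marks "not yet assigned" and fails loudly if it ever leaks through.
    _UNSET = -1

    def pick_reduce_group(reduce_idx, fp8_group, tp_group):
        reduce_group = _UNSET
        if reduce_idx == 0:
            reduce_group = fp8_group  # may legitimately be None
        else:
            reduce_group = tp_group
        assert reduce_group != _UNSET, "reduce_group was never assigned"
        return reduce_group
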
1 change: 1 addition & 0 deletions transformer_engine/paddle/layer/attention.py
@@ -1008,6 +1008,7 @@ def forward(
         else:
             raise ValueError(f"hidden_states should have 2 or 3 dimensions, got {input_dim}.")
 
+        layernorm_output = None
         if self.attention_type == "self":
             if self.input_layernorm:
                 layernorm_qkv_outputs = self.layernorm_qkv(
2 changes: 2 additions & 0 deletions transformer_engine/paddle/layer/layernorm_mlp.py
@@ -266,6 +266,8 @@ def _mlp_backward(
         accumulate_wgrad_into_param_main_grad,
     )
 
+    dgelu_t = None
+    fc1_bgrad_ = None
     if activation == "gelu":
         # GELU Bwd
         dgelu, dgelu_t, fc1_bgrad_ = dgelu_cast_transpose_bgrad_fp8(