Revert "Fix process_weights_after_loading for fp8 dense"

guyueh1 · guyueh1 · commit f9bc2caa456a · 2025-11-09T14:56:33.000-08:00
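This reverts commit 41abdf1.

Summary of what the revert changes:
- nemo_rl/algorithms/grpo.py: drops the "Token Mult Prob Error" line from the training-results printout in grpo_train.
- nemo_rl/models/generation/fp8.py (load_weights): quantized scales are again emitted under the key suffix "_scale_inv" instead of "_scale".
- nemo_rl/models/generation/fp8.py (process_weights_after_loading): no longer imports or calls vLLM's process_fp8_weight_block_strategy / maybe_post_process_fp8_weight_block; it reads layer.weight.data and layer.weight_scale_inv.data directly, applies self._maybe_pad_weight to the weight, and re-registers the scale as layer.weight_scale_inv (rather than creating layer.weight_scale and deleting layer.weight_scale_inv).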
diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py
@@ -1321,7 +1321,6 @@ def grpo_train(
             print("\n📊 Training Results:")
 
             print(f"  • Loss: {metrics['loss']:.4f}")
-            print(f"  • Token Mult Prob Error: {metrics['token_mult_prob_error']:.4f}")
             if master_config["grpo"]["use_dynamic_sampling"]:
                 print(f"  • Avg Filtered Reward: {np.mean(rewards.numpy()):.4f}")
                 print(
diff --git a/nemo_rl/models/generation/fp8.py b/nemo_rl/models/generation/fp8.py
@@ -301,7 +301,7 @@ def load_weights(weights, model_runner):
         )
         param_scale = torch.squeeze(param_scale, dim=-1)
         weights_quantized.append([k, param_lp])
-        weights_quantized.append([k + "_scale", param_scale])
+        weights_quantized.append([k + "_scale_inv", param_scale])
     # Monkey patch the param class to their subclass, as certain models
     # will check the param type to call the proper weightloader
     for name, param in model.named_parameters():
@@ -391,10 +391,6 @@ def cast_tensor_to_fp8_blockwise(
 
 def process_weights_after_loading(self, layer) -> None:
     from torch.nn import Parameter
-    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-        maybe_post_process_fp8_weight_block,
-        process_fp8_weight_block_strategy,
-    )
     from vllm.model_executor.parameter import (
         BlockQuantScaleParameter,
         ModelWeightParameter,
@@ -420,34 +416,27 @@ def _create_param_from_subclass_attributes(custom_param):
         param.subclass_type = type(custom_param)
         return param
 
-    weight_scale = (
-        layer.weight_scale_inv
-        if hasattr(layer, "weight_scale_inv")
-        else layer.weight_scale
-    )
-    weight, weight_scale = process_fp8_weight_block_strategy(layer.weight, weight_scale)
+    weight = layer.weight.data
+    weight_scale_inv = layer.weight_scale_inv.data
+    weight = self._maybe_pad_weight(weight)
 
     layer.weight = _create_param_from_subclass_attributes(
         ModelWeightParameter(
-            data=weight.data,
+            data=weight,
             output_dim=0,
             input_dim=1,
             weight_loader=layer.weight.weight_loader,
         )
     )
-    layer.weight_scale = _create_param_from_subclass_attributes(
+    layer.weight_scale_inv = _create_param_from_subclass_attributes(
         BlockQuantScaleParameter(
-            data=weight_scale.data,
+            data=weight_scale_inv,
             output_dim=0,
             input_dim=1,
             weight_loader=layer.weight_scale_inv.weight_loader,
         )
     )
 
-    del layer.weight_scale_inv
-
-    maybe_post_process_fp8_weight_block(layer, self.cutlass_block_fp8_supported)
-
 
 @triton.jit
 def _per_token_group_quant_fp8(