polish the PR

youngeunkwon0405 · youngeunkwon0405 · commit 45d9680254b0 · 2025-11-18T00:27:57.000-08:00
Signed-off-by: Youngeun Kwon <youngeunk@nvidia.com>
diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
@@ -230,6 +230,7 @@ policy:
       num_last_layers_in_bf16: 0
       num_first_layers_in_bf16: 0
       enable_vllm_metrics_logger: false # Set to true to enable vLLM internal metrics logger, might impact performance
+      vllm_metrics_logger_interval: 0.5 # Interval in seconds to collect vLLM logger metrics
     vllm_kwargs: {}
     colocated:
       # true: generation shares training GPUs
diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py
@@ -1038,9 +1038,6 @@ def grpo_train(
                 maybe_gpu_profile_step(policy_generation, total_steps + 1)
             val_metrics, validation_timings = None, None
 
-            # Clear vLLM logger metrics after each step
-            policy_generation.clear_vllm_logger_metrics()
-
             with timer.time("total_step_time"):
                 # Prepare batch
                 print("▶ Preparing batch...", flush=True)
@@ -1076,6 +1073,8 @@ def grpo_train(
 
                 dynamic_sampling_num_gen_batches += 1
                 with timer.time("generation"):
+                    # Clear vLLM logger metrics for each generation step
+                    policy_generation.clear_vllm_logger_metrics()
                     # Use penguin rollouts if enabled. We cascade penguin first since penguin requires async rollouts.
                     if _should_use_penguin(master_config):
                         generation_config = master_config["policy"]["generation"]
@@ -1125,10 +1124,9 @@ def grpo_train(
                             greedy=False,
                         )
                     policy_generation.finish_generation()
-
-                # Collect vLLM logger metrics for performance reporting
-                # inflight batch sizes and num pending samples are collected from each vLLM worker
-                vllm_logger_metrics = policy_generation.get_vllm_logger_metrics()
+                    # Collect vLLM logger metrics for performance reporting after each generation step
+                    # inflight batch sizes and num pending samples are collected from each vLLM worker
+                    vllm_logger_metrics = policy_generation.get_vllm_logger_metrics()
 
                 repeated_batch = scale_rewards(
                     repeated_batch, master_config["grpo"]["reward_scaling"]
@@ -1934,6 +1932,9 @@ def async_grpo_train(
 
     print("✅ Buffer ready! Starting training loop...")
 
+    # Clear vLLM logger metrics at the start of training
+    policy_generation.clear_vllm_logger_metrics()
+
     # Main training loop
     try:
         while step < master_config["grpo"]["max_num_steps"]:
@@ -1944,9 +1945,6 @@ def async_grpo_train(
             if policy != policy_generation:
                 maybe_gpu_profile_step(policy_generation, step + 1)
 
-            # Clear vLLM logger metrics after each step
-            policy_generation.clear_vllm_logger_metrics()
-
             with timer.time("total_step_time"):
                 # Sample trajectories from replay buffer
                 print("📦 Sampling from replay buffer...")
@@ -2179,6 +2177,9 @@ def async_grpo_train(
                         trajectory_collector.set_weight_version.remote(weight_version)
                         trajectory_collector.resume_after_refit.remote()
 
+                # Clear vLLM logger metrics after each refit (weight sync), starting a new logging cycle
+                policy_generation.clear_vllm_logger_metrics()
+
                 # Validation
                 val_metrics, validation_timings = None, None
                 is_last_step = step + 1 == master_config["grpo"]["max_num_steps"]
diff --git a/nemo_rl/algorithms/utils.py b/nemo_rl/algorithms/utils.py
@@ -401,12 +401,13 @@ def visualize_per_worker_load(per_worker_token_counts: dict[int, int]) -> float:
             v / max(per_worker_token_counts_list) for v in per_worker_token_counts_list
         ]
         max_rows_to_print = 100
+        bar_length = 20
         print("  • Visualizing Token Imbalance per Generation Worker:")
         for i in range(min(len(per_worker_token_counts_list), max_rows_to_print)):
             print(
                 f"    - Generated Tokens from Worker {i:3.0f}:"
-                f"{'■' * int(per_worker_load_ratio[i] * 10)}"
-                f"{'□' * (10 - int(per_worker_load_ratio[i] * 10))}"
+                f"{'■' * int(per_worker_load_ratio[i] * bar_length)}"
+                f"{'□' * (bar_length - int(per_worker_load_ratio[i] * bar_length))}"
                 f" Count: {per_worker_token_counts_list[i] / 1000:.1f}K"
             )
         estimated_idle_ratio = 1 - sum(per_worker_load_ratio) / len(
@@ -442,25 +443,77 @@ def visualize_per_worker_load(per_worker_token_counts: dict[int, int]) -> float:
         )
 
     # =====================================================
-    # vLLM Logger Metrics (inflight batch sizes and pending samples)
+    # vLLM Logger Metrics (inflight batch sizes, num pending samples, etc.)
     # =====================================================
+    def resize_timeline(data, new_size):
+        """Linearly resample `data` to `new_size` evenly spaced points."""
+        src_positions = np.linspace(0, 1, len(data))
+        dst_positions = np.linspace(0, 1, new_size)
+        return np.interp(dst_positions, src_positions, data)
+
+    def visualize_per_worker_timeline(
+        metric_dict: dict[int, list[int]],
+        metric_name: str,
+        timeline_interval: float | None,
+    ) -> None:
+        """Print a per-worker timeline of a sampled vLLM metric.
+
+        Args:
+            metric_dict: dp_idx -> samples collected for that generation worker.
+            metric_name: Label printed above the timelines.
+            timeline_interval: Seconds between samples; when given, active/idle
+                time is reported per worker. None skips that summary.
+        """
+        max_timeline_length = 50
+        marker = {0: "□", 1: "⧅", 2: "⛝", 3: "■"}
+        # Workers without samples are skipped so max() never sees an empty list.
+        max_value = max((max(v) for v in metric_dict.values() if v), default=0)
+        bin_width = (max_value + 1) / len(marker)
+
+        print(f"  - {metric_name}:")
+        print(f"    - Max value: {max_value}")
+        print("    - Timeline:")
+        for dp_idx, metric_values in metric_dict.items():
+            length = len(metric_values)
+            # Timelines longer than the display width are resampled to fit.
+            if length > max_timeline_length:
+                drawn = resize_timeline(metric_values, max_timeline_length)
+            else:
+                drawn = metric_values
+            timeline = " ".join(
+                marker[min(int(v // bin_width), len(marker) - 1)] for v in drawn
+            )
+            summary = ""
+            if timeline_interval is not None:
+                idle = sum(v == 0 for v in metric_values) * timeline_interval
+                summary = f" (Active: {length * timeline_interval - idle:.2f} s, Idle: {idle:.2f} s)"
+            print(f"    - Generation Worker {dp_idx:3.0f}: {timeline}{summary}")
+
     if "vllm_logger_metrics" in metrics:
+        # vllm_logger_metrics: dict[str (metric_name), dict[int (dp_idx), list[int] (metric_values)]]
+        # metric_name: "inflight_batch_sizes" or "num_pending_samples"
         vllm_logger_metrics = metrics["vllm_logger_metrics"]
+
         if vllm_logger_metrics is not None:
+            vllm_metrics_logger_interval = master_config["policy"]["generation"][
+                "vllm_cfg"
+            ]["vllm_metrics_logger_interval"]
             print("  • vLLM Logger Metrics:")
-            for dp_idx, inflight_batch_sizes in vllm_logger_metrics[
-                "inflight_batch_sizes"
-            ].items():
-                print(
-                    f"  - vLLM Inflight Batch Sizes for DP {dp_idx}: {inflight_batch_sizes}",
-                    flush=True,
-                )
-            for dp_idx, num_pending_samples in vllm_logger_metrics[
-                "num_pending_samples"
-            ].items():
-                print(
-                    f"  - vLLM Num Pending Samples for DP {dp_idx}: {num_pending_samples}",
-                    flush=True,
+            # Visualize the inflight batch sizes timeline
+            visualize_per_worker_timeline(
+                vllm_logger_metrics["inflight_batch_sizes"],
+                "Inflight Batch Sizes",
+                vllm_metrics_logger_interval,
+            )
+            max_num_pending_samples = max(
+                max(v) for v in vllm_logger_metrics["num_pending_samples"].values()
+            )
+            # If there is at least one pending sample, visualize the timeline
+            if max_num_pending_samples > 0:
+                visualize_per_worker_timeline(
+                    vllm_logger_metrics["num_pending_samples"],
+                    "Num Pending Samples",
+                    None,
                 )
 
     # =====================================================
diff --git a/nemo_rl/models/generation/vllm/vllm_generation.py b/nemo_rl/models/generation/vllm/vllm_generation.py
@@ -836,9 +836,9 @@ def get_vllm_logger_metrics(self) -> dict[str, Any]:
             dp_indices.append(dp_idx)
 
         results = ray.get(futures)
-        vllm_logger_metrics: dict[str, dict[int, dict[int, list[int]]]] = {
-            "inflight_batch_sizes": {},
-            "num_pending_samples": {},
+        vllm_logger_metrics: dict[str, dict[int, list[int]]] = {
+            "inflight_batch_sizes": {},  # dp_idx -> list[int]
+            "num_pending_samples": {},  # dp_idx -> list[int]
         }
 
         for dp_idx, stats in zip(dp_indices, results):
diff --git a/nemo_rl/models/generation/vllm/vllm_worker.py b/nemo_rl/models/generation/vllm/vllm_worker.py
@@ -330,41 +330,44 @@ def _patch_vllm_init_workers_ray():
         self._create_engine(llm_kwargs)
 
         # Optionally start periodic vLLM metrics logging if the flag is set
+        # NOTE: vLLM metrics logger is only supported with async engine enabled
         # Metrics logger only enabled for per-actor, model-owner only
-        if self.cfg["vllm_cfg"].get("enable_vllm_metrics_logger", False):
-            self._maybe_start_vllm_metrics_logger()
+        if self.cfg["vllm_cfg"].get("enable_vllm_metrics_logger", False) and self.cfg[
+            "vllm_cfg"
+        ].get("async_engine", False):
+            self._start_vllm_metrics_logger()
 
         # will be initialized in post_init
         # used in update_weights_from_ipc_handles
         self.vllm_device_ids = None
 
-    def _maybe_start_vllm_metrics_logger(self) -> None:
-        """Start a background thread that periodically prints vLLM inflight/queued sizes.
+    def _start_vllm_metrics_logger(self) -> None:
+        """Start a background thread that periodically collects vLLM logger metrics.
 
-        Controlled by env var NRL_VLLM_LOG_METRICS_INTERVAL_SEC. Set to a positive
-        float (e.g. "10") to enable. Runs only on the model-owner actor.
+        Controlled by vllm_metrics_logger_interval in vllm_cfg (required; must be > 0).
+        Runs only on the model-owner actor.
         """
+        assert self.cfg["vllm_cfg"].get("async_engine", False), (
+            "vLLM metrics logger is only supported with async engine enabled"
+        )
         # Run only on the model-owner actor
         if not getattr(self, "is_model_owner", False):
             return
 
-        try:
-            interval_s_str = os.environ.get("NRL_VLLM_LOG_METRICS_INTERVAL_SEC", "0.5")
-            if not interval_s_str:
-                return
-            interval_s = float(interval_s_str)
-        except Exception:
-            return
-
-        if interval_s <= 0:
-            return
+        assert "vllm_metrics_logger_interval" in self.cfg["vllm_cfg"], (
+            "vllm_metrics_logger_interval must be set in vllm_cfg if enable_vllm_metrics_logger is True"
+        )
+        interval_s = self.cfg["vllm_cfg"]["vllm_metrics_logger_interval"]
+        assert interval_s > 0, (
+            f"vllm_metrics_logger_interval must be a positive float, got {interval_s}"
+        )
 
         # Lazy import inside thread target to avoid import overhead if disabled
         stop_event = threading.Event()
         self._vllm_metrics_logger_stop_event = stop_event
 
-        self.inflight_batch_sizes: dict[int, list[int]] = {}
-        self.num_pending_samples: dict[int, list[int]] = {}
+        self.inflight_batch_sizes: list[int] = []
+        self.num_pending_samples: list[int] = []
 
         def _logger_loop():
             # Delay a little to let engine settle
@@ -386,25 +389,17 @@ def _logger_loop():
                             if isinstance(m, Gauge):
                                 # Log the vllm inflight batch sizes
                                 if m.name == "vllm:num_requests_running":
-                                    eng = int(m.labels.get("engine", "0"))
-                                    if eng not in self.inflight_batch_sizes:
-                                        self.inflight_batch_sizes[eng] = []
-                                    self.inflight_batch_sizes[eng].append(int(m.value))
+                                    self.inflight_batch_sizes.append(int(m.value))
                                 # Log the vllm pending number of requests in the queue
                                 elif m.name == "vllm:num_requests_waiting":
-                                    eng = int(m.labels.get("engine", "0"))
-                                    if eng not in self.num_pending_samples:
-                                        self.num_pending_samples[eng] = []
-                                    self.num_pending_samples[eng].append(int(m.value))
+                                    self.num_pending_samples.append(int(m.value))
                         except Exception:
                             print(
                                 "⚠️[vLLM Metric Logger]⚠️ Exception in vLLM metrics logger",
                                 flush=True,
                             )
-                            # tolerate bad metric entries
                             pass
                 except Exception:
-                    # Avoid crashing the worker on logging issues
                     print(
                         "⚠️[vLLM Metric Logger]⚠️ Exception in vLLM metrics logger",
                         flush=True,
@@ -439,8 +434,8 @@ def get_vllm_logger_metrics(self) -> dict[str, Any]:
     def clear_vllm_logger_metrics(self) -> None:
         if not self.cfg["vllm_cfg"].get("enable_vllm_metrics_logger", False):
             return
-        self.inflight_batch_sizes = {}
-        self.num_pending_samples = {}
+        self.inflight_batch_sizes = []
+        self.num_pending_samples = []
 
     def llm(self):
         return self.llm