From d841d79515c20ded2fe04db6f12db1231c96c378 Mon Sep 17 00:00:00 2001
From: LeaveMyYard <zhukovpavel2001@gmail.com>
Date: Mon, 29 Apr 2024 16:52:40 +0300
Subject: [PATCH] Colored oom message, comments improvements

---
 robusta_krr/core/abstract/strategies.py |  2 +-
 robusta_krr/formatters/table.py         | 14 +++++++++++++-
 robusta_krr/strategies/simple.py        | 25 ++++++++++++++-----------
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/robusta_krr/core/abstract/strategies.py b/robusta_krr/core/abstract/strategies.py
index 5b6521e1..0005a227 100644
--- a/robusta_krr/core/abstract/strategies.py
+++ b/robusta_krr/core/abstract/strategies.py
@@ -28,7 +28,7 @@ class ResourceRecommendation(pd.BaseModel):
     request: Optional[float]
     limit: Optional[float]
     info: Optional[str] = pd.Field(
-        None, description="Additional information about the recommendation. Currently used to explain undefined."
+        None, description="Additional information about the recommendation."
     )
 
     @classmethod
diff --git a/robusta_krr/formatters/table.py b/robusta_krr/formatters/table.py
index 202ec9ab..b028a058 100644
--- a/robusta_krr/formatters/table.py
+++ b/robusta_krr/formatters/table.py
@@ -33,6 +33,12 @@ def __calc_diff(allocated, recommended, selector, multiplier=1) -> str:
         return f"{diff_sign}{_format(abs(diff_val) * multiplier)}"
 
 
+DEFAULT_INFO_COLOR = "grey27"
+INFO_COLORS: dict[str, str] = {
+    "OOMKill detected": "dark_red",
+}
+
+
 def _format_request_str(item: ResourceScan, resource: ResourceType, selector: str) -> str:
     allocated = getattr(item.object.allocations, selector)[resource]
     info = item.recommended.info.get(resource)
@@ -46,6 +52,12 @@ def _format_request_str(item: ResourceScan, resource: ResourceType, selector: st
     if diff != "":
         diff = f"({diff}) "
 
+    if info is None:
+        info_formatted = ""
+    else:
+        color = INFO_COLORS.get(info, DEFAULT_INFO_COLOR)
+        info_formatted = f"\n[{color}]({info})[/{color}]"
+
     return (
         diff
         + f"[{severity.color}]"
@@ -53,7 +65,7 @@ def _format_request_str(item: ResourceScan, resource: ResourceType, selector: st
         + " -> "
         + _format(recommended.value)
         + f"[/{severity.color}]"
-        + (f"\n[grey27]({info})[/grey27]" if info else "")
+        + info_formatted
     )
 
 
diff --git a/robusta_krr/strategies/simple.py b/robusta_krr/strategies/simple.py
index 7d4150c8..25c1a667 100644
--- a/robusta_krr/strategies/simple.py
+++ b/robusta_krr/strategies/simple.py
@@ -1,9 +1,3 @@
-"""
-    This strategy is a version of a simple strategy that we might want to set as a default strategy for the user.
-    It is the same as the simple strategy, but it also uses OOMKilled events and memory limits to calculate memory recommendations.
-    Currently, it is in a testing mode and may not work as expected.
-"""
-
 from datetime import timedelta
 
 import numpy as np
@@ -43,10 +37,10 @@ class SimpleStrategySettings(StrategySettings):
     )
     use_oomkill_data: bool = pd.Field(
         False,
-        description="Whether to use OOMKilled data to calculate memory recommendations (experimental).",
+        description="Whether to bump the memory when OOMKills are detected (experimental).",
     )
     oom_memory_buffer_percentage: float = pd.Field(
-        25, gt=0, description="The percentage of added buffer to the max memory limit surpassed by OOMKilled event."
+        25, gt=0, description="What percentage to increase the memory when there are OOMKill events."
     )
 
     def calculate_memory_proposal(self, data: PodsTimeData, max_oomkill: float = 0) -> float:
@@ -116,6 +110,8 @@ def __calculate_cpu_proposal(
         if len(data) == 0:
             return ResourceRecommendation.undefined(info="No data")
 
+        # NOTE: each pod's metrics are a sequence of [timestamp, value] rows.
+        # CPUAmountLoader returns only the last point (a single row), so [0, 1] reads that row's value.
         data_count = {pod: values[0, 1] for pod, values in history_data["CPUAmountLoader"].items()}
         total_points_count = sum(data_count.values())
 
@@ -136,21 +132,26 @@ def __calculate_memory_proposal(
         self, history_data: MetricsPodData, object_data: K8sObjectData
     ) -> ResourceRecommendation:
         data = history_data["MaxMemoryLoader"]
-        info: list[str] = []
+
+        oomkill_detected = False
 
         if self.settings.use_oomkill_data:
             max_oomkill_data = history_data["MaxOOMKilledMemoryLoader"]
+            # NOTE: each pod's metrics are a sequence of [timestamp, value] rows.
+            # MaxOOMKilledMemoryLoader returns only the last point (a single row), so [0, 1] reads that row's value.
             max_oomkill_value = (
                 np.max([values[0, 1] for values in max_oomkill_data.values()]) if len(max_oomkill_data) > 0 else 0
             )
             if max_oomkill_value != 0:
-                info.append("OOMKill detected")
+                oomkill_detected = True
         else:
             max_oomkill_value = 0
 
         if len(data) == 0:
             return ResourceRecommendation.undefined(info="No data")
 
+        # NOTE: each pod's metrics are a sequence of [timestamp, value] rows.
+        # MemoryAmountLoader returns only the last point (a single row), so [0, 1] reads that row's value.
         data_count = {pod: values[0, 1] for pod, values in history_data["MemoryAmountLoader"].items()}
         total_points_count = sum(data_count.values())
 
@@ -165,7 +166,9 @@ def __calculate_memory_proposal(
             return ResourceRecommendation.undefined(info="HPA detected")
 
         memory_usage = self.settings.calculate_memory_proposal(data, max_oomkill_value)
-        return ResourceRecommendation(request=memory_usage, limit=memory_usage, info=", ".join(info) if info else None)
+        return ResourceRecommendation(
+            request=memory_usage, limit=memory_usage, info="OOMKill detected" if oomkill_detected else None
+        )
 
     def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult:
         return {