From d841d79515c20ded2fe04db6f12db1231c96c378 Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Mon, 29 Apr 2024 16:52:40 +0300 Subject: [PATCH] Colored oom message, comments improvements --- robusta_krr/core/abstract/strategies.py | 2 +- robusta_krr/formatters/table.py | 14 +++++++++++++- robusta_krr/strategies/simple.py | 25 ++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/robusta_krr/core/abstract/strategies.py b/robusta_krr/core/abstract/strategies.py index 5b6521e1..0005a227 100644 --- a/robusta_krr/core/abstract/strategies.py +++ b/robusta_krr/core/abstract/strategies.py @@ -28,7 +28,7 @@ class ResourceRecommendation(pd.BaseModel): request: Optional[float] limit: Optional[float] info: Optional[str] = pd.Field( - None, description="Additional information about the recommendation. Currently used to explain undefined." + None, description="Additional information about the recommendation." ) @classmethod diff --git a/robusta_krr/formatters/table.py b/robusta_krr/formatters/table.py index 202ec9ab..b028a058 100644 --- a/robusta_krr/formatters/table.py +++ b/robusta_krr/formatters/table.py @@ -33,6 +33,12 @@ def __calc_diff(allocated, recommended, selector, multiplier=1) -> str: return f"{diff_sign}{_format(abs(diff_val) * multiplier)}" +DEFAULT_INFO_COLOR = "grey27" +INFO_COLORS: dict[str, str] = { + "OOMKill detected": "dark_red", +} + + def _format_request_str(item: ResourceScan, resource: ResourceType, selector: str) -> str: allocated = getattr(item.object.allocations, selector)[resource] info = item.recommended.info.get(resource) @@ -46,6 +52,12 @@ def _format_request_str(item: ResourceScan, resource: ResourceType, selector: st if diff != "": diff = f"({diff}) " + if info is None: + info_formatted = "" + else: + color = INFO_COLORS.get(info, DEFAULT_INFO_COLOR) + info_formatted = f"\n[{color}]({info})[/{color}]" + return ( diff + f"[{severity.color}]" @@ -53,7 +65,7 @@ def _format_request_str(item: ResourceScan, resource: ResourceType, selector: st + " -> " + _format(recommended.value) + f"[/{severity.color}]" - + (f"\n[grey27]({info})[/grey27]" if info else "") + + info_formatted ) diff --git a/robusta_krr/strategies/simple.py b/robusta_krr/strategies/simple.py index 7d4150c8..25c1a667 100644 --- a/robusta_krr/strategies/simple.py +++ b/robusta_krr/strategies/simple.py @@ -1,9 +1,3 @@ -""" - This strategy is a version of a simple strategy that we might want to set as a default strategy for the user. - It is the same as the simple strategy, but it also uses OOMKilled events and memory limits to calculate memory recommendations. - Currently, it is in a testing mode and may not work as expected. -""" - from datetime import timedelta import numpy as np @@ -43,10 +37,10 @@ class SimpleStrategySettings(StrategySettings): ) use_oomkill_data: bool = pd.Field( False, - description="Whether to use OOMKilled data to calculate memory recommendations (experimental).", + description="Whether to bump the memory when OOMKills are detected (experimental).", ) oom_memory_buffer_percentage: float = pd.Field( - 25, gt=0, description="The percentage of added buffer to the max memory limit surpassed by OOMKilled event." + 25, gt=0, description="What percentage to increase the memory when there are OOMKill events." ) def calculate_memory_proposal(self, data: PodsTimeData, max_oomkill: float = 0) -> float: @@ -116,6 +110,8 @@ def __calculate_cpu_proposal( if len(data) == 0: return ResourceRecommendation.undefined(info="No data") + # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] + # As CPUAmountLoader returns only the last value (1 point), [0, 1] is used to get the value data_count = {pod: values[0, 1] for pod, values in history_data["CPUAmountLoader"].items()} total_points_count = sum(data_count.values()) @@ -136,21 +132,26 @@ def __calculate_memory_proposal( self, history_data: MetricsPodData, object_data: K8sObjectData ) -> ResourceRecommendation: data = history_data["MaxMemoryLoader"] - info: list[str] = [] + + oomkill_detected = False if self.settings.use_oomkill_data: max_oomkill_data = history_data["MaxOOMKilledMemoryLoader"] + # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] + # As MaxOOMKilledMemoryLoader returns only the last value (1 point), [0, 1] is used to get the value max_oomkill_value = ( np.max([values[0, 1] for values in max_oomkill_data.values()]) if len(max_oomkill_data) > 0 else 0 ) if max_oomkill_value != 0: - info.append("OOMKill detected") + oomkill_detected = True else: max_oomkill_value = 0 if len(data) == 0: return ResourceRecommendation.undefined(info="No data") + # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] + # As MemoryAmountLoader returns only the last value (1 point), [0, 1] is used to get the value data_count = {pod: values[0, 1] for pod, values in history_data["MemoryAmountLoader"].items()} total_points_count = sum(data_count.values()) @@ -165,7 +166,9 @@ def __calculate_memory_proposal( return ResourceRecommendation.undefined(info="HPA detected") memory_usage = self.settings.calculate_memory_proposal(data, max_oomkill_value) - return ResourceRecommendation(request=memory_usage, limit=memory_usage, info=", ".join(info) if info else None) + return ResourceRecommendation( + request=memory_usage, limit=memory_usage, info="OOMKill detected" if oomkill_detected else None + ) def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult: return {