Skip to content

Commit

Permalink
Colored oom message, comments improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
LeaveMyYard committed Apr 29, 2024
1 parent 22dc630 commit d841d79
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 13 deletions.
2 changes: 1 addition & 1 deletion robusta_krr/core/abstract/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class ResourceRecommendation(pd.BaseModel):
request: Optional[float]
limit: Optional[float]
info: Optional[str] = pd.Field(
None, description="Additional information about the recommendation. Currently used to explain undefined."
None, description="Additional information about the recommendation."
)

@classmethod
Expand Down
14 changes: 13 additions & 1 deletion robusta_krr/formatters/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ def __calc_diff(allocated, recommended, selector, multiplier=1) -> str:
return f"{diff_sign}{_format(abs(diff_val) * multiplier)}"


# Fallback color for a recommendation's info message when the message has no entry in INFO_COLORS.
DEFAULT_INFO_COLOR = "grey27"
# Per-message color overrides, keyed by the exact info text.
# _format_request_str looks the info message up here and wraps it in [color]...[/color] markup.
INFO_COLORS: dict[str, str] = {
"OOMKill detected": "dark_red",
}


def _format_request_str(item: ResourceScan, resource: ResourceType, selector: str) -> str:
allocated = getattr(item.object.allocations, selector)[resource]
info = item.recommended.info.get(resource)
Expand All @@ -46,14 +52,20 @@ def _format_request_str(item: ResourceScan, resource: ResourceType, selector: st
if diff != "":
diff = f"({diff}) "

if info is None:
info_formatted = ""
else:
color = INFO_COLORS.get(info, DEFAULT_INFO_COLOR)
info_formatted = f"\n[{color}]({info})[/{color}]"

return (
diff
+ f"[{severity.color}]"
+ _format(allocated)
+ " -> "
+ _format(recommended.value)
+ f"[/{severity.color}]"
+ (f"\n[grey27]({info})[/grey27]" if info else "")
+ info_formatted
)


Expand Down
25 changes: 14 additions & 11 deletions robusta_krr/strategies/simple.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
"""
This strategy is a version of a simple strategy that we might want to set as a default strategy for the user.
It is the same as the simple strategy, but it also uses OOMKilled events and memory limits to calculate memory recommendations.
Currently, it is in a testing mode and may not work as expected.
"""

from datetime import timedelta

import numpy as np
Expand Down Expand Up @@ -43,10 +37,10 @@ class SimpleStrategySettings(StrategySettings):
)
use_oomkill_data: bool = pd.Field(
False,
description="Whether to use OOMKilled data to calculate memory recommendations (experimental).",
description="Whether to bump the memory when OOMKills are detected (experimental).",
)
oom_memory_buffer_percentage: float = pd.Field(
25, gt=0, description="The percentage of added buffer to the max memory limit surpassed by OOMKilled event."
25, gt=0, description="What percentage to increase the memory when there are OOMKill events."
)

def calculate_memory_proposal(self, data: PodsTimeData, max_oomkill: float = 0) -> float:
Expand Down Expand Up @@ -116,6 +110,8 @@ def __calculate_cpu_proposal(
if len(data) == 0:
return ResourceRecommendation.undefined(info="No data")

# NOTE: the metrics for each pod are returned as a list of points, where each point is [timestamp, value].
# Since CPUAmountLoader returns only the last point (1 point), [0, 1] is used to get that point's value.
data_count = {pod: values[0, 1] for pod, values in history_data["CPUAmountLoader"].items()}
total_points_count = sum(data_count.values())

Expand All @@ -136,21 +132,26 @@ def __calculate_memory_proposal(
self, history_data: MetricsPodData, object_data: K8sObjectData
) -> ResourceRecommendation:
data = history_data["MaxMemoryLoader"]
info: list[str] = []

oomkill_detected = False

if self.settings.use_oomkill_data:
max_oomkill_data = history_data["MaxOOMKilledMemoryLoader"]
# NOTE: the metrics for each pod are returned as a list of points, where each point is [timestamp, value].
# Since MaxOOMKilledMemoryLoader returns only the last point (1 point), [0, 1] is used to get that point's value.
max_oomkill_value = (
np.max([values[0, 1] for values in max_oomkill_data.values()]) if len(max_oomkill_data) > 0 else 0
)
if max_oomkill_value != 0:
info.append("OOMKill detected")
oomkill_detected = True
else:
max_oomkill_value = 0

if len(data) == 0:
return ResourceRecommendation.undefined(info="No data")

# NOTE: the metrics for each pod are returned as a list of points, where each point is [timestamp, value].
# Since MemoryAmountLoader returns only the last point (1 point), [0, 1] is used to get that point's value.
data_count = {pod: values[0, 1] for pod, values in history_data["MemoryAmountLoader"].items()}
total_points_count = sum(data_count.values())

Expand All @@ -165,7 +166,9 @@ def __calculate_memory_proposal(
return ResourceRecommendation.undefined(info="HPA detected")

memory_usage = self.settings.calculate_memory_proposal(data, max_oomkill_value)
return ResourceRecommendation(request=memory_usage, limit=memory_usage, info=", ".join(info) if info else None)
return ResourceRecommendation(
request=memory_usage, limit=memory_usage, info="OOMKill detected" if oomkill_detected else None
)

def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult:
return {
Expand Down

0 comments on commit d841d79

Please sign in to comment.