Skip to content

Commit

Permalink
Merge pull request kruize#1206 from msvinaykumar/logKruizeEventsNotif…
Browse files Browse the repository at this point in the history
…ication

Add Metrics Logging for Kruize Recommendations
  • Loading branch information
dinogun authored Jun 28, 2024
2 parents da295be + 344e3c8 commit 7045682
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 52 deletions.
23 changes: 23 additions & 0 deletions design/KruizePromQL.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,29 @@ KruizeMethod_count{application="Kruize",method="generatePlots",status="success",
KruizeMethod_sum{application="Kruize",method="generatePlots",status="success",} 0.050705769
```

## Kruize Notifications Metrics

The following are the available Kruize notifications that get generated after the updateRecommendations API call.

Sample Output:

```
KruizeNotifications_total{api="updateRecommendations",application="Kruize",experiment_details="quarkus-resteasy-kruize-min-http-response-time-db_1_2|tfb-server-0|2023-01-02T05:30:00.000Z|timestamp|null|null|223002|error|Invalid Amount in CPU Section",} 1.0
KruizeNotifications_total{api="updateRecommendations",application="Kruize",experiment_details="quarkus-resteasy-kruize-min-http-response-time-db_1_2|tfb-server-0|2023-01-02T05:30:00.000Z|model|short_term|performance|221001|error|Number of pods cannot be zero",} 1.0
KruizeNotifications_total{api="updateRecommendations",application="Kruize",experiment_details="quarkus-resteasy-kruize-min-http-response-time-db_1_2|tfb-server-0|2023-01-02T05:30:00.000Z|model|short_term|cost|221001|error|Number of pods cannot be zero",} 1.0
```
Creates a counter with tags for the given level, term, model, and list of recommendation notifications.

level The level of the notification (e.g., container, timestamp, term, model).

term The term associated with the notification.

model The cost or performance model associated with the notification.

recommendationNotificationList The list of recommendation notifications to create counters for; the code, type and message of each notification are included in the tag.

## Time taken for KruizeDB metrics

To monitor the performance of these methods, you can use the following metrics:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.autotune.common.k8sObjects.K8sObject;
import com.autotune.common.utils.CommonUtils;
import com.autotune.database.service.ExperimentDBService;
import com.autotune.metrics.KruizeNotificationCollectionRegistry;
import com.autotune.operator.KruizeDeploymentInfo;
import com.autotune.utils.GenericRestApiClient;
import com.autotune.utils.KruizeConstants;
Expand Down Expand Up @@ -329,6 +330,10 @@ public void generateRecommendations(KruizeObject kruizeObject) {
// generate recommendations based on each container
generateRecommendationsBasedOnContainer(containerData, kruizeObject);
// TODO: generate recommendations based on namespace, kubernetes_object name and type
// todo The process of data validation and notification generation is currently tightly coupled and needs to be separated. By doing so, we can avoid additional iterations at kruizeNotificationCollectionRegistry.logNotification. This should be included as part of the code refactor.
KruizeNotificationCollectionRegistry kruizeNotificationCollectionRegistry = new KruizeNotificationCollectionRegistry(kruizeObject.getExperimentName(), getInterval_end_time(), containerData.getContainer_name());
kruizeNotificationCollectionRegistry.logNotification(containerData);

}
}
}
Expand Down Expand Up @@ -390,6 +395,8 @@ private void generateRecommendationsBasedOnContainer(ContainerData containerData
containerRecommendations.setNotificationMap(recommendationLevelNM);
// set the data object to map
containerRecommendations.setData(timestampBasedRecommendationMap);


// set the container recommendations in container object
containerData.setContainerRecommendations(containerRecommendations);
}
Expand Down Expand Up @@ -1430,7 +1437,7 @@ private String getResults(Map<String, KruizeObject> mainKruizeExperimentMAP, Kru
* @param interval_start_time The start time of the interval for fetching metrics.
* @param dataSourceInfo The datasource object to fetch metrics from.
* @throws Exception if an error occurs during the fetching process.
* TODO: Need to add right abstractions for this
* TODO: Need to add right abstractions for this
*/
public void fetchMetricsBasedOnDatasource(KruizeObject kruizeObject, Timestamp interval_end_time, Timestamp interval_start_time, DataSourceInfo dataSourceInfo) throws Exception {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package com.autotune.metrics;

import com.autotune.analyzer.recommendations.RecommendationNotification;
import com.autotune.analyzer.recommendations.objects.MappedRecommendationForModel;
import com.autotune.analyzer.recommendations.objects.MappedRecommendationForTimestamp;
import com.autotune.analyzer.recommendations.objects.TermRecommendations;
import com.autotune.common.data.result.ContainerData;
import com.autotune.operator.KruizeDeploymentInfo;
import com.autotune.utils.KruizeConstants;
import com.autotune.utils.MetricsConfig;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Tags;

import java.sql.Timestamp;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

/**
* KruizeNotificationCollectionRegistry is responsible for logging and creating metrics for notifications
* related to Kruize recommendations.
*/
public class KruizeNotificationCollectionRegistry {
private String experiment_name;
private Timestamp interval_end_time;
private String container_name;

/**
* Constructor to initialize KruizeNotificationCollectionRegistry with experiment name, interval end time, and container name.
*
* @param experiment_name Name of the experiment.
* @param interval_end_time End time of the interval.
* @param container_name Name of the container.
*/
public KruizeNotificationCollectionRegistry(String experiment_name, Timestamp interval_end_time, String container_name) {
this.experiment_name = experiment_name;
this.interval_end_time = interval_end_time;
this.container_name = container_name;
}

/**
* Logs notifications from the given ContainerData by iterating through its recommendation structure and creating appropriate counters.
*
* @param containerData The container data from which to log notifications.
*/
public void logNotification(ContainerData containerData) {
HashMap<Integer, RecommendationNotification> containerLevelNotifications = containerData.getContainerRecommendations().getNotificationMap();
createCounterTag("container", null, null, containerLevelNotifications.values());
for (MappedRecommendationForTimestamp mappedRecommendationForTimestamp : containerData.getContainerRecommendations().getData().values()) {
HashMap<Integer, RecommendationNotification> timeStampNotificationHashMap = mappedRecommendationForTimestamp.getHigherLevelNotificationMap();
createCounterTag("timestamp", null, null, timeStampNotificationHashMap.values());
for (Map.Entry<String, TermRecommendations> entry : mappedRecommendationForTimestamp.getRecommendationForTermHashMap().entrySet()) {
String termName = entry.getKey();
TermRecommendations termRecommendations = entry.getValue();
HashMap<Integer, RecommendationNotification> termLevelNotificationHashMap = termRecommendations.getNotifications();
createCounterTag("term", termName, null, termLevelNotificationHashMap.values());
if (null != termRecommendations.getRecommendationForModelHashMap()) {
for (Map.Entry<String, MappedRecommendationForModel> recommendationForModel : termRecommendations.getRecommendationForModelHashMap().entrySet()) {
String modelName = recommendationForModel.getKey();
MappedRecommendationForModel mappedRecommendationForModel = recommendationForModel.getValue();
HashMap<Integer, RecommendationNotification> modelNotificationHashMap = mappedRecommendationForModel.getNotificationHashMap();
createCounterTag("model", termName, modelName, modelNotificationHashMap.values());
}
}
}
}
}

/**
* Creates a counter with tags for the given level, term, model, and list of recommendation notifications.
*
* @param level The level of the notification (e.g., container, timestamp, term, model).
* @param term The term associated with the notification.
* @param model The cost or performance model associated with the notification.
* @param recommendationNotificationList The list of recommendation notifications to create counters for.
*/
public void createCounterTag(String level, String term, String model, Collection<RecommendationNotification> recommendationNotificationList) {
for (RecommendationNotification recommendationNotification : recommendationNotificationList) {
Tags additionalTags = Tags.empty();
if (("|" + KruizeDeploymentInfo.log_recommendation_metrics_level + "|").contains("|" + recommendationNotification.getType() + "|") == true) {
additionalTags = additionalTags.and(KruizeConstants.KRUIZE_RECOMMENDATION_METRICS.TAG_NAME, String.format(KruizeConstants.KRUIZE_RECOMMENDATION_METRICS.notification_format, this.experiment_name, this.container_name, KruizeConstants.DateFormats.simpleDateFormatForUTC.format(this.interval_end_time), level, term, model, String.valueOf(recommendationNotification.getCode()), recommendationNotification.getType(), recommendationNotification.getMessage()));
Counter counterNotifications = MetricsConfig.meterRegistry().find(KruizeConstants.KRUIZE_RECOMMENDATION_METRICS.METRIC_NAME).tags(additionalTags).counter();
if (counterNotifications == null) {
counterNotifications = MetricsConfig.timerBKruizeNotifications.tags(additionalTags).register(MetricsConfig.meterRegistry);
}
counterNotifications.increment();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public class KruizeDeploymentInfo {
public static String k8s_type; // ABC
public static String auth_type;
public static Boolean plots = true;
public static String log_recommendation_metrics_level = "error|critical";
public static String auth_token;
public static String database_admin_username;
public static String database_admin_password;
Expand Down
Loading

0 comments on commit 7045682

Please sign in to comment.