Merge pull request kruize#1165 from khansaad/upd-recomm-validation-fix
Fix the missing validation for Update recommendation API
dinogun authored Apr 19, 2024
2 parents 80ba618 + a2b3623 commit d9f3b3c
Showing 15 changed files with 131 additions and 27 deletions.
15 changes: 14 additions & 1 deletion design/KruizeConfiguration.md
@@ -125,4 +125,17 @@ The following environment variables are set using the `kubectl apply` command wi
- Details: The value represents the number of days, indicating the duration for which partitions belonging to Kruize
that are older than the specified number of days from today's date will be deleted. For example, if the value is
set to "16," Kruize will automatically delete partitions older than 16 days, helping manage and optimize storage
resources.
resources.
- **plots**
- Description: Enable or disable the box plots feature.
- Value: "false"
- Details: The box plot feature is currently in preview; enable this flag to try it out.
- **local**
- Description: Enable or disable the Kruize local monitoring use case.
- Value: "false"
- Details: The Kruize local monitoring feature is currently in preview; enable this flag to try it out.
- **logAllHttpReqAndResp**
- Description: Enable or disable logging of all API requests and responses.
- Value: "true"
- Details: When enabled, Kruize logs the inputs passed to the APIs and the corresponding responses they generate, which makes it easier to debug API failures (see the sketch after this list).
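For reference, the pattern this flag turns on in the service code (added to the `UpdateRecommendations` and `UpdateResults` servlets later in this diff) is a simple guard around the request/response log statements. The sketch below is illustrative only; the class and method names are placeholders, and the real log messages appear in the servlet hunks further down.

```java
// Minimal sketch of the logging gate controlled by logAllHttpReqAndResp.
// The static flag stands in for KruizeDeploymentInfo.logAllHttpReqAndResp.
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RequestLoggingGateSketch {
    private static final Logger LOGGER = LoggerFactory.getLogger(RequestLoggingGateSketch.class);

    // Defaults to false, matching the new KruizeDeploymentInfo field.
    static boolean logAllHttpReqAndResp = false;

    void handleRequest(String experimentName, String intervalStartTimeStr, String intervalEndTimeStr) {
        // Request parameters are logged only when the flag is enabled in the Kruize config.
        if (logAllHttpReqAndResp)
            LOGGER.info("experiment_name : {} and interval_start_time : {} and interval_end_time : {} ",
                    experimentName, intervalStartTimeStr, intervalEndTimeStr);
        // ... normal API processing continues here ...
    }
}
```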
3 changes: 2 additions & 1 deletion design/MonitoringModeAPI.md
@@ -3283,7 +3283,8 @@ The response will contain an array of JSON objects with the updated recommendation
| 400 | experiment_name is mandatory. |
| 400 | interval_end_time is mandatory. |
| 400 | Given timestamp - \" 2023-011-02T00:00:00.000Z \" is not a valid timestamp format. |
| 400 | Data not found!. |
| 400 | No metrics available from `2024-01-15T00:00:00.000Z` to `2023-12-31T00:00:00.000Z`. |
| 400 | Not Found: experiment_name does not exist: exp1. |
| 400 | The gap between the interval_start_time and interval_end_time must be within a maximum of 15 days! |
| 400 | The Start time should precede the End time! |
| 500 | Internal Server Error |
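As an illustration of when the new `No metrics available ...` message is returned: a request like the one below, for a window that has no stored results, now fails fast with HTTP 400 instead of producing an empty recommendation. The host, port, and `/updateRecommendations` path are assumptions for this example; the query parameter names match those read by the servlet in this diff.

```java
// Hypothetical client call demonstrating the new validation error.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class UpdateRecommendationsCallExample {
    public static void main(String[] args) throws Exception {
        // Placeholder host/port; adjust to wherever the Kruize service is exposed.
        String url = "http://localhost:8080/updateRecommendations"
                + "?experiment_name=exp1"
                + "&interval_end_time=2024-01-15T00:00:00.000Z";

        HttpRequest request = HttpRequest.newBuilder(URI.create(url))
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());

        // With no metrics stored for the requested window, the response is now
        // 400 with a message like "No metrics available from ... to ...".
        System.out.println(response.statusCode() + " -> " + response.body());
    }
}
```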
@@ -32,6 +32,9 @@ data:
"monitoringendpoint": "prometheus-k8s",
"savetodb": "true",
"dbdriver": "jdbc:postgresql://",
"plots": "false",
"local": "false",
"logAllHttpReqAndResp": "true",
"hibernate": {
"dialect": "org.hibernate.dialect.PostgreSQLDialect",
"driver": "org.postgresql.Driver",
@@ -32,6 +32,9 @@ data:
"monitoringendpoint": "prometheus-k8s",
"savetodb": "true",
"dbdriver": "jdbc:postgresql://",
"plots": "false",
"local": "false",
"logAllHttpReqAndResp": "true",
"hibernate": {
"dialect": "org.hibernate.dialect.PostgreSQLDialect",
"driver": "org.postgresql.Driver",
@@ -112,6 +112,7 @@ data:
"dbdriver": "jdbc:postgresql://",
"plots": "false",
"local": "false",
"logAllHttpReqAndResp": "true",
"hibernate": {
"dialect": "org.hibernate.dialect.PostgreSQLDialect",
"driver": "org.postgresql.Driver",
@@ -93,6 +93,7 @@ data:
"dbdriver": "jdbc:postgresql://",
"plots": "false",
"local": "false",
"logAllHttpReqAndResp": "true",
"hibernate": {
"dialect": "org.hibernate.dialect.PostgreSQLDialect",
"driver": "org.postgresql.Driver",
@@ -231,30 +231,40 @@ public KruizeObject prepareRecommendations(int calCount) {
Timestamp interval_start_time = Timestamp.valueOf(Objects.requireNonNull(getInterval_end_time()).toLocalDateTime().minusDays(maxDay));

// update the KruizeObject to have the results data from the available datasource
getResults(mainKruizeExperimentMAP, kruizeObject, experimentName, interval_start_time, dataSource);
try {
String errorMsg = getResults(mainKruizeExperimentMAP, kruizeObject, experimentName, interval_start_time, dataSource);
if (!errorMsg.isEmpty()) {
throw new Exception(errorMsg);
}
} catch (Exception e) {
LOGGER.error("UpdateRecommendations API request count: {} failed", calCount);
kruizeObject = new KruizeObject();
kruizeObject.setValidation_data(new ValidationOutputData(false, e.getMessage(), HttpServletResponse.SC_BAD_REQUEST));
return kruizeObject;
}

// generate recommendation
try {
generateRecommendations(kruizeObject);
// store the recommendations in the DB
validationOutputData = addRecommendationsToDB(mainKruizeExperimentMAP, kruizeObject);
if (!validationOutputData.isSuccess()) {
LOGGER.debug("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("UpdateRecommendations API request count: {} failed", calCount);
validationOutputData = new ValidationOutputData(false, validationOutputData.getMessage(), HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
} else {
LOGGER.debug("UpdateRecommendations API request count: {} success", calCount);
}
kruizeObject.setValidation_data(validationOutputData);
} catch (Exception e) {
LOGGER.debug("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("Failed to create recommendation for experiment: {} and interval_start_time: {} and interval_end_time: {}",
experimentName, interval_start_time, interval_end_time);
kruizeObject.setValidation_data(new ValidationOutputData(false, e.getMessage(), HttpServletResponse.SC_INTERNAL_SERVER_ERROR));
}
} catch (Exception e) {
LOGGER.error("Exception occurred while generating recommendations for experiment: {} and interval_end_time: " +
"{} : {}", experimentName, interval_end_time, e.getMessage());
LOGGER.debug("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("UpdateRecommendations API request count: {} failed", calCount);
kruizeObject.setValidation_data(new ValidationOutputData(false, e.getMessage(), HttpServletResponse.SC_INTERNAL_SERVER_ERROR));
}
return kruizeObject;
@@ -1318,13 +1328,19 @@ private ValidationOutputData addRecommendationsToDB(Map<String, KruizeObject> ma
* @param dataSource The data source used for monitoring.
* @throws Exception if an error occurs during the process of fetching and storing results.
*/
private void getResults(Map<String, KruizeObject> mainKruizeExperimentMAP, KruizeObject kruizeObject,
private String getResults(Map<String, KruizeObject> mainKruizeExperimentMAP, KruizeObject kruizeObject,
String experimentName, Timestamp intervalStartTime, String dataSource) throws Exception {
String errorMsg = "";
// get data from the DB in case of remote monitoring
if (kruizeObject.getExperiment_usecase_type().isRemote_monitoring()) {
mainKruizeExperimentMAP.put(experimentName, kruizeObject);
try {
new ExperimentDBService().loadResultsFromDBByName(mainKruizeExperimentMAP, experimentName, intervalStartTime, interval_end_time);
boolean resultsAvailable = new ExperimentDBService().loadResultsFromDBByName(mainKruizeExperimentMAP, experimentName, intervalStartTime, interval_end_time);
if (!resultsAvailable) {
errorMsg = String.format(AnalyzerErrorConstants.AutotuneObjectErrors.NO_METRICS_AVAILABLE, intervalEndTimeStr, intervalStartTime);
LOGGER.error(errorMsg);
return errorMsg;
}
} catch (Exception e) {
LOGGER.error("Failed to fetch the results from the DB: {}", e.getMessage());
}
@@ -1337,6 +1353,7 @@ private void getResults(Map<String, KruizeObject> mainKruizeExperimentMAP, Kruiz
// Fetch metrics based on the datasource
fetchMetricsBasedOnDatasource(kruizeObject, interval_end_time, intervalStartTime, dataSourceInfo);
}
return errorMsg;
}


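Summarising the control-flow change above: `loadResultsFromDBByName(...)` now reports whether any results were found, `getResults(...)` turns "no results" into an error message built from the new `NO_METRICS_AVAILABLE` template, and `prepareRecommendations(...)` maps a non-empty message to a failed validation (HTTP 400) instead of attempting to generate recommendations. The reduction below is illustrative only, with the real Kruize types replaced by plain strings and booleans.

```java
// Simplified, self-contained view of the error propagation added in this commit.
public class ErrorPropagationSketch {

    // Stand-in for ExperimentDBService.loadResultsFromDBByName(...): true only
    // when the DB query returned at least one results entry for the window.
    static boolean loadResultsFromDBByName(String experimentName) {
        return false;
    }

    // Stand-in for RecommendationEngine.getResults(...): empty string means "no error".
    static String getResults(String experimentName, String intervalStartTime, String intervalEndTime) {
        if (!loadResultsFromDBByName(experimentName)) {
            // Same argument order as the real code: end time first, then start time.
            return String.format("No metrics available from %s to %s", intervalEndTime, intervalStartTime);
        }
        return "";
    }

    public static void main(String[] args) {
        String errorMsg = getResults("exp1", "2023-12-31T00:00:00.000Z", "2024-01-15T00:00:00.000Z");
        if (!errorMsg.isEmpty()) {
            // prepareRecommendations() wraps this into ValidationOutputData(false, errorMsg, SC_BAD_REQUEST).
            System.out.println("400 Bad Request: " + errorMsg);
        }
    }
}
```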
Expand Up @@ -22,6 +22,7 @@
import com.autotune.analyzer.serviceObjects.ListRecommendationsAPIObject;
import com.autotune.analyzer.utils.GsonUTCDateAdapter;
import com.autotune.common.data.result.ContainerData;
import com.autotune.operator.KruizeDeploymentInfo;
import com.autotune.utils.KruizeConstants;
import com.autotune.utils.MetricsConfig;
import com.autotune.utils.Utils;
@@ -81,16 +82,18 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response)
LOGGER.debug("UpdateRecommendations API request count: {}", calCount);
String statusValue = "failure";
Timer.Sample timerBUpdateRecommendations = Timer.start(MetricsConfig.meterRegistry());
try {
// Set the character encoding of the request to UTF-8
request.setCharacterEncoding(CHARACTER_ENCODING);
// Get the values from the request parameters
String experiment_name = request.getParameter(KruizeConstants.JSONKeys.EXPERIMENT_NAME);
String intervalEndTimeStr = request.getParameter(KruizeConstants.JSONKeys.INTERVAL_END_TIME);
// Set the character encoding of the request to UTF-8
request.setCharacterEncoding(CHARACTER_ENCODING);
// Get the values from the request parameters
String experiment_name = request.getParameter(KruizeConstants.JSONKeys.EXPERIMENT_NAME);
String intervalEndTimeStr = request.getParameter(KruizeConstants.JSONKeys.INTERVAL_END_TIME);

String intervalStartTimeStr = request.getParameter(KruizeConstants.JSONKeys.INTERVAL_START_TIME);
Timestamp interval_end_time;
Timestamp interval_start_time = null;
String intervalStartTimeStr = request.getParameter(KruizeConstants.JSONKeys.INTERVAL_START_TIME);
Timestamp interval_end_time;
Timestamp interval_start_time = null;
if (KruizeDeploymentInfo.logAllHttpReqAndResp)
LOGGER.info("experiment_name : {} and interval_start_time : {} and interval_end_time : {} ", experiment_name, intervalStartTimeStr, intervalEndTimeStr);
try {
// create recommendation engine object
RecommendationEngine recommendationEngine = new RecommendationEngine(experiment_name, intervalEndTimeStr, intervalStartTimeStr);
// validate and create KruizeObject if successful
@@ -104,7 +107,7 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response)
sendSuccessResponse(response, kruizeObject, interval_end_time);
statusValue = "success";
} else {
LOGGER.debug("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("UpdateRecommendations API request count: {} failed", calCount);
sendErrorResponse(response, null, kruizeObject.getValidation_data().getErrorCode(), kruizeObject.getValidation_data().getMessage());
}
} else {
@@ -113,7 +116,8 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response)
}

} catch (Exception e) {
LOGGER.debug("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("UpdateRecommendations API request count: {} failed", calCount);
LOGGER.error("UpdateRecommendations API, experiment_name: {}, intervalEndTimeStr: {}", experiment_name, intervalEndTimeStr);
LOGGER.error("Exception: " + e.getMessage());
e.printStackTrace();
sendErrorResponse(response, e, HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e.getMessage());
@@ -166,6 +170,8 @@ public boolean shouldSkipClass(Class<?> clazz) {
.create();
gsonStr = gsonObj.toJson(recommendationList);
}
if (KruizeDeploymentInfo.logAllHttpReqAndResp)
LOGGER.info("Update Recommendation API response: {}", recommendationList);
response.getWriter().println(gsonStr);
response.getWriter().close();
}
13 changes: 8 additions & 5 deletions src/main/java/com/autotune/analyzer/services/UpdateResults.java
@@ -79,7 +79,8 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response)
.registerTypeAdapter(Double.class, new CustomNumberDeserializer())
.registerTypeAdapter(Integer.class, new CustomNumberDeserializer())
.create();
LOGGER.debug("updateResults API request payload for requestID {} is {}", calCount, inputData);
if (KruizeDeploymentInfo.logAllHttpReqAndResp)
LOGGER.info("updateResults API request payload for requestID {} is {}", calCount, inputData);
try {
updateResultsAPIObjects = Arrays.asList(gson.fromJson(inputData, UpdateResultsAPIObject[].class));
} catch (JsonParseException e) {
@@ -119,15 +120,15 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response)
);
request.setAttribute("data", jsonObjectList);
String errorMessage = String.format("Out of a total of %s records, %s failed to save", updateResultsAPIObjects.size(), failureAPIObjs.size());
LOGGER.debug("updateResults API request payload for requestID {} failed", calCount);
LOGGER.error("updateResults API request payload for requestID {} failed", calCount);
sendErrorResponse(inputData, request, response, null, HttpServletResponse.SC_BAD_REQUEST, errorMessage);
} else {
LOGGER.debug("updateResults API request payload for requestID {} success", calCount);
sendSuccessResponse(response, AnalyzerConstants.ServiceConstants.RESULT_SAVED);
statusValue = "success";
}
} catch (Exception e) {
LOGGER.debug("updateResults API request payload for requestID {} failed", calCount);
LOGGER.error("updateResults API request payload for requestID {} failed", calCount);
LOGGER.error("Exception: " + e.getMessage());
e.printStackTrace();
sendErrorResponse(inputData, request, response, e, HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e.getMessage());
@@ -148,7 +149,8 @@ private void sendSuccessResponse(HttpServletResponse response, String message) t
String successOutput = new Gson().toJson(
new KruizeResponse(message, HttpServletResponse.SC_CREATED, "", "SUCCESS")
);
LOGGER.debug(successOutput);
if (KruizeDeploymentInfo.logAllHttpReqAndResp)
LOGGER.info("Update Results API response: {}", successOutput);
out.append(
successOutput
);
@@ -162,7 +164,8 @@ public void sendErrorResponse(String inputPayload, HttpServletRequest request, H
e.printStackTrace();
if (null == errorMsg) errorMsg = e.getMessage();
}
LOGGER.debug("UpdateRequestsAPI input pay load {} ", inputPayload);
if (KruizeDeploymentInfo.logAllHttpReqAndResp)
LOGGER.info("UpdateRequestsAPI input pay load {} ", inputPayload);
response.sendError(httpStatusCode, errorMsg);
}

@@ -80,7 +80,7 @@ public static final class AutotuneObjectErrors {
public static final String DUPLICATE_PERF_PROFILE = "Performance Profile already exists: ";
public static final String MISSING_PERF_PROFILE = "Not Found: performance_profile does not exist: ";
public static final String MISSING_EXPERIMENT_NAME = "Not Found: experiment_name does not exist: ";
public static final String MISSING_INTERVAL_END_TIME = "Not Found: interval_end_time does not exist: ";
public static final String NO_METRICS_AVAILABLE = "No metrics available from %s to %s";
public static final String UNSUPPORTED_EXPERIMENT = String.format("At present, the system does not support bulk entries!");
public static final String UNSUPPORTED_EXPERIMENT_RESULTS = String.format("At present, the system does not support bulk entries exceeding %s in quantity!", KruizeDeploymentInfo.bulk_update_results_limit);
public static final String UNSUPPORTED_BULK_KUBERNETES = "Bulk Kubernetes objects are currently unsupported!";
@@ -136,12 +136,14 @@ public void loadAllPerformanceProfiles(Map<String, PerformanceProfile> performan
}
}

public void loadResultsFromDBByName(Map<String, KruizeObject> mainKruizeExperimentMap, String experimentName, Timestamp calculated_start_time, Timestamp interval_end_time) throws Exception {
public boolean loadResultsFromDBByName(Map<String, KruizeObject> mainKruizeExperimentMap, String experimentName, Timestamp calculated_start_time, Timestamp interval_end_time) throws Exception {
ExperimentInterface experimentInterface = new ExperimentInterfaceImpl();
KruizeObject kruizeObject = mainKruizeExperimentMap.get(experimentName);
boolean resultsAvailable = false;
// Load results from the DB and save to local
List<KruizeResultsEntry> kruizeResultsEntries = experimentDAO.loadResultsByExperimentName(experimentName, kruizeObject.getClusterName(), calculated_start_time, interval_end_time);
if (null != kruizeResultsEntries && !kruizeResultsEntries.isEmpty()) {
resultsAvailable = true;
List<UpdateResultsAPIObject> updateResultsAPIObjects = DBHelpers.Converters.KruizeObjectConverters.convertResultEntryToUpdateResultsAPIObject(kruizeResultsEntries);
if (null != updateResultsAPIObjects && !updateResultsAPIObjects.isEmpty()) {
List<ExperimentResultData> resultDataList = new ArrayList<>();
@@ -162,6 +164,7 @@ public void loadResultsFromDBByName(Map<String, KruizeObject> mainKruizeExperime
experimentInterface.addResultsToLocalStorage(mainKruizeExperimentMap, resultDataList);
}
}
return resultsAvailable;
}

public void loadRecommendationsFromDBByName(Map<String, KruizeObject> mainKruizeExperimentMap, String experimentName) throws Exception {
@@ -77,6 +77,7 @@ public class KruizeDeploymentInfo {
public static String em_only_mode;
public static Integer bulk_update_results_limit = 100;
public static Boolean local = false;
public static Boolean logAllHttpReqAndResp = false;

public static int generate_recommendations_date_range_limit_in_days = 15;
public static Integer delete_partition_threshold_in_days = DELETE_PARTITION_THRESHOLD_IN_DAYS;
1 change: 1 addition & 0 deletions src/main/java/com/autotune/utils/KruizeConstants.java
@@ -601,6 +601,7 @@ public static final class KRUIZE_CONFIG_ENV_NAME {
public static final String CLOUDWATCH_LOGS_LOG_STREAM = "cloudwatch_logStream";
public static final String CLOUDWATCH_LOGS_LOG_LEVEL = "cloudwatch_logLevel";
public static final String LOCAL = "local";
public static final String LOG_HTTP_REQ_RESP = "logAllHttpReqAndResp";
}

public static final class RecommendationEngineConstants {
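Taken together, the feature touches three places: the `logAllHttpReqAndResp` key in the ConfigMap JSON, the matching `LOG_HTTP_REQ_RESP` constant above, and the `KruizeDeploymentInfo.logAllHttpReqAndResp` field that the servlets check. The wiring that copies the config value into the field is not shown in this diff; the snippet below is a purely hypothetical illustration of such wiring (Kruize's actual configuration loading may work differently).

```java
// Hypothetical config-to-flag wiring, for illustration only. The key name matches
// KruizeConstants.KRUIZE_CONFIG_ENV_NAME.LOG_HTTP_REQ_RESP; reading it from an
// environment variable with a boolean default is an assumption, not Kruize's code.
public class ConfigWiringSketch {
    public static final String LOG_HTTP_REQ_RESP = "logAllHttpReqAndResp";

    // Same default as the new KruizeDeploymentInfo.logAllHttpReqAndResp field.
    public static Boolean logAllHttpReqAndResp = false;

    public static void main(String[] args) {
        String raw = System.getenv(LOG_HTTP_REQ_RESP); // e.g. "true" from the ConfigMap
        if (raw != null) {
            logAllHttpReqAndResp = Boolean.parseBoolean(raw);
        }
        System.out.println("logAllHttpReqAndResp = " + logAllHttpReqAndResp);
    }
}
```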