Merge pull request kruize#1458 from chandrams/stress-test
Included stress test for Bulk API
chandrams authored Feb 19, 2025
2 parents 8d41db9 + 49e741a commit b5b1567
Showing 7 changed files with 759 additions and 21 deletions.
70 changes: 49 additions & 21 deletions tests/scripts/helpers/kruize.py
@@ -18,7 +18,6 @@
import requests
import subprocess


def get_kruize_url():
return URL

@@ -475,33 +474,62 @@ def generate_recommendations(experiment_name):
print("\n************************************************************")
return response

def log_message(message, logger=None):
if logger:
logger.info(message)
else:
print(message)

def post_bulk_api(input_json_file):
print("\n************************************************************")
print("Sending POST request to URL: ", f"{URL}/bulk")
print("Request Payload: ", input_json_file)
curl_command = f"curl -X POST {URL}/bulk -H 'Content-Type: application/json' -d '{json.dumps(input_json_file)}'"
print("Equivalent cURL command: ", curl_command)
def post_bulk_api(input_json_file, logger=None):
msg = "\n************************************************************"
log_message(msg, logger)

msg = f"Sending POST request to URL: {URL}/bulk"
log_message(msg, logger)

msg = f"Request Payload: {input_json_file}"
log_message(msg, logger)

msg = f"curl -X POST {URL}/bulk -H 'Content-Type: application/json' -d '{json.dumps(input_json_file)}'"
log_message(msg, logger)

# Send the POST request
response = requests.post(f"{URL}/bulk", json=input_json_file)
print("Response Status Code: ", response.status_code)
print("Response JSON: ", response.json())

msg = f"Response Status Code: {response.status_code}"
log_message(msg, logger)

msg = f"Response JSON: {response.json()}"
log_message(msg, logger)

return response


def get_bulk_job_status(job_id, verbose=False):
print("\n************************************************************")
def get_bulk_job_status(job_id, include=None, logger=None):
msg = "\n************************************************************"
log_message(msg, logger)

url_basic = f"{URL}/bulk?job_id={job_id}"
url_verbose = f"{URL}/bulk?job_id={job_id}&verbose={verbose}"
url_include = f"{URL}/bulk?job_id={job_id}&include={include}"
getJobIDURL = url_basic
if verbose:
getJobIDURL = url_verbose
print("Sending GET request to URL ( verbose=", verbose, " ): ", getJobIDURL)
curl_command_verbose = f"curl -X GET '{getJobIDURL}'"
print("Equivalent cURL command : ", curl_command_verbose)
response = requests.get(url_verbose)

print("Verbose GET Response Status Code: ", response.status_code)
print("Verbose GET Response JSON: ", response.json())
if include:
getJobIDURL = url_include

msg = f"Sending GET request to URL ( include={include} ): {getJobIDURL}"
log_message(msg, logger)

curl_command_include = f"curl -X GET '{getJobIDURL}'"

msg = f"Equivalent cURL command : {curl_command_include}"
log_message(msg, logger)

response = requests.get(url_include)

msg = f"Include GET Response Status Code: {response.status_code}"
log_message(msg, logger)

if (logger and include == "summary") or logger is None:
msg = f"Include GET Response JSON: {response.json()}"
log_message(msg, logger)

return response
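
A minimal usage sketch (not part of this commit) of the helpers added above, assuming they are imported from tests/scripts/helpers/kruize.py with URL already initialized by the test setup; the job_id field and the status values checked below are assumptions about the Bulk API response shape:

```
# Hypothetical usage sketch: drive post_bulk_api and get_bulk_job_status with a logger.
import logging
import time

from helpers.kruize import post_bulk_api, get_bulk_job_status  # assumed import path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("bulk_stress_test")

bulk_input = {}  # empty bulk configuration, as used by the no-config test

response = post_bulk_api(bulk_input, logger=logger)
job_id = response.json()["job_id"]  # assumed response field

# Poll the job status; include="summary" keeps the logged response small.
while True:
    status = get_bulk_job_status(job_id, include="summary", logger=logger)
    if status.json().get("status") in ("COMPLETED", "FAILED"):  # assumed status values
        break
    time.sleep(5)
```
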
64 changes: 64 additions & 0 deletions tests/scripts/local_monitoring_tests/bulk_stress_test.md
@@ -0,0 +1,64 @@
# **Kruize Bulk API stress test**

The Kruize Bulk API stress test validates the behaviour of the [Kruize Bulk APIs](/design/BulkAPI.md) by loading them with multiple parallel requests to generate recommendations.

## Tests description
- **Kruize Bulk API stress test**
The test does the following:
- Deploys kruize in non-CRD mode using the [deploy script](https://github.com/kruize/autotune/blob/master/deploy.sh) from the autotune repo
- Creates a resource optimization metric profile using the [createMetricProfile API](/design/MetricProfileAPI.md)
- Runs any of the specified tests below:
    - No config test - In this test the Bulk API is invoked in parallel with an empty bulk configuration
    - Time range test - In this test the Bulk API is invoked in parallel with the same time range. The end time is
      the one specified by the user, and the start time is the end time minus the interval hours
    - Split Time range test - In this test the Bulk API is invoked in parallel with different time ranges. The time
      ranges are derived by splitting the days of usage metrics (results) into interval-hour windows, working
      backwards from the specified end date (see the sketch below)
  - Once the Bulk job is created, the test gets the bulk job status and, on completion, fetches the recommendations for the processed experiments
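
The exact request fan-out is handled by the stress test script; the sketch below is a rough, self-contained illustration (an assumption about the approach, not the script's actual code) of how the time_range and time_range_split windows could be derived from the end date, days of results, and interval hours:

```
# Rough illustration only (not the actual test script).
from datetime import datetime, timedelta

end_time = datetime.fromisoformat("2025-01-28T06:20:00+00:00")
days_of_results = 15   # -d
interval_hours = 2     # -t

# time_range test: every request uses the same window [end - interval, end].
single_window = (end_time - timedelta(hours=interval_hours), end_time)

# time_range_split test: the full duration is split into interval-hour windows,
# working backwards from the end date, so each request gets a different window.
windows = []
cursor = end_time
total_hours = days_of_results * 24
while (end_time - cursor).total_seconds() / 3600 < total_hours:
    windows.append((cursor - timedelta(hours=interval_hours), cursor))
    cursor -= timedelta(hours=interval_hours)

print(len(windows))  # 15 days split into 2-hour windows -> 180 windows
```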

## Prerequisites for running the tests:
- A Minikube setup or access to an OpenShift cluster
- Tools like kubectl, oc, curl, jq

- To test with the Thanos datasource, a Thanos setup with TSDB blocks containing usage metrics is required.

## How to run the test?

Use the command below to run the test:

```
<KRUIZE_REPO>/tests/scripts/local_monitoring_tests/bulk_stress_test/bulk_stress_test.sh [-i Kruize image] [-w No. of workers] [-d No. of days of results] [-t interval hours (default - 2)] [-s End date of tsdb block]
[-a kruize replicas] [-r <resultsdir path>] [--skipsetup skip kruize setup] [ -z to test with prometheus datasource] [--test Specify the test to be run] [--url Thanos Datasource url]
```

Where values for bulk_stress_test.sh are:

```
usage: bulk_stress_test.sh
[ -i ] : optional. Kruize docker image to be used for testing
default - quay.io/kruize/autotune:mvp_demo
[ -r ] : Results directory path
[ -w ] : No. of parallel workers (default - 5)
[ -d ] : No. of days of usage metrics / results (default - 15)
[ -t ] : interval hours (default - 2)
[ -s ] : Initial end date (default - current date & time)
[ -a ] : kruize replicas (default - 3)
[ -z ] : To register prometheus datasource with kruize
[ --test ] : Specify the test to be run [time_range/no_config/time_range_split] (default - time_range)
[ --url ] : Thanos datasource url (default - http://thanos-query-frontend-example-query.thanos-operator-system.svc.cluster.local:9090/)
[ --skipsetup ] : skip kruize setup
```

For example, to run the default time_range test with the Prometheus datasource, execute the command below:

```
<AUTOTUNE_REPO>/tests/scripts/local_monitoring_tests/bulk_stress_test/bulk_stress_test.sh -r /tmp/stress-test-results -i quay.io/kruize/autotune_operator:0.3 -a 1 -z
```
To run the time_range_split test with the Thanos datasource, execute the command below:

```
<AUTOTUNE_REPO>/tests/scripts/local_monitoring_tests/bulk_stress_test/bulk_stress_test.sh -r /tmp/stress-test-results -i quay.io/kruize/autotune_operator:0.3 -a 1 --test=time_range_split -s "2025-01-28T06:20:00.000Z"
```
Note - In this example, the TSDB blocks are available for 15 days (the default) and have an end date of 2025-01-28T06:20:00.000Z.

Once the tests are complete, verify that there are no errors or exceptions in the logs.