From b14e85513503575ac594d474e712c8760244562f Mon Sep 17 00:00:00 2001 From: Sara Burns Date: Wed, 18 Dec 2024 14:29:38 -0500 Subject: [PATCH 1/3] fix: fix memory=none and clickhouse query --- .../pythonpath/performance_metrics.py | 26 ++++++++++++++----- .../assets/datasets/query_log.yaml | 2 +- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index 206b1aa45..0d379101a 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -85,6 +85,7 @@ def performance_metrics( if org: extra_filters += [{"col": "org", "op": "IN", "val": org}] + chart_count = 0 with patch("clickhouse_connect.common.build_client_name") as mock_build_client_name: mock_build_client_name.return_value = RUN_ID target_dashboards = ( @@ -115,6 +116,7 @@ def performance_metrics( slice, query_contexts, extra_filters ) result = measure_chart(slice, query_context, fail_on_error) + chart_count += 1 if not result: continue for query in result["queries"]: @@ -129,9 +131,7 @@ def performance_metrics( logger.warning("No target charts found!") return report - logger.info("Waiting for clickhouse log...") - time.sleep(20) - get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_error) + get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_error, chart_count) return report @@ -187,7 +187,7 @@ def measure_chart(slice, query_context_dict, fail_on_error): """ Measure the performance of a chart and return the results. """ - logger.info(f"Fetching slice data: {slice}") + logger.info(f"Fetching slice data: {slice} {slice.uuid}") g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}") query_context = ChartDataQueryContextSchema().load(query_context_dict) @@ -200,6 +200,7 @@ def measure_chart(slice, query_context_dict, fail_on_error): end_time = datetime.now() result["time_elapsed"] = (end_time - start_time).total_seconds() result["slice"] = slice + result["uuid"] = slice.uuid for query in result["queries"]: if "error" in query and query["error"]: raise query["error"] @@ -212,7 +213,7 @@ def measure_chart(slice, query_context_dict, fail_on_error): return result -def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_error): +def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_error, chart_count): """ Get the query log from clickhouse and print the results. """ @@ -228,6 +229,17 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err ch_chart_result = measure_chart(slice, query_context, fail_on_error) + # Run CH query until results for all slices are returned + ch_count = 6 + while ch_count > 0: + if ch_chart_result["queries"][0]["rowcount"] < chart_count: + logger.info("Waiting for clickhouse log...") + time.sleep(5) + ch_chart_result = measure_chart(slice, query_context, fail_on_error) + ch_count -= 1 + else: + break + clickhouse_queries = {} for query in ch_chart_result["queries"]: for row in query["data"]: @@ -237,7 +249,7 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err for k, chart_result in enumerate(report): for query in chart_result["queries"]: parsed_sql = ( - str(sqlparse.parse(query["query"])[0]).replace(";", "") + str(sqlparse.parse(query["query"].strip())[0]).replace(";", "") + "\n FORMAT Native" ) chart_result["sql"] = parsed_sql @@ -255,7 +267,7 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err report_str += report_format.format( i=(k + 1), dashboard=chart_result["dashboard"], - slice=chart_result["slice"], + slice=f'{chart_result["slice"]} {chart_result["uuid"]}', superset_time=chart_result["time_elapsed"], ) for query in chart_result["queries"]: diff --git a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml index b9c1b9988..764a46000 100644 --- a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml +++ b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml @@ -118,7 +118,7 @@ sql: | query, http_user_agent FROM system.query_log - WHERE (has(databases, '{{ASPECTS_XAPI_DATABASE}}') OR has(databases, '{{ASPECTS_EVENT_SINK_DATABASE}}') OR has(databases, '{{DBT_PROFILE_TARGET_DATABASE}}')) AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish') + WHERE databases <> ['system'] AND (http_user_agent LIKE 'aspects-%') AND (type <> 'QueryStart') ORDER BY event_time DESC table_name: query_log template_params: null From 0300e3341c05728cad3743e58400d5f1eaa2b669 Mon Sep 17 00:00:00 2001 From: Sara Burns Date: Wed, 18 Dec 2024 14:32:59 -0500 Subject: [PATCH 2/3] fix: only count successful queries --- .../aspects/apps/superset/pythonpath/performance_metrics.py | 2 +- .../openedx-assets/assets/datasets/query_log.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index 0d379101a..89997862b 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -116,9 +116,9 @@ def performance_metrics( slice, query_contexts, extra_filters ) result = measure_chart(slice, query_context, fail_on_error) - chart_count += 1 if not result: continue + chart_count += 1 for query in result["queries"]: # Remove the data from the query to avoid memory issues on large # datasets. diff --git a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml index 764a46000..b46e09d33 100644 --- a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml +++ b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml @@ -118,7 +118,7 @@ sql: | query, http_user_agent FROM system.query_log - WHERE databases <> ['system'] AND (http_user_agent LIKE 'aspects-%') AND (type <> 'QueryStart') + WHERE databases <> ['system'] AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish') ORDER BY event_time DESC table_name: query_log template_params: null From cfae721aa4112ad61272de7b439de3538b860310 Mon Sep 17 00:00:00 2001 From: Sara Burns Date: Thu, 19 Dec 2024 09:18:20 -0500 Subject: [PATCH 3/3] fix: log missing rows --- .../aspects/apps/superset/pythonpath/performance_metrics.py | 5 +++-- .../openedx-assets/assets/datasets/query_log.yaml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py index 89997862b..ac0b77f02 100644 --- a/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py +++ b/tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py @@ -232,8 +232,9 @@ def get_query_log_from_clickhouse(report, query_contexts, print_sql, fail_on_err # Run CH query until results for all slices are returned ch_count = 6 while ch_count > 0: - if ch_chart_result["queries"][0]["rowcount"] < chart_count: - logger.info("Waiting for clickhouse log...") + missing_rows = chart_count - ch_chart_result["queries"][0]["rowcount"] + if missing_rows > 0: + logger.info(f"Waiting for {missing_rows} clickhouse logs...") time.sleep(5) ch_chart_result = measure_chart(slice, query_context, fail_on_error) ch_count -= 1 diff --git a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml index b46e09d33..b9c1b9988 100644 --- a/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml +++ b/tutoraspects/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml @@ -118,7 +118,7 @@ sql: | query, http_user_agent FROM system.query_log - WHERE databases <> ['system'] AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish') + WHERE (has(databases, '{{ASPECTS_XAPI_DATABASE}}') OR has(databases, '{{ASPECTS_EVENT_SINK_DATABASE}}') OR has(databases, '{{DBT_PROFILE_TARGET_DATABASE}}')) AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish') ORDER BY event_time DESC table_name: query_log template_params: null