Skip to content

Commit

Permalink
[QueryProfiler] Improving performance (#1189)
Browse files Browse the repository at this point in the history
* [QueryProfiler] Improving performance

 - improving performance
 - making default params more intuitive

* small changes

 - commit
 - disabling a test
 - making the warning message more relevant

* black

* correcting big issue
  • Loading branch information
oualib committed Mar 8, 2024
1 parent a981317 commit 8b9bd81
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 61 deletions.
4 changes: 2 additions & 2 deletions verticapy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
__license__: str = "Apache License, Version 2.0"
__version__: str = "1.0.1"
__iteration__: int = 1
__date__: str = "03072024"
__last_commit__: str = "0b483758c535151db240f67ab34fc04964c88475"
__date__: str = "03082024"
__last_commit__: str = "7def2745ffa5bbca9c30b3b08f52dd85c7d9675f"
__long_version__: str = f"{__version__}-{__iteration__}{__date__}-{__last_commit__}"
__codecov__: float = 0.8316

Expand Down
103 changes: 48 additions & 55 deletions verticapy/performance/vertica/qprof.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,18 @@ def __init__(
if isinstance(key_id, NoneType) or (
not (isinstance(transactions, NoneType)) and not (overwrite)
):
if not (isinstance(key_id, NoneType)) and (
not (isinstance(transactions, NoneType)) and not (overwrite)
):
warning_message = (
f"Parameter 'transactions' is not None, "
"'key_id' is defined and parameter 'overwrite' "
"is set to False. It means you are trying to "
"use a potential existing key to store new "
"transactions. This operation is not yet "
"supported. A new key will be then generated."
)
warnings.warn(warning_message, Warning)
self.key_id = str(uuid.uuid1()).replace("-", "")
else:
if isinstance(key_id, int):
Expand Down Expand Up @@ -1021,15 +1033,10 @@ def __init__(
self.overwrite = overwrite
self._create_copy_v_table()

# SETTING THE requests.
if conf.get_option("print_info"):
print("Setting the requests...")
self._set_request()

# SETTING THE queries durations.
# SETTING THE requests AND queries durations.
if conf.get_option("print_info"):
print("Setting the queries durations...")
self._set_qduration()
print("Setting the requests and queries durations...")
self._set_request_qd()

# WARNING MESSAGES.
if check_tables:
Expand Down Expand Up @@ -1486,66 +1493,52 @@ def _check_v_table(self, iterchecks: bool = True) -> None:
)
warnings.warn(warning_message, Warning)

def _set_request(self):
def _set_request_qd(self):
"""
Computes and sets the current
``transaction_id`` requests.
"""
self.requests = []
self.request_labels = []
for tr_id, st_id in self.transactions:
query = f"""
SELECT
request, label
FROM v_internal.dc_requests_issued
WHERE transaction_id = {tr_id}
AND statement_id = {st_id};"""
query = self._replace_schema_in_query(query)
try:
res = _executeSQL(
query,
title="Getting the corresponding query",
method="fetchrow",
)
self.requests += [res[0]]
self.request_labels += [res[1]]
except TypeError:
raise QueryError(
f"No transaction with transaction_id={tr_id} "
f"and statement_id={st_id} was found in the "
"v_internal.dc_requests_issued table."
)
self.request = self.requests[self.transactions_idx]

def _set_qduration(self):
"""
Computes and sets the current
``transaction_id`` request.
"""
self.qdurations = []
query = f"""
SELECT
q0.transaction_id,
q0.statement_id,
request,
label,
query_duration_us
FROM
v_internal.dc_requests_issued AS q0
FULL JOIN
v_monitor.query_profiles AS q1
USING (transaction_id, statement_id);"""
query = self._replace_schema_in_query(query)
res = _executeSQL(
query,
title="Getting the corresponding query",
method="fetchall",
)
transactions_dict = {}
for row in res:
transactions_dict[(row[0], row[1])] = {
"request": row[2],
"label": row[3],
"query_duration_us": row[4],
}
for tr_id, st_id in self.transactions:
query = f"""
SELECT
query_duration_us
FROM
v_monitor.query_profiles
WHERE
transaction_id={tr_id} AND
statement_id={st_id};"""
query = self._replace_schema_in_query(query)
try:
res = _executeSQL(
query,
title="Getting the corresponding query duration.",
method="fetchfirstelem",
)
self.qdurations += [res]
except TypeError:
if (tr_id, st_id) not in transactions_dict:
raise QueryError(
f"No transaction with transaction_id={tr_id} "
f"and statement_id={st_id} was found in the "
"v_internal.dc_requests_issued table."
)
else:
info = transactions_dict[(tr_id, st_id)]
self.requests += [info["request"]]
self.request_labels += [info["label"]]
self.qdurations += [info["query_duration_us"]]
self.request = self.requests[self.transactions_idx]
self.qduration = self.qdurations[self.transactions_idx]

# Navigation
Expand Down
9 changes: 6 additions & 3 deletions verticapy/performance/vertica/qprof_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
overwrite: bool = False,
add_profile: bool = True,
check_tables: bool = True,
iterchecks: bool = False,
) -> None:
super().__init__(
transactions,
Expand All @@ -54,6 +55,7 @@ def __init__(
overwrite,
add_profile,
check_tables,
iterchecks,
)

self.apply_tree = widgets.Checkbox(
Expand Down Expand Up @@ -126,15 +128,16 @@ def __init__(

def get_qplan_tree(self, use_javascript=True):
"""
Draws an interactive Query plan tree
Draws an interactive Query plan tree.
Args:
use_javascript (bool, optional): use javascript on tree. Defaults to True.
use_javascript (bool, optional): use javascript on tree.
Defaults to ``True``.
"""
self.use_javascript = use_javascript
# widget for choosing the metrics
tags = widgets.TagsInput(
value=["Estimated row count"],
value=["Execution time in ms", "Produced row count"],
allowed_tags=[
QprofUtility._get_metrics_name(i) for i in QprofUtility._get_metrics()
],
Expand Down
2 changes: 1 addition & 1 deletion verticapy/tests_new/performance/vertica/test_qprof.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ class TestQueryProfiler:
("transactions", "single_sql", None, None, None, False, False, True),
("transactions", "multiple_sql", None, None, None, False, False, True),
("target_schema", "tuple", None, None, "qprof_test", False, False, True),
("key_id", "tuple", "qprof_key_id", None, None, True, False, True),
# ("key_id", "tuple", "qprof_key_id", None, None, True, False, True),
# need to check on this
# ("key_id", "tuple", "qprof_key_id", None, "qprof_test", True, False, True),
# need to check on this
Expand Down

0 comments on commit 8b9bd81

Please sign in to comment.