Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add defer_connect config to allow eagerly verifying connection #394

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tests/unit/sqlalchemy/test_dialect.py
Original file line number Diff line number Diff line change
@@ -252,7 +252,8 @@ def test_get_default_isolation_level(self):
assert isolation_level == "AUTOCOMMIT"

def test_isolation_level(self):
dbapi_conn = Connection(host="localhost")
# The test only verifies that isolation level is correctly set, no need to attempt actual connection
dbapi_conn = Connection(host="localhost", defer_connect=True)

self.dialect.set_isolation_level(dbapi_conn, "SERIALIZABLE")
assert dbapi_conn._isolation_level == IsolationLevel.SERIALIZABLE
18 changes: 10 additions & 8 deletions tests/unit/test_dbapi.py
Original file line number Diff line number Diff line change
@@ -184,7 +184,8 @@ def test_token_retrieved_once_when_authentication_instance_is_shared(sample_post
conn2.cursor().execute("SELECT 2")
conn2.cursor().execute("SELECT 3")

assert len(_post_statement_requests()) == 7
assert len(_post_statement_requests()) == 9
# assert only a single token request was sent
assert len(_get_token_requests(challenge_id)) == 1


@@ -275,37 +276,38 @@ def test_role_is_set_when_specified(mock_client):


def test_hostname_parsing():
https_server_with_port = Connection("https://mytrinoserver.domain:9999")
# Since this test only verifies URL parsing there is no need to attempt actual connection
https_server_with_port = Connection("https://mytrinoserver.domain:9999", defer_connect=True)
assert https_server_with_port.host == "mytrinoserver.domain"
assert https_server_with_port.port == 9999
assert https_server_with_port.http_scheme == constants.HTTPS

https_server_without_port = Connection("https://mytrinoserver.domain")
https_server_without_port = Connection("https://mytrinoserver.domain", defer_connect=True)
assert https_server_without_port.host == "mytrinoserver.domain"
assert https_server_without_port.port == 8080
assert https_server_without_port.http_scheme == constants.HTTPS

http_server_with_port = Connection("http://mytrinoserver.domain:9999")
http_server_with_port = Connection("http://mytrinoserver.domain:9999", defer_connect=True)
assert http_server_with_port.host == "mytrinoserver.domain"
assert http_server_with_port.port == 9999
assert http_server_with_port.http_scheme == constants.HTTP

http_server_without_port = Connection("http://mytrinoserver.domain")
http_server_without_port = Connection("http://mytrinoserver.domain", defer_connect=True)
assert http_server_without_port.host == "mytrinoserver.domain"
assert http_server_without_port.port == 8080
assert http_server_without_port.http_scheme == constants.HTTP

http_server_with_path = Connection("http://mytrinoserver.domain/some_path")
http_server_with_path = Connection("http://mytrinoserver.domain/some_path", defer_connect=True)
assert http_server_with_path.host == "mytrinoserver.domain/some_path"
assert http_server_with_path.port == 8080
assert http_server_with_path.http_scheme == constants.HTTP

only_hostname = Connection("mytrinoserver.domain")
only_hostname = Connection("mytrinoserver.domain", defer_connect=True)
assert only_hostname.host == "mytrinoserver.domain"
assert only_hostname.port == 8080
assert only_hostname.http_scheme == constants.HTTP

only_hostname_with_path = Connection("mytrinoserver.domain/some_path")
only_hostname_with_path = Connection("mytrinoserver.domain/some_path", defer_connect=True)
assert only_hostname_with_path.host == "mytrinoserver.domain/some_path"
assert only_hostname_with_path.port == 8080
assert only_hostname_with_path.http_scheme == constants.HTTP
28 changes: 28 additions & 0 deletions trino/dbapi.py
Original file line number Diff line number Diff line change
@@ -28,6 +28,8 @@
from typing import Any, Dict, List, NamedTuple, Optional # NOQA for mypy types
from urllib.parse import urlparse

from requests.exceptions import RequestException

try:
from zoneinfo import ZoneInfo
except ModuleNotFoundError:
@@ -157,6 +159,7 @@ def __init__(
legacy_prepared_statements=None,
roles=None,
timezone=None,
defer_connect=False,
):
# Automatically assign http_schema, port based on hostname
parsed_host = urlparse(host, allow_fragments=False)
@@ -201,6 +204,31 @@ def __init__(
self.legacy_primitive_types = legacy_primitive_types
self.legacy_prepared_statements = legacy_prepared_statements

if not defer_connect:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if not defer_connect:
if not defer_connect and auth != constants.DEFAULT_AUTH:

@hovaesco This would mean we only connect eagerly if some authentication is actually provided. WDYT? Or do you think the existing code is better (it is simpler, and we don't make assumptions about when someone might want eager connections)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any production use cases where auth is constants.DEFAULT_AUTH, but it might be helpful in some local testing.

self.connect()

def connect(self) -> None:
connection_test_request = trino.client.TrinoRequest(
self.host,
self.port,
self._client_session,
self._http_session,
self.http_scheme,
self.auth,
self.max_attempts,
self.request_timeout,
verify=self._http_session.verify,
)
try:
test_response = connection_test_request.post("<not-going-to-be-executed>")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@electrum The server doesn't seem to "execute" the query until the first nextUri is followed, i.e. if you send just a POST /v1/statement and then do nothing, query execution doesn't happen.

Is this expected from the protocol? Asking because this change relies on this behaviour to verify up front that the connection is indeed correct (e.g. proper auth), instead of failing when the first query is submitted.

This is a UX issue that people also complain about when using, for example, the Trino CLI or JDBC.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I discussed this with David offline and he said that it's expected behaviour.

However, if we don't send a DELETE, the query might occupy resources on the server. So in this change we should also send a subsequent DELETE for the query.

Additionally, we also discussed making the protocol more "synchronous" in the future so that connection verification always happens eagerly.

@Shaheer-rossoneri14 Can you adjust this code to also send a DELETE for the fake query we executed?

response_content = test_response.content if test_response.content else ""
if not test_response.ok:
raise trino.exceptions.TrinoConnectionError(
"error {}: {}".format(test_response.status_code, response_content))

except RequestException as e:
raise trino.exceptions.TrinoConnectionError("connection failed: {}".format(e))

@property
def isolation_level(self):
return self._isolation_level