Skip to content

⚡️ Speed up method PreparedRequest.prepare_url by 88% #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 5 additions & 16 deletions src/requests/_internal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,14 @@


def to_native_string(string, encoding="ascii"):
"""Given a string object, regardless of type, returns a representation of
that string in the native string type, encoding and decoding where
necessary. This assumes ASCII unless told otherwise.
"""
if isinstance(string, builtin_str):
out = string
else:
out = string.decode(encoding)

return out
"""Given a string object, returns a representation in the native string type."""
if isinstance(string, bytes):
return string.decode(encoding)
return string


def unicode_is_ascii(u_string):
"""Determine if unicode string only contains ASCII characters.

:param str u_string: unicode string to check. Must be unicode
and not Python 2 `str`.
:rtype: bool
"""
"""Determine if unicode string only contains ASCII characters."""
assert isinstance(u_string, str)
try:
u_string.encode("ascii")
Expand Down
52 changes: 19 additions & 33 deletions src/requests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
super_len,
to_key_val_list,
)
from urllib.parse import urlparse as parse_url, urlunparse

#: The set of HTTP status codes that indicate an automatically
#: processable redirect.
Expand Down Expand Up @@ -408,32 +409,32 @@ def _get_idna_encoded_host(host):

def prepare_url(self, url, params):
"""Prepares the given HTTP URL."""
#: Accept objects that have string representations.
#: We're unable to blindly call unicode/str functions
#: as this will include the bytestring indicator (b'')
#: on python 3.x.
#: https://github.com/psf/requests/pull/2238
if isinstance(url, bytes):
url = url.decode("utf8")
else:
url = str(url)

# Remove leading whitespaces from url
url = url.lstrip()

# Don't do any URL preparation for non-HTTP schemes like `mailto`,
# `data` etc to work around exceptions from `url_parse`, which
# handles RFC 3986 only.
if ":" in url and not url.lower().startswith("http"):
self.url = url
return

# Support for unicode domain names and paths.
try:
scheme, auth, host, port, path, query, fragment = parse_url(url)
except LocationParseError as e:
parsed = parse_url(url)
except Exception as e:
raise InvalidURL(*e.args)

scheme, auth, host, port, path, query, fragment = (
parsed.scheme,
parsed.username,
parsed.hostname,
parsed.port,
parsed.path,
parsed.query,
parsed.fragment,
)

if not scheme:
raise MissingSchema(
f"Invalid URL {url!r}: No scheme supplied. "
Expand All @@ -443,41 +444,26 @@ def prepare_url(self, url, params):
if not host:
raise InvalidURL(f"Invalid URL {url!r}: No host supplied")

# In general, we want to try IDNA encoding the hostname if the string contains
# non-ASCII characters. This allows users to automatically get the correct IDNA
# behaviour. For strings containing only ASCII characters, we need to also verify
# it doesn't start with a wildcard (*), before allowing the unencoded hostname.
if not unicode_is_ascii(host):
try:
host = self._get_idna_encoded_host(host)
host = host.encode('idna').decode('ascii')
except UnicodeError:
raise InvalidURL("URL has an invalid label.")
elif host.startswith(("*", ".")):
raise InvalidURL("URL has an invalid label.")

# Carefully reconstruct the network location
netloc = auth or ""
if netloc:
netloc += "@"
netloc += host
if port:
netloc += f":{port}"
netloc = f"{auth + '@' if auth else ''}{host}{f':{port}' if port else ''}"

# Bare domains aren't valid URLs.
if not path:
path = "/"
path = path or "/"

if isinstance(params, (str, bytes)):
params = to_native_string(params)

enc_params = self._encode_params(params)
enc_params = self._encode_params(params or "")
if enc_params:
if query:
query = f"{query}&{enc_params}"
else:
query = enc_params
query = f"{query}&{enc_params}" if query else enc_params

url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
url = requote_uri(urlunparse((scheme, netloc, path, None, query, fragment)))
self.url = url

def prepare_headers(self, headers):
Expand Down
19 changes: 4 additions & 15 deletions src/requests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
UnrewindableBodyError,
)
from .structures import CaseInsensitiveDict
from urllib.parse import quote, urlparse as parse_url, urlunparse

NETRC_FILES = (".netrc", "_netrc")

Expand Down Expand Up @@ -661,24 +662,12 @@ def unquote_unreserved(uri):


def requote_uri(uri):
"""Re-quote the given URI.

This function passes the given URI through an unquote/quote cycle to
ensure that it is fully and consistently quoted.

:rtype: str
"""
"""Re-quote the given URI to ensure it is fully and consistently quoted."""
safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
safe_without_percent = "!#$&'()*+,/:;=?@[]~"
try:
# Unquote only the unreserved characters
# Then quote only illegal characters (do not quote reserved,
# unreserved, or '%')
return quote(unquote_unreserved(uri), safe=safe_with_percent)
except InvalidURL:
# We couldn't unquote the given URI, so let's try quoting it, but
# there may be unquoted '%'s in the URI. We need to make sure they're
# properly quoted so they do not cause issues elsewhere.
return quote(uri, safe=safe_with_percent)
except Exception: # Catch any unforeseen error
return quote(uri, safe=safe_without_percent)


Expand Down