diff --git a/courlan/network.py b/courlan/network.py index b637ff5..a57e65f 100644 --- a/courlan/network.py +++ b/courlan/network.py @@ -3,47 +3,20 @@ """ import logging +import ssl -import urllib3 +from urllib import request +import certifi -LOGGER = logging.getLogger(__name__) -urllib3.disable_warnings() +CERTIFI_CONTEXT = ssl.create_default_context(cafile=certifi.where()) -RETRY_STRATEGY = urllib3.util.Retry( - total=2, - redirect=2, - raise_on_redirect=False, - status_forcelist=[ - 429, - 499, - 500, - 502, - 503, - 504, - 509, - 520, - 521, - 522, - 523, - 524, - 525, - 526, - 527, - 530, - 598, - ], # unofficial: https://en.wikipedia.org/wiki/List_of_HTTP_status_codes#Unofficial_codes - backoff_factor=1, -) -HTTP_POOL = urllib3.PoolManager( - cert_reqs="CERT_NONE", num_pools=100, retries=RETRY_STRATEGY, timeout=10 -) +LOGGER = logging.getLogger(__name__) ACCEPTABLE_CODES = {200, 300, 301, 302, 303, 304, 305, 306, 307, 308} -# Test redirects def redirection_test(url: str) -> str: """Test final URL to handle redirects Args: @@ -55,17 +28,13 @@ def redirection_test(url: str) -> str: Raises: Nothing. """ - # headers.update({ - # "User-Agent" : str(sample(settings.USER_AGENTS, 1)), # select a random user agent - # }) try: - rhead = HTTP_POOL.request("HEAD", url) # type:ignore[no-untyped-call] + req = request.Request(url, method="HEAD") + with request.urlopen(req, context=CERTIFI_CONTEXT) as f: + pass + if f.status in ACCEPTABLE_CODES: + return f.url except Exception as err: - LOGGER.exception("unknown error: %s %s", url, err) - else: - # response - if rhead.status in ACCEPTABLE_CODES: - LOGGER.debug("result found: %s %s", rhead.geturl(), rhead.status) - return rhead.geturl() # type: ignore - # else: - raise ValueError(f"cannot reach URL: ${url}") + LOGGER.warning("unknown error: %s %s", url, err) + + raise ValueError(f"cannot reach URL: {url}") diff --git a/setup.py b/setup.py index d7ff402..811099e 100644 --- a/setup.py +++ b/setup.py @@ -108,10 +108,9 @@ def get_long_description(): python_requires=">=3.6", install_requires=[ "babel >= 2.11.0", + "certifi", "tld == 0.12.6; python_version < '3.7'", "tld >= 0.13; python_version >= '3.7'", - "urllib3 >= 1.26, < 2; python_version < '3.7'", - "urllib3 >= 1.26, < 3; python_version >= '3.7'", ], # extras_require=extras, entry_points={ diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 307fa43..f5cce1a 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -743,11 +743,11 @@ def test_domain_filter(): def test_urlcheck_redirects(): "Test redirection checks." - assert check_url("https://www.httpbun.com/status/200", with_redirects=True) == ( - "https://httpbun.com", - "httpbun.com", + assert check_url("https://www.httpbin.org/status/301", with_redirects=True) == ( + "https://www.httpbin.org/get", + "httpbin.org", ) - assert check_url("https://www.httpbin.org/status/404", with_redirects=True) is None + assert check_url("https://httpbun.com/status/404", with_redirects=True) is None assert check_url("https://www.ht.or", with_redirects=True) is None