From d5dca03ddd8aaa15024a4bd50cdc4c25e4903886 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joan=20Bruguera=20Mic=C3=B3?=
Date: Sun, 9 Jun 2024 21:11:51 +0000
Subject: [PATCH] Add timeout to urllib open(...).

Without an explicit timeout, urllib falls back to the global socket
default, which is no timeout at all, so a stalled request can block
until an OS-level TCP timeout kicks in (which can take hours), or
potentially forever.

See:
https://stackoverflow.com/a/29649638
https://github.com/mesonbuild/meson/issues/4087
---
 full_offline_backup_for_todoist/url_downloader.py |  5 +++--
 tests/test_integration.py                         |  5 +++--
 tests/test_url_downloader.py                      | 12 ++++++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/full_offline_backup_for_todoist/url_downloader.py b/full_offline_backup_for_todoist/url_downloader.py
index 94dfab6..84e4fc9 100644
--- a/full_offline_backup_for_todoist/url_downloader.py
+++ b/full_offline_backup_for_todoist/url_downloader.py
@@ -15,8 +15,9 @@ class URLDownloader(metaclass=ABCMeta):
     _tracer: Tracer
     _bearer_token: Optional[str]
 
-    def __init__(self, tracer: Tracer):
+    def __init__(self, tracer: Tracer, timeout: float = 300):
         self._tracer = tracer
+        self._timeout = timeout
         self._bearer_token = None
 
     def set_bearer_token(self, bearer_token: Optional[str]) -> None:
@@ -35,7 +36,7 @@ def _download(self, opener: urllib.request.OpenerDirector, url: str,
                   data: Optional[Dict[str, str]]=None) -> bytes:
         """ Downloads the specified URL as bytes using the specified opener """
         encoded_data = urllib.parse.urlencode(data).encode() if data else None
-        with opener.open(url, encoded_data) as url_handle:
+        with opener.open(url, encoded_data, self._timeout) as url_handle:
             return cast(bytes, url_handle.read())
 
     def _download_with_retry(self, opener: urllib.request.OpenerDirector, url: str,
diff --git a/tests/test_integration.py b/tests/test_integration.py
index 3626575..181c301 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -57,12 +57,13 @@ def tearDown(self):
         """ Destroys the sample HTTP server for the test """
         self.__httpd.shutdown()
 
-    def __opener_open_redirect_to_local(self, original_self, url, data):
+    def __opener_open_redirect_to_local(self, original_self, url, data, timeout):
         """ Replaces the OpenerDirector.open function of URLLib,
             in order to redirect all requests to a local server.
             This way, we are still able to do the integration test with actual HTTP requests,
             though being handled by a local test HTTP server """
-        return self.__original_opener_open(original_self, "http://127.0.0.1:33327/" + url, data)
+        return self.__original_opener_open(original_self, "http://127.0.0.1:33327/" + url,
+                                           data, timeout)
 
     @staticmethod
     def __get_test_file(subpath):
diff --git a/tests/test_url_downloader.py b/tests/test_url_downloader.py
index 601399d..f5b56f9 100644
--- a/tests/test_url_downloader.py
+++ b/tests/test_url_downloader.py
@@ -3,6 +3,7 @@
 # pylint: disable=invalid-name
 import unittest
 import time
+import socket
 from unittest.mock import patch
 from full_offline_backup_for_todoist.url_downloader import URLLibURLDownloader
 from full_offline_backup_for_todoist.tracer import NullTracer
@@ -73,3 +74,14 @@ def test_urldownloader_throws_on_not_found(self):
 
         # Act/Assert
         self.assertRaises(Exception, urldownloader.get, "http://127.0.0.1:33327/notfound.txt")
+
+    def test_urldownloader_throws_on_timeout(self):
+        """ Tests that the downloader raises an exception on a read timeout """
+        # Arrange
+        urldownloader = URLLibURLDownloader(NullTracer(), 0.3)
+
+        # Act/Assert: the server below never replies, so the request must time out
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.bind(('127.0.0.1', 33329))
+            s.listen()
+            self.assertRaises(TimeoutError, urldownloader.get, "http://127.0.0.1:33329")
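
Reviewer note, not part of the patch to apply: below is a minimal standalone
sketch of the behavior the new test relies on. A socket that listens but never
calls accept() still completes the TCP handshake (in the kernel backlog) and
then leaves the HTTP request unanswered, so a urllib request with a timeout
fails on the read. It assumes Python >= 3.10, where socket.timeout is an alias
of TimeoutError; the 0.3 s value mirrors the test and is otherwise arbitrary.

    import socket
    import urllib.request

    # A listening socket that never accept()s: the kernel completes the TCP
    # handshake, but the HTTP request never receives a response.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        server.bind(("127.0.0.1", 0))  # port 0: let the OS pick a free port
        server.listen()
        port = server.getsockname()[1]
        try:
            urllib.request.urlopen(f"http://127.0.0.1:{port}", timeout=0.3)
        except TimeoutError:  # raised as socket.timeout on Python < 3.10
            print("timed out as expected")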