Skip to content

Commit

Permalink
Add timeout to urllib open(...).
Browse files Browse the repository at this point in the history
Without an explicit timeout, urllib's open() can block for a very long time —
possibly bounded only by OS-level TCP timeouts (on the order of two hours), or
potentially forever.

See: https://stackoverflow.com/a/29649638
     mesonbuild/meson#4087
  • Loading branch information
joanbm committed Jun 9, 2024
1 parent f6bec91 commit d5dca03
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
5 changes: 3 additions & 2 deletions full_offline_backup_for_todoist/url_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ class URLDownloader(metaclass=ABCMeta):
_tracer: Tracer
_bearer_token: Optional[str]

def __init__(self, tracer: Tracer, timeout: float = 300):
    """ Creates a new downloader instance.

    :param tracer: Tracer used to log diagnostic messages.
    :param timeout: Timeout in seconds applied to every urllib open() call,
                    so a stalled connection cannot block forever.
                    Annotated as float (not int) since urllib accepts
                    fractional seconds and the tests pass e.g. 0.3. """
    self._tracer = tracer
    self._timeout = timeout
    # Bearer token is unset until set_bearer_token() is called
    self._bearer_token = None

def set_bearer_token(self, bearer_token: Optional[str]) -> None:
Expand All @@ -35,7 +36,7 @@ def _download(self, opener: urllib.request.OpenerDirector, url: str,
data: Optional[Dict[str, str]]=None) -> bytes:
""" Downloads the specified URL as bytes using the specified opener """
encoded_data = urllib.parse.urlencode(data).encode() if data else None
with opener.open(url, encoded_data) as url_handle:
with opener.open(url, encoded_data, self._timeout) as url_handle:
return cast(bytes, url_handle.read())

def _download_with_retry(self, opener: urllib.request.OpenerDirector, url: str,
Expand Down
5 changes: 3 additions & 2 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,13 @@ def tearDown(self):
""" Destroys the sample HTTP server for the test """
self.__httpd.shutdown()

def __opener_open_redirect_to_local(self, original_self, url, data, timeout):
    """ Replaces the OpenerDirector.open function of URLLib, in order to redirect all
    requests to a local server.
    This way, we are still able to do the integration test with actual HTTP requests,
    though being handled by a local test HTTP server.
    The timeout argument is accepted and forwarded unchanged so this patch keeps the
    same positional signature as the real OpenerDirector.open. """
    return self.__original_opener_open(original_self, "http://127.0.0.1:33327/" + url,
                                       data, timeout)

@staticmethod
def __get_test_file(subpath):
Expand Down
12 changes: 12 additions & 0 deletions tests/test_url_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# pylint: disable=invalid-name
import unittest
import time
import socket
from unittest.mock import patch
from full_offline_backup_for_todoist.url_downloader import URLLibURLDownloader
from full_offline_backup_for_todoist.tracer import NullTracer
Expand Down Expand Up @@ -73,3 +74,14 @@ def test_urldownloader_throws_on_not_found(self):

# Act/Assert
self.assertRaises(Exception, urldownloader.get, "http://127.0.0.1:33327/notfound.txt")

def test_urldownloader_throws_on_timeout(self):
    """ Tests that the downloader raises a timeout error when the server accepts
    the TCP connection but never sends an HTTP response """
    # Arrange
    # 0.3 s timeout keeps the test fast while still exercising the timeout path
    urldownloader = URLLibURLDownloader(NullTracer(), 0.3)

    # Act/Assert
    # listen() without ever calling accept(): the TCP handshake completes via the
    # kernel backlog, but no response is ever written, so the read must time out
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('127.0.0.1', 33329))
        s.listen()
        self.assertRaises(TimeoutError, urldownloader.get, "http://127.0.0.1:33329")

0 comments on commit d5dca03

Please sign in to comment.