From 3ecffcb81c931bb99f8ff5fff350539cdd5a1579 Mon Sep 17 00:00:00 2001 From: Karim shoair Date: Mon, 16 Dec 2024 01:27:18 +0200 Subject: [PATCH] style: Rewrite sync Fetchers tests to a cleaner version --- tests/fetchers/sync/test_camoufox.py | 77 ++++++++-------- tests/fetchers/sync/test_httpx.py | 118 ++++++++++++++----------- tests/fetchers/sync/test_playwright.py | 103 +++++++++++---------- 3 files changed, 162 insertions(+), 136 deletions(-) diff --git a/tests/fetchers/sync/test_camoufox.py b/tests/fetchers/sync/test_camoufox.py index fcbf3b7..33800f4 100644 --- a/tests/fetchers/sync/test_camoufox.py +++ b/tests/fetchers/sync/test_camoufox.py @@ -1,49 +1,52 @@ -import unittest - +import pytest import pytest_httpbin from scrapling import StealthyFetcher @pytest_httpbin.use_class_based_httpbin -# @pytest_httpbin.use_class_based_httpbin_secure -class TestStealthyFetcher(unittest.TestCase): - def setUp(self): - self.fetcher = StealthyFetcher(auto_match=False) - url = self.httpbin.url - self.status_200 = f'{url}/status/200' - self.status_404 = f'{url}/status/404' - self.status_501 = f'{url}/status/501' - self.basic_url = f'{url}/get' - self.html_url = f'{url}/html' - self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response - self.cookies_url = f"{url}/cookies/set/test/value" +class TestStealthyFetcher: + @pytest.fixture(scope="class") + def fetcher(self): + """Fixture to create a StealthyFetcher instance for the entire test class""" + return StealthyFetcher(auto_match=False) + + @pytest.fixture(autouse=True) + def setup_urls(self, httpbin): + """Fixture to set up URLs for testing""" + self.status_200 = f'{httpbin.url}/status/200' + self.status_404 = f'{httpbin.url}/status/404' + self.status_501 = f'{httpbin.url}/status/501' + self.basic_url = f'{httpbin.url}/get' + self.html_url = f'{httpbin.url}/html' + self.delayed_url = f'{httpbin.url}/delay/10' # 10 Seconds delay response + self.cookies_url = f"{httpbin.url}/cookies/set/test/value" - def 
test_basic_fetch(self): + def test_basic_fetch(self, fetcher): """Test doing basic fetch request with multiple statuses""" - self.assertEqual(self.fetcher.fetch(self.status_200).status, 200) - self.assertEqual(self.fetcher.fetch(self.status_404).status, 404) - self.assertEqual(self.fetcher.fetch(self.status_501).status, 501) + assert fetcher.fetch(self.status_200).status == 200 + assert fetcher.fetch(self.status_404).status == 404 + assert fetcher.fetch(self.status_501).status == 501 - def test_networkidle(self): + def test_networkidle(self, fetcher): """Test if waiting for `networkidle` make page does not finish loading or not""" - self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200) + assert fetcher.fetch(self.basic_url, network_idle=True).status == 200 - def test_blocking_resources(self): + def test_blocking_resources(self, fetcher): """Test if blocking resources make page does not finish loading or not""" - self.assertEqual(self.fetcher.fetch(self.basic_url, block_images=True).status, 200) - self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200) + assert fetcher.fetch(self.basic_url, block_images=True).status == 200 + assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200 - def test_waiting_selector(self): + def test_waiting_selector(self, fetcher): """Test if waiting for a selector make page does not finish loading or not""" - self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200) + assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200 + assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200 - def test_cookies_loading(self): + def test_cookies_loading(self, fetcher): """Test if cookies are set after the request""" - 
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'}) + assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'} - def test_automation(self): + def test_automation(self, fetcher): """Test if automation break the code or not""" def scroll_page(page): page.mouse.wheel(10, 0) @@ -51,15 +54,15 @@ def scroll_page(page): page.mouse.up() return page - self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200) + assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200 - def test_properties(self): + def test_properties(self, fetcher): """Test if different arguments breaks the code or not""" - self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status, 200) + assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status == 200 + assert fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status == 200 + assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status == 200 + assert fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status == 200 - def test_infinite_timeout(self): + def test_infinite_timeout(self, fetcher): """Test if infinite timeout breaks the code or not""" - self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200) + assert fetcher.fetch(self.delayed_url, timeout=None).status == 200 diff --git a/tests/fetchers/sync/test_httpx.py b/tests/fetchers/sync/test_httpx.py index 1a5cc02..9f5ca80 100644 --- a/tests/fetchers/sync/test_httpx.py +++ b/tests/fetchers/sync/test_httpx.py @@ -1,68 +1,82 @@ -import unittest - +import pytest 
import pytest_httpbin from scrapling import Fetcher @pytest_httpbin.use_class_based_httpbin -class TestFetcher(unittest.TestCase): - def setUp(self): - self.fetcher = Fetcher(auto_match=False) - url = self.httpbin.url - self.status_200 = f'{url}/status/200' - self.status_404 = f'{url}/status/404' - self.status_501 = f'{url}/status/501' - self.basic_url = f'{url}/get' - self.post_url = f'{url}/post' - self.put_url = f'{url}/put' - self.delete_url = f'{url}/delete' - self.html_url = f'{url}/html' +class TestFetcher: + @pytest.fixture(scope="class") + def fetcher(self): + """Fixture to create a Fetcher instance for the entire test class""" + return Fetcher(auto_match=False) + + @pytest.fixture(autouse=True) + def setup_urls(self, httpbin): + """Fixture to set up URLs for testing""" + self.status_200 = f'{httpbin.url}/status/200' + self.status_404 = f'{httpbin.url}/status/404' + self.status_501 = f'{httpbin.url}/status/501' + self.basic_url = f'{httpbin.url}/get' + self.post_url = f'{httpbin.url}/post' + self.put_url = f'{httpbin.url}/put' + self.delete_url = f'{httpbin.url}/delete' + self.html_url = f'{httpbin.url}/html' - def test_basic_get(self): + def test_basic_get(self, fetcher): """Test doing basic get request with multiple statuses""" - self.assertEqual(self.fetcher.get(self.status_200).status, 200) - self.assertEqual(self.fetcher.get(self.status_404).status, 404) - self.assertEqual(self.fetcher.get(self.status_501).status, 501) + assert fetcher.get(self.status_200).status == 200 + assert fetcher.get(self.status_404).status == 404 + assert fetcher.get(self.status_501).status == 501 - def test_get_properties(self): + def test_get_properties(self, fetcher): """Test if different arguments with GET request breaks the code or not""" - self.assertEqual(self.fetcher.get(self.status_200, stealthy_headers=True).status, 200) - self.assertEqual(self.fetcher.get(self.status_200, follow_redirects=True).status, 200) - self.assertEqual(self.fetcher.get(self.status_200, 
timeout=None).status, 200) - self.assertEqual( - self.fetcher.get(self.status_200, stealthy_headers=True, follow_redirects=True, timeout=None).status, - 200 - ) + assert fetcher.get(self.status_200, stealthy_headers=True).status == 200 + assert fetcher.get(self.status_200, follow_redirects=True).status == 200 + assert fetcher.get(self.status_200, timeout=None).status == 200 + assert fetcher.get( + self.status_200, + stealthy_headers=True, + follow_redirects=True, + timeout=None + ).status == 200 - def test_post_properties(self): + def test_post_properties(self, fetcher): """Test if different arguments with POST request breaks the code or not""" - self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}).status, 200) - self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status, 200) - self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status, 200) - self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status, 200) - self.assertEqual( - self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status, - 200 - ) + assert fetcher.post(self.post_url, data={'key': 'value'}).status == 200 + assert fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status == 200 + assert fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status == 200 + assert fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status == 200 + assert fetcher.post( + self.post_url, + data={'key': 'value'}, + stealthy_headers=True, + follow_redirects=True, + timeout=None + ).status == 200 - def test_put_properties(self): + def test_put_properties(self, fetcher): """Test if different arguments with PUT request breaks the code or not""" - self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}).status, 200) - 
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status, 200) - self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status, 200) - self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status, 200) - self.assertEqual( - self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status, - 200 - ) + assert fetcher.put(self.put_url, data={'key': 'value'}).status == 200 + assert fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status == 200 + assert fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status == 200 + assert fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status == 200 + assert fetcher.put( + self.put_url, + data={'key': 'value'}, + stealthy_headers=True, + follow_redirects=True, + timeout=None + ).status == 200 - def test_delete_properties(self): + def test_delete_properties(self, fetcher): """Test if different arguments with DELETE request breaks the code or not""" - self.assertEqual(self.fetcher.delete(self.delete_url, stealthy_headers=True).status, 200) - self.assertEqual(self.fetcher.delete(self.delete_url, follow_redirects=True).status, 200) - self.assertEqual(self.fetcher.delete(self.delete_url, timeout=None).status, 200) - self.assertEqual( - self.fetcher.delete(self.delete_url, stealthy_headers=True, follow_redirects=True, timeout=None).status, - 200 - ) + assert fetcher.delete(self.delete_url, stealthy_headers=True).status == 200 + assert fetcher.delete(self.delete_url, follow_redirects=True).status == 200 + assert fetcher.delete(self.delete_url, timeout=None).status == 200 + assert fetcher.delete( + self.delete_url, + stealthy_headers=True, + follow_redirects=True, + timeout=None + ).status == 200 diff --git a/tests/fetchers/sync/test_playwright.py b/tests/fetchers/sync/test_playwright.py index a22ecfd..e1f424c 
100644 --- a/tests/fetchers/sync/test_playwright.py +++ b/tests/fetchers/sync/test_playwright.py @@ -1,78 +1,87 @@ -import unittest - +import pytest import pytest_httpbin from scrapling import PlayWrightFetcher @pytest_httpbin.use_class_based_httpbin -# @pytest_httpbin.use_class_based_httpbin_secure -class TestPlayWrightFetcher(unittest.TestCase): - def setUp(self): - self.fetcher = PlayWrightFetcher(auto_match=False) - url = self.httpbin.url - self.status_200 = f'{url}/status/200' - self.status_404 = f'{url}/status/404' - self.status_501 = f'{url}/status/501' - self.basic_url = f'{url}/get' - self.html_url = f'{url}/html' - self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response - self.cookies_url = f"{url}/cookies/set/test/value" - - def test_basic_fetch(self): +class TestPlayWrightFetcher: + + @pytest.fixture(scope="class") + def fetcher(self): + """Fixture to create a PlayWrightFetcher instance for the entire test class""" + return PlayWrightFetcher(auto_match=False) + + @pytest.fixture(autouse=True) + def setup_urls(self, httpbin): + """Fixture to set up URLs for testing""" + self.status_200 = f'{httpbin.url}/status/200' + self.status_404 = f'{httpbin.url}/status/404' + self.status_501 = f'{httpbin.url}/status/501' + self.basic_url = f'{httpbin.url}/get' + self.html_url = f'{httpbin.url}/html' + self.delayed_url = f'{httpbin.url}/delay/10' # 10 Seconds delay response + self.cookies_url = f"{httpbin.url}/cookies/set/test/value" + + def test_basic_fetch(self, fetcher): """Test doing basic fetch request with multiple statuses""" - self.assertEqual(self.fetcher.fetch(self.status_200).status, 200) + assert fetcher.fetch(self.status_200).status == 200 # There's a bug with playwright makes it crashes if a URL returns status code 4xx/5xx without body, let's disable this till they reply to my issue report - # self.assertEqual(self.fetcher.fetch(self.status_404).status, 404) - # self.assertEqual(self.fetcher.fetch(self.status_501).status, 501) + # assert
fetcher.fetch(self.status_404).status == 404 + # assert fetcher.fetch(self.status_501).status == 501 - def test_networkidle(self): + def test_networkidle(self, fetcher): """Test if waiting for `networkidle` make page does not finish loading or not""" - self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200) + assert fetcher.fetch(self.basic_url, network_idle=True).status == 200 - def test_blocking_resources(self): + def test_blocking_resources(self, fetcher): """Test if blocking resources make page does not finish loading or not""" - self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200) + assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200 - def test_waiting_selector(self): + def test_waiting_selector(self, fetcher): """Test if waiting for a selector make page does not finish loading or not""" - self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200) + assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200 + assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200 - def test_cookies_loading(self): + def test_cookies_loading(self, fetcher): """Test if cookies are set after the request""" - self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'}) + assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'} - def test_automation(self): + def test_automation(self, fetcher): """Test if automation break the code or not""" + def scroll_page(page): page.mouse.wheel(10, 0) page.mouse.move(100, 400) page.mouse.up() return page - self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200) + assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200 - def test_properties(self): + @pytest.mark.parametrize("kwargs", 
[ + {"disable_webgl": True, "hide_canvas": False}, + {"disable_webgl": False, "hide_canvas": True}, + {"stealth": True}, + {"useragent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0'}, + {"extra_headers": {'ayo': ''}} + ]) + def test_properties(self, fetcher, kwargs): """Test if different arguments breaks the code or not""" - self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=True, hide_canvas=False).status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=False, hide_canvas=True).status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, stealth=True).status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, useragent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0').status, 200) - self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}).status, 200) + response = fetcher.fetch(self.html_url, **kwargs) + assert response.status == 200 - def test_cdp_url(self): - """Test if it's going to try to connect to cdp url or not""" - with self.assertRaises(ValueError): - _ = self.fetcher.fetch(self.html_url, cdp_url='blahblah') + def test_cdp_url_invalid(self, fetcher): + """Test if invalid CDP URLs raise appropriate exceptions""" + with pytest.raises(ValueError): + fetcher.fetch(self.html_url, cdp_url='blahblah') - with self.assertRaises(ValueError): - _ = self.fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True) + with pytest.raises(ValueError): + fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True) - with self.assertRaises(Exception): - # There's no type for this error in PlayWright, it's just `Error` - _ = self.fetcher.fetch(self.html_url, cdp_url='ws://blahblah') + with pytest.raises(Exception): + fetcher.fetch(self.html_url, cdp_url='ws://blahblah') - def test_infinite_timeout(self): + def test_infinite_timeout(self, fetcher, ): """Test if infinite timeout breaks the code or not""" - 
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200) + response = fetcher.fetch(self.delayed_url, timeout=None) + assert response.status == 200