-
-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
style: Rewrite sync Fetchers tests to a cleaner version
- Loading branch information
Showing
3 changed files
with
162 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,65 +1,68 @@ | ||
import unittest | ||
|
||
import pytest | ||
import pytest_httpbin | ||
|
||
from scrapling import StealthyFetcher | ||
|
||
|
||
@pytest_httpbin.use_class_based_httpbin | ||
# @pytest_httpbin.use_class_based_httpbin_secure | ||
class TestStealthyFetcher(unittest.TestCase): | ||
def setUp(self): | ||
self.fetcher = StealthyFetcher(auto_match=False) | ||
url = self.httpbin.url | ||
self.status_200 = f'{url}/status/200' | ||
self.status_404 = f'{url}/status/404' | ||
self.status_501 = f'{url}/status/501' | ||
self.basic_url = f'{url}/get' | ||
self.html_url = f'{url}/html' | ||
self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response | ||
self.cookies_url = f"{url}/cookies/set/test/value" | ||
class TestStealthyFetcher: | ||
@pytest.fixture(scope="class") | ||
def fetcher(self): | ||
"""Fixture to create a StealthyFetcher instance for the entire test class""" | ||
return StealthyFetcher(auto_match=False) | ||
|
||
@pytest.fixture(autouse=True) | ||
def setup_urls(self, httpbin): | ||
"""Fixture to set up URLs for testing""" | ||
self.status_200 = f'{httpbin.url}/status/200' | ||
self.status_404 = f'{httpbin.url}/status/404' | ||
self.status_501 = f'{httpbin.url}/status/501' | ||
self.basic_url = f'{httpbin.url}/get' | ||
self.html_url = f'{httpbin.url}/html' | ||
self.delayed_url = f'{httpbin.url}/delay/10' # 10 Seconds delay response | ||
self.cookies_url = f"{httpbin.url}/cookies/set/test/value" | ||
|
||
def test_basic_fetch(self): | ||
def test_basic_fetch(self, fetcher): | ||
"""Test doing basic fetch request with multiple statuses""" | ||
self.assertEqual(self.fetcher.fetch(self.status_200).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.status_404).status, 404) | ||
self.assertEqual(self.fetcher.fetch(self.status_501).status, 501) | ||
assert fetcher.fetch(self.status_200).status == 200 | ||
assert fetcher.fetch(self.status_404).status == 404 | ||
assert fetcher.fetch(self.status_501).status == 501 | ||
|
||
def test_networkidle(self): | ||
def test_networkidle(self, fetcher): | ||
"""Test whether waiting for `networkidle` prevents the page from finishing loading""" | ||
self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200) | ||
assert fetcher.fetch(self.basic_url, network_idle=True).status == 200 | ||
|
||
def test_blocking_resources(self): | ||
def test_blocking_resources(self, fetcher): | ||
"""Test whether blocking resources prevents the page from finishing loading""" | ||
self.assertEqual(self.fetcher.fetch(self.basic_url, block_images=True).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200) | ||
assert fetcher.fetch(self.basic_url, block_images=True).status == 200 | ||
assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200 | ||
|
||
def test_waiting_selector(self): | ||
def test_waiting_selector(self, fetcher): | ||
"""Test whether waiting for a selector prevents the page from finishing loading""" | ||
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200) | ||
assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200 | ||
assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200 | ||
|
||
def test_cookies_loading(self): | ||
def test_cookies_loading(self, fetcher): | ||
"""Test if cookies are set after the request""" | ||
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'}) | ||
assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'} | ||
|
||
def test_automation(self): | ||
def test_automation(self, fetcher): | ||
"""Test if automation break the code or not""" | ||
def scroll_page(page): | ||
page.mouse.wheel(10, 0) | ||
page.mouse.move(100, 400) | ||
page.mouse.up() | ||
return page | ||
|
||
self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200) | ||
assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200 | ||
|
||
def test_properties(self): | ||
def test_properties(self, fetcher): | ||
"""Test if different arguments breaks the code or not""" | ||
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status, 200) | ||
assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status == 200 | ||
assert fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status == 200 | ||
assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status == 200 | ||
assert fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status == 200 | ||
|
||
def test_infinite_timeout(self): | ||
def test_infinite_timeout(self, fetcher): | ||
"""Test if infinite timeout breaks the code or not""" | ||
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200) | ||
assert fetcher.fetch(self.delayed_url, timeout=None).status == 200 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +1,82 @@ | ||
import unittest | ||
|
||
import pytest | ||
import pytest_httpbin | ||
|
||
from scrapling import Fetcher | ||
|
||
|
||
@pytest_httpbin.use_class_based_httpbin | ||
class TestFetcher(unittest.TestCase): | ||
def setUp(self): | ||
self.fetcher = Fetcher(auto_match=False) | ||
url = self.httpbin.url | ||
self.status_200 = f'{url}/status/200' | ||
self.status_404 = f'{url}/status/404' | ||
self.status_501 = f'{url}/status/501' | ||
self.basic_url = f'{url}/get' | ||
self.post_url = f'{url}/post' | ||
self.put_url = f'{url}/put' | ||
self.delete_url = f'{url}/delete' | ||
self.html_url = f'{url}/html' | ||
class TestFetcher: | ||
@pytest.fixture(scope="class") | ||
def fetcher(self): | ||
"""Fixture to create a Fetcher instance for the entire test class""" | ||
return Fetcher(auto_match=False) | ||
|
||
@pytest.fixture(autouse=True) | ||
def setup_urls(self, httpbin): | ||
"""Fixture to set up URLs for testing""" | ||
self.status_200 = f'{httpbin.url}/status/200' | ||
self.status_404 = f'{httpbin.url}/status/404' | ||
self.status_501 = f'{httpbin.url}/status/501' | ||
self.basic_url = f'{httpbin.url}/get' | ||
self.post_url = f'{httpbin.url}/post' | ||
self.put_url = f'{httpbin.url}/put' | ||
self.delete_url = f'{httpbin.url}/delete' | ||
self.html_url = f'{httpbin.url}/html' | ||
|
||
def test_basic_get(self): | ||
def test_basic_get(self, fetcher): | ||
"""Test doing basic get request with multiple statuses""" | ||
self.assertEqual(self.fetcher.get(self.status_200).status, 200) | ||
self.assertEqual(self.fetcher.get(self.status_404).status, 404) | ||
self.assertEqual(self.fetcher.get(self.status_501).status, 501) | ||
assert fetcher.get(self.status_200).status == 200 | ||
assert fetcher.get(self.status_404).status == 404 | ||
assert fetcher.get(self.status_501).status == 501 | ||
|
||
def test_get_properties(self): | ||
def test_get_properties(self, fetcher): | ||
"""Test if different arguments with GET request breaks the code or not""" | ||
self.assertEqual(self.fetcher.get(self.status_200, stealthy_headers=True).status, 200) | ||
self.assertEqual(self.fetcher.get(self.status_200, follow_redirects=True).status, 200) | ||
self.assertEqual(self.fetcher.get(self.status_200, timeout=None).status, 200) | ||
self.assertEqual( | ||
self.fetcher.get(self.status_200, stealthy_headers=True, follow_redirects=True, timeout=None).status, | ||
200 | ||
) | ||
assert fetcher.get(self.status_200, stealthy_headers=True).status == 200 | ||
assert fetcher.get(self.status_200, follow_redirects=True).status == 200 | ||
assert fetcher.get(self.status_200, timeout=None).status == 200 | ||
assert fetcher.get( | ||
self.status_200, | ||
stealthy_headers=True, | ||
follow_redirects=True, | ||
timeout=None | ||
).status == 200 | ||
|
||
def test_post_properties(self): | ||
def test_post_properties(self, fetcher): | ||
"""Test if different arguments with POST request breaks the code or not""" | ||
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}).status, 200) | ||
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status, 200) | ||
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status, 200) | ||
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status, 200) | ||
self.assertEqual( | ||
self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status, | ||
200 | ||
) | ||
assert fetcher.post(self.post_url, data={'key': 'value'}).status == 200 | ||
assert fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status == 200 | ||
assert fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status == 200 | ||
assert fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status == 200 | ||
assert fetcher.post( | ||
self.post_url, | ||
data={'key': 'value'}, | ||
stealthy_headers=True, | ||
follow_redirects=True, | ||
timeout=None | ||
).status == 200 | ||
|
||
def test_put_properties(self): | ||
def test_put_properties(self, fetcher): | ||
"""Test if different arguments with PUT request breaks the code or not""" | ||
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}).status, 200) | ||
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status, 200) | ||
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status, 200) | ||
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status, 200) | ||
self.assertEqual( | ||
self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status, | ||
200 | ||
) | ||
assert fetcher.put(self.put_url, data={'key': 'value'}).status == 200 | ||
assert fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status == 200 | ||
assert fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status == 200 | ||
assert fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status == 200 | ||
assert fetcher.put( | ||
self.put_url, | ||
data={'key': 'value'}, | ||
stealthy_headers=True, | ||
follow_redirects=True, | ||
timeout=None | ||
).status == 200 | ||
|
||
def test_delete_properties(self): | ||
def test_delete_properties(self, fetcher): | ||
"""Test if different arguments with DELETE request breaks the code or not""" | ||
self.assertEqual(self.fetcher.delete(self.delete_url, stealthy_headers=True).status, 200) | ||
self.assertEqual(self.fetcher.delete(self.delete_url, follow_redirects=True).status, 200) | ||
self.assertEqual(self.fetcher.delete(self.delete_url, timeout=None).status, 200) | ||
self.assertEqual( | ||
self.fetcher.delete(self.delete_url, stealthy_headers=True, follow_redirects=True, timeout=None).status, | ||
200 | ||
) | ||
assert fetcher.delete(self.delete_url, stealthy_headers=True).status == 200 | ||
assert fetcher.delete(self.delete_url, follow_redirects=True).status == 200 | ||
assert fetcher.delete(self.delete_url, timeout=None).status == 200 | ||
assert fetcher.delete( | ||
self.delete_url, | ||
stealthy_headers=True, | ||
follow_redirects=True, | ||
timeout=None | ||
).status == 200 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,78 +1,87 @@ | ||
import unittest | ||
|
||
import pytest | ||
import pytest_httpbin | ||
|
||
from scrapling import PlayWrightFetcher | ||
|
||
|
||
@pytest_httpbin.use_class_based_httpbin | ||
# @pytest_httpbin.use_class_based_httpbin_secure | ||
class TestPlayWrightFetcher(unittest.TestCase): | ||
def setUp(self): | ||
self.fetcher = PlayWrightFetcher(auto_match=False) | ||
url = self.httpbin.url | ||
self.status_200 = f'{url}/status/200' | ||
self.status_404 = f'{url}/status/404' | ||
self.status_501 = f'{url}/status/501' | ||
self.basic_url = f'{url}/get' | ||
self.html_url = f'{url}/html' | ||
self.delayed_url = f'{url}/delay/10' # 10 Seconds delay response | ||
self.cookies_url = f"{url}/cookies/set/test/value" | ||
|
||
def test_basic_fetch(self): | ||
class TestPlayWrightFetcher: | ||
|
||
@pytest.fixture(scope="class") | ||
def fetcher(self): | ||
"""Fixture to create a PlayWrightFetcher instance for the entire test class""" | ||
return PlayWrightFetcher(auto_match=False) | ||
|
||
@pytest.fixture(autouse=True) | ||
def setup_urls(self, httpbin): | ||
"""Fixture to set up URLs for testing""" | ||
self.status_200 = f'{httpbin.url}/status/200' | ||
self.status_404 = f'{httpbin.url}/status/404' | ||
self.status_501 = f'{httpbin.url}/status/501' | ||
self.basic_url = f'{httpbin.url}/get' | ||
self.html_url = f'{httpbin.url}/html' | ||
self.delayed_url = f'{httpbin.url}/delay/10' # 10 Seconds delay response | ||
self.cookies_url = f"{httpbin.url}/cookies/set/test/value" | ||
|
||
def test_basic_fetch(self, fetcher): | ||
"""Test doing basic fetch request with multiple statuses""" | ||
self.assertEqual(self.fetcher.fetch(self.status_200).status, 200) | ||
assert fetcher.fetch(self.status_200).status == 200 | ||
# There's a bug in Playwright that makes it crash if a URL returns a 4xx/5xx status code without a body, so these checks are disabled until they reply to my issue report | ||
# self.assertEqual(self.fetcher.fetch(self.status_404).status, 404) | ||
# self.assertEqual(self.fetcher.fetch(self.status_501).status, 501) | ||
# assert fetcher.fetch(self.status_404).status == 404 | ||
# assert fetcher.fetch(self.status_501).status == 501 | ||
|
||
def test_networkidle(self): | ||
def test_networkidle(self, fetcher): | ||
"""Test whether waiting for `networkidle` prevents the page from finishing loading""" | ||
self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200) | ||
assert fetcher.fetch(self.basic_url, network_idle=True).status == 200 | ||
|
||
def test_blocking_resources(self): | ||
def test_blocking_resources(self, fetcher): | ||
"""Test whether blocking resources prevents the page from finishing loading""" | ||
self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200) | ||
assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200 | ||
|
||
def test_waiting_selector(self): | ||
def test_waiting_selector(self, fetcher): | ||
"""Test whether waiting for a selector prevents the page from finishing loading""" | ||
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200) | ||
assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200 | ||
assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200 | ||
|
||
def test_cookies_loading(self): | ||
def test_cookies_loading(self, fetcher): | ||
"""Test if cookies are set after the request""" | ||
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'}) | ||
assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'} | ||
|
||
def test_automation(self): | ||
def test_automation(self, fetcher): | ||
"""Test if automation break the code or not""" | ||
|
||
def scroll_page(page): | ||
page.mouse.wheel(10, 0) | ||
page.mouse.move(100, 400) | ||
page.mouse.up() | ||
return page | ||
|
||
self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200) | ||
assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200 | ||
|
||
def test_properties(self): | ||
@pytest.mark.parametrize("kwargs", [ | ||
{"disable_webgl": True, "hide_canvas": False}, | ||
{"disable_webgl": False, "hide_canvas": True}, | ||
{"stealth": True}, | ||
{"useragent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0'}, | ||
{"extra_headers": {'ayo': ''}} | ||
]) | ||
def test_properties(self, fetcher, kwargs): | ||
"""Test if different arguments breaks the code or not""" | ||
self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=True, hide_canvas=False).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=False, hide_canvas=True).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, stealth=True).status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, useragent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0').status, 200) | ||
self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}).status, 200) | ||
response = fetcher.fetch(self.html_url, **kwargs) | ||
assert response.status == 200 | ||
|
||
def test_cdp_url(self): | ||
"""Test if it's going to try to connect to cdp url or not""" | ||
with self.assertRaises(ValueError): | ||
_ = self.fetcher.fetch(self.html_url, cdp_url='blahblah') | ||
def test_cdp_url_invalid(self, fetcher): | ||
"""Test if invalid CDP URLs raise appropriate exceptions""" | ||
with pytest.raises(ValueError): | ||
fetcher.fetch(self.html_url, cdp_url='blahblah') | ||
|
||
with self.assertRaises(ValueError): | ||
_ = self.fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True) | ||
with pytest.raises(ValueError): | ||
fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True) | ||
|
||
with self.assertRaises(Exception): | ||
# There's no type for this error in PlayWright, it's just `Error` | ||
_ = self.fetcher.fetch(self.html_url, cdp_url='ws://blahblah') | ||
with pytest.raises(Exception): | ||
fetcher.fetch(self.html_url, cdp_url='ws://blahblah') | ||
|
||
def test_infinite_timeout(self): | ||
def test_infinite_timeout(self, fetcher, ): | ||
"""Test if infinite timeout breaks the code or not""" | ||
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200) | ||
response = fetcher.fetch(self.delayed_url, timeout=None) | ||
assert response.status == 200 |