Skip to content

Commit

Permalink
style: Rewrite sync Fetchers tests to a cleaner version
Browse files Browse the repository at this point in the history
  • Loading branch information
D4Vinci committed Dec 15, 2024
1 parent 2606f7a commit 3ecffcb
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 136 deletions.
77 changes: 40 additions & 37 deletions tests/fetchers/sync/test_camoufox.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,68 @@
import unittest

import pytest
import pytest_httpbin

from scrapling import StealthyFetcher


@pytest_httpbin.use_class_based_httpbin
# @pytest_httpbin.use_class_based_httpbin_secure
class TestStealthyFetcher(unittest.TestCase):
def setUp(self):
# unittest-style setup: creates the StealthyFetcher and stores the httpbin endpoint URLs on the instance
self.fetcher = StealthyFetcher(auto_match=False)
url = self.httpbin.url
self.status_200 = f'{url}/status/200'
self.status_404 = f'{url}/status/404'
self.status_501 = f'{url}/status/501'
self.basic_url = f'{url}/get'
self.html_url = f'{url}/html'
self.delayed_url = f'{url}/delay/10' # httpbin delays this response by 10 seconds
self.cookies_url = f"{url}/cookies/set/test/value"
class TestStealthyFetcher:
@pytest.fixture(scope="class")
def fetcher(self):
"""Class-scoped fixture that returns a StealthyFetcher created with `auto_match=False`"""
return StealthyFetcher(auto_match=False)

@pytest.fixture(autouse=True)
def setup_urls(self, httpbin):
"""Autouse fixture that builds the httpbin endpoint URLs and stores them on the test instance"""
self.status_200 = f'{httpbin.url}/status/200'
self.status_404 = f'{httpbin.url}/status/404'
self.status_501 = f'{httpbin.url}/status/501'
self.basic_url = f'{httpbin.url}/get'
self.html_url = f'{httpbin.url}/html'
self.delayed_url = f'{httpbin.url}/delay/10' # httpbin delays this response by 10 seconds
self.cookies_url = f"{httpbin.url}/cookies/set/test/value"

def test_basic_fetch(self):
def test_basic_fetch(self, fetcher):
"""Test that basic fetch requests report the 200, 404 and 501 status codes of the requested URLs"""
self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
assert fetcher.fetch(self.status_200).status == 200
assert fetcher.fetch(self.status_404).status == 404
assert fetcher.fetch(self.status_501).status == 501

def test_networkidle(self):
def test_networkidle(self, fetcher):
"""Test that a fetch with `network_idle=True` still completes with status 200"""
self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
assert fetcher.fetch(self.basic_url, network_idle=True).status == 200

def test_blocking_resources(self):
def test_blocking_resources(self, fetcher):
"""Test that fetches with `block_images=True` or `disable_resources=True` still complete with status 200"""
self.assertEqual(self.fetcher.fetch(self.basic_url, block_images=True).status, 200)
self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
assert fetcher.fetch(self.basic_url, block_images=True).status == 200
assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200

def test_waiting_selector(self):
def test_waiting_selector(self, fetcher):
"""Test that waiting for the `h1` selector (with and without `wait_selector_state='visible'`) still completes with status 200"""
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200
assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200

def test_cookies_loading(self):
def test_cookies_loading(self, fetcher):
"""Test that the cookie set by the cookies URL is present on the response after the request"""
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'}

def test_automation(self):
def test_automation(self, fetcher):
"""Test that a `page_action` callback driving the mouse does not break the fetch"""
def scroll_page(page):
page.mouse.wheel(10, 0)
page.mouse.move(100, 400)
page.mouse.up()
return page

self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200

def test_properties(self):
def test_properties(self, fetcher):
"""Test that different argument combinations still complete with status 200"""
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status, 200)
assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=True).status == 200
assert fetcher.fetch(self.html_url, block_webrtc=False, allow_webgl=True).status == 200
assert fetcher.fetch(self.html_url, block_webrtc=True, allow_webgl=False).status == 200
assert fetcher.fetch(self.html_url, extra_headers={'ayo': ''}, os_randomize=True).status == 200

def test_infinite_timeout(self):
def test_infinite_timeout(self, fetcher):
"""Test that `timeout=None` lets the delayed URL complete with status 200"""
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)
assert fetcher.fetch(self.delayed_url, timeout=None).status == 200
118 changes: 66 additions & 52 deletions tests/fetchers/sync/test_httpx.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,82 @@
import unittest

import pytest
import pytest_httpbin

from scrapling import Fetcher


@pytest_httpbin.use_class_based_httpbin
class TestFetcher(unittest.TestCase):
def setUp(self):
# unittest-style setup: creates the Fetcher and stores the httpbin endpoint URLs on the instance
self.fetcher = Fetcher(auto_match=False)
url = self.httpbin.url
self.status_200 = f'{url}/status/200'
self.status_404 = f'{url}/status/404'
self.status_501 = f'{url}/status/501'
self.basic_url = f'{url}/get'
self.post_url = f'{url}/post'
self.put_url = f'{url}/put'
self.delete_url = f'{url}/delete'
self.html_url = f'{url}/html'
class TestFetcher:
@pytest.fixture(scope="class")
def fetcher(self):
"""Class-scoped fixture that returns a Fetcher created with `auto_match=False`"""
return Fetcher(auto_match=False)

@pytest.fixture(autouse=True)
def setup_urls(self, httpbin):
"""Autouse fixture that builds the httpbin endpoint URLs and stores them on the test instance"""
self.status_200 = f'{httpbin.url}/status/200'
self.status_404 = f'{httpbin.url}/status/404'
self.status_501 = f'{httpbin.url}/status/501'
self.basic_url = f'{httpbin.url}/get'
self.post_url = f'{httpbin.url}/post'
self.put_url = f'{httpbin.url}/put'
self.delete_url = f'{httpbin.url}/delete'
self.html_url = f'{httpbin.url}/html'

def test_basic_get(self):
def test_basic_get(self, fetcher):
"""Test that basic GET requests report the 200, 404 and 501 status codes of the requested URLs"""
self.assertEqual(self.fetcher.get(self.status_200).status, 200)
self.assertEqual(self.fetcher.get(self.status_404).status, 404)
self.assertEqual(self.fetcher.get(self.status_501).status, 501)
assert fetcher.get(self.status_200).status == 200
assert fetcher.get(self.status_404).status == 404
assert fetcher.get(self.status_501).status == 501

def test_get_properties(self):
def test_get_properties(self, fetcher):
"""Test that GET requests with different argument combinations still return status 200"""
self.assertEqual(self.fetcher.get(self.status_200, stealthy_headers=True).status, 200)
self.assertEqual(self.fetcher.get(self.status_200, follow_redirects=True).status, 200)
self.assertEqual(self.fetcher.get(self.status_200, timeout=None).status, 200)
self.assertEqual(
self.fetcher.get(self.status_200, stealthy_headers=True, follow_redirects=True, timeout=None).status,
200
)
assert fetcher.get(self.status_200, stealthy_headers=True).status == 200
assert fetcher.get(self.status_200, follow_redirects=True).status == 200
assert fetcher.get(self.status_200, timeout=None).status == 200
assert fetcher.get(
self.status_200,
stealthy_headers=True,
follow_redirects=True,
timeout=None
).status == 200

def test_post_properties(self):
def test_post_properties(self, fetcher):
"""Test that POST requests with different argument combinations still return status 200"""
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}).status, 200)
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status, 200)
self.assertEqual(self.fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status, 200)
self.assertEqual(
self.fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
200
)
assert fetcher.post(self.post_url, data={'key': 'value'}).status == 200
assert fetcher.post(self.post_url, data={'key': 'value'}, stealthy_headers=True).status == 200
assert fetcher.post(self.post_url, data={'key': 'value'}, follow_redirects=True).status == 200
assert fetcher.post(self.post_url, data={'key': 'value'}, timeout=None).status == 200
assert fetcher.post(
self.post_url,
data={'key': 'value'},
stealthy_headers=True,
follow_redirects=True,
timeout=None
).status == 200

def test_put_properties(self):
def test_put_properties(self, fetcher):
"""Test that PUT requests with different argument combinations still return status 200"""
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}).status, 200)
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status, 200)
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status, 200)
self.assertEqual(self.fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status, 200)
self.assertEqual(
self.fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True, follow_redirects=True, timeout=None).status,
200
)
assert fetcher.put(self.put_url, data={'key': 'value'}).status == 200
assert fetcher.put(self.put_url, data={'key': 'value'}, stealthy_headers=True).status == 200
assert fetcher.put(self.put_url, data={'key': 'value'}, follow_redirects=True).status == 200
assert fetcher.put(self.put_url, data={'key': 'value'}, timeout=None).status == 200
assert fetcher.put(
self.put_url,
data={'key': 'value'},
stealthy_headers=True,
follow_redirects=True,
timeout=None
).status == 200

def test_delete_properties(self):
def test_delete_properties(self, fetcher):
"""Test that DELETE requests with different argument combinations still return status 200"""
self.assertEqual(self.fetcher.delete(self.delete_url, stealthy_headers=True).status, 200)
self.assertEqual(self.fetcher.delete(self.delete_url, follow_redirects=True).status, 200)
self.assertEqual(self.fetcher.delete(self.delete_url, timeout=None).status, 200)
self.assertEqual(
self.fetcher.delete(self.delete_url, stealthy_headers=True, follow_redirects=True, timeout=None).status,
200
)
assert fetcher.delete(self.delete_url, stealthy_headers=True).status == 200
assert fetcher.delete(self.delete_url, follow_redirects=True).status == 200
assert fetcher.delete(self.delete_url, timeout=None).status == 200
assert fetcher.delete(
self.delete_url,
stealthy_headers=True,
follow_redirects=True,
timeout=None
).status == 200
103 changes: 56 additions & 47 deletions tests/fetchers/sync/test_playwright.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,87 @@
import unittest

import pytest
import pytest_httpbin

from scrapling import PlayWrightFetcher


@pytest_httpbin.use_class_based_httpbin
# @pytest_httpbin.use_class_based_httpbin_secure
class TestPlayWrightFetcher(unittest.TestCase):
def setUp(self):
# unittest-style setup: creates the PlayWrightFetcher and stores the httpbin endpoint URLs on the instance
self.fetcher = PlayWrightFetcher(auto_match=False)
url = self.httpbin.url
self.status_200 = f'{url}/status/200'
self.status_404 = f'{url}/status/404'
self.status_501 = f'{url}/status/501'
self.basic_url = f'{url}/get'
self.html_url = f'{url}/html'
self.delayed_url = f'{url}/delay/10' # httpbin delays this response by 10 seconds
self.cookies_url = f"{url}/cookies/set/test/value"

def test_basic_fetch(self):
class TestPlayWrightFetcher:

@pytest.fixture(scope="class")
def fetcher(self):
"""Class-scoped fixture that returns a PlayWrightFetcher created with `auto_match=False`"""
return PlayWrightFetcher(auto_match=False)

@pytest.fixture(autouse=True)
def setup_urls(self, httpbin):
"""Autouse fixture that builds the httpbin endpoint URLs and stores them on the test instance"""
self.status_200 = f'{httpbin.url}/status/200'
self.status_404 = f'{httpbin.url}/status/404'
self.status_501 = f'{httpbin.url}/status/501'
self.basic_url = f'{httpbin.url}/get'
self.html_url = f'{httpbin.url}/html'
self.delayed_url = f'{httpbin.url}/delay/10' # httpbin delays this response by 10 seconds
self.cookies_url = f"{httpbin.url}/cookies/set/test/value"

def test_basic_fetch(self, fetcher):
"""Test that a basic fetch request reports status 200 (the 404/501 checks are disabled below)"""
self.assertEqual(self.fetcher.fetch(self.status_200).status, 200)
assert fetcher.fetch(self.status_200).status == 200
# Playwright has a bug that makes it crash when a URL returns a 4xx/5xx status code without a body, so these checks stay disabled until the upstream issue report gets a reply
# self.assertEqual(self.fetcher.fetch(self.status_404).status, 404)
# self.assertEqual(self.fetcher.fetch(self.status_501).status, 501)
# assert fetcher.fetch(self.status_404).status == 404
# assert fetcher.fetch(self.status_501).status == 501

def test_networkidle(self):
def test_networkidle(self, fetcher):
"""Test that a fetch with `network_idle=True` still completes with status 200"""
self.assertEqual(self.fetcher.fetch(self.basic_url, network_idle=True).status, 200)
assert fetcher.fetch(self.basic_url, network_idle=True).status == 200

def test_blocking_resources(self):
def test_blocking_resources(self, fetcher):
"""Test that a fetch with `disable_resources=True` still completes with status 200"""
self.assertEqual(self.fetcher.fetch(self.basic_url, disable_resources=True).status, 200)
assert fetcher.fetch(self.basic_url, disable_resources=True).status == 200

def test_waiting_selector(self):
def test_waiting_selector(self, fetcher):
"""Test that waiting for the `h1` selector (with and without `wait_selector_state='visible'`) still completes with status 200"""
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1').status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status, 200)
assert fetcher.fetch(self.html_url, wait_selector='h1').status == 200
assert fetcher.fetch(self.html_url, wait_selector='h1', wait_selector_state='visible').status == 200

def test_cookies_loading(self):
def test_cookies_loading(self, fetcher):
"""Test that the cookie set by the cookies URL is present on the response after the request"""
self.assertEqual(self.fetcher.fetch(self.cookies_url).cookies, {'test': 'value'})
assert fetcher.fetch(self.cookies_url).cookies == {'test': 'value'}

def test_automation(self):
def test_automation(self, fetcher):
"""Test that a `page_action` callback driving the mouse does not break the fetch"""

def scroll_page(page):
page.mouse.wheel(10, 0)
page.mouse.move(100, 400)
page.mouse.up()
return page

self.assertEqual(self.fetcher.fetch(self.html_url, page_action=scroll_page).status, 200)
assert fetcher.fetch(self.html_url, page_action=scroll_page).status == 200

def test_properties(self):
@pytest.mark.parametrize("kwargs", [
{"disable_webgl": True, "hide_canvas": False},
{"disable_webgl": False, "hide_canvas": True},
{"stealth": True},
{"useragent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0'},
{"extra_headers": {'ayo': ''}}
])
def test_properties(self, fetcher, kwargs):
"""Test that each set of keyword arguments passed to `fetch` still completes with status 200"""
self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=True, hide_canvas=False).status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, disable_webgl=False, hide_canvas=True).status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, stealth=True).status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, useragent='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:131.0) Gecko/20100101 Firefox/131.0').status, 200)
self.assertEqual(self.fetcher.fetch(self.html_url, extra_headers={'ayo': ''}).status, 200)
response = fetcher.fetch(self.html_url, **kwargs)
assert response.status == 200

def test_cdp_url(self):
"""Test whether the fetcher tries to connect to the given CDP URL or not"""
with self.assertRaises(ValueError):
_ = self.fetcher.fetch(self.html_url, cdp_url='blahblah')
def test_cdp_url_invalid(self, fetcher):
"""Test that invalid CDP URLs raise exceptions (`ValueError` for the non-`ws://` values)"""
with pytest.raises(ValueError):
fetcher.fetch(self.html_url, cdp_url='blahblah')

with self.assertRaises(ValueError):
_ = self.fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True)
with pytest.raises(ValueError):
fetcher.fetch(self.html_url, cdp_url='blahblah', nstbrowser_mode=True)

with self.assertRaises(Exception):
# PlayWright has no specific type for this error (it is just `Error`), hence the generic Exception
_ = self.fetcher.fetch(self.html_url, cdp_url='ws://blahblah')
with pytest.raises(Exception):
fetcher.fetch(self.html_url, cdp_url='ws://blahblah')

def test_infinite_timeout(self):
def test_infinite_timeout(self, fetcher, ):
"""Test that `timeout=None` lets the delayed URL complete with status 200"""
self.assertEqual(self.fetcher.fetch(self.delayed_url, timeout=None).status, 200)
response = fetcher.fetch(self.delayed_url, timeout=None)
assert response.status == 200

0 comments on commit 3ecffcb

Please sign in to comment.