Skip to content

Commit f443d77

Browse files
committed
Fixing the doc-string to match Sphinx
1 parent 50cd40c commit f443d77

File tree

4 files changed

+28
-15
lines changed

4 files changed

+28
-15
lines changed

scrapling/core/custom_types.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,8 @@ def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entiti
129129

130130

131131
class AttributesHandler(Mapping):
132-
"""A read-only mapping to use instead of the standard dictionary for the speed boost but
133-
at the same time I use it to add more functionalities.
134-
If standard dictionary is needed, just convert this class to dictionary with `dict` function
132+
"""A read-only mapping to use instead of the standard dictionary for the speed boost but at the same time I use it to add more functionalities.
133+
If standard dictionary is needed, just convert this class to dictionary with `dict` function
135134
"""
136135
__slots__ = ('_data',)
137136

scrapling/core/translator.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""
22
Most of this file is adapted version of the translator of parsel library with some modifications simply for 1 important reason...
3-
To add pseudo-elements ``::text`` and ``::attr(ATTR_NAME)`` so we match Parsel/Scrapy selectors format
4-
which will be important in future releases but most importantly...
5-
so you don't have to learn a new selectors/api method like what bs4 done with soupsieve :)
6-
> if you want to learn about this, head to https://cssselect.readthedocs.io/en/latest/#cssselect.FunctionalPseudoElement
3+
4+
To add pseudo-elements ``::text`` and ``::attr(ATTR_NAME)`` so we match Parsel/Scrapy selectors format which will be important in future releases but most importantly...
5+
6+
So you don't have to learn a new selectors/api method like what bs4 did with soupsieve :)
7+
8+
if you want to learn about this, head to https://cssselect.readthedocs.io/en/latest/#cssselect.FunctionalPseudoElement
79
"""
810

911
import re

scrapling/engines/static.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(self, follow_redirects: bool = True, timeout: Optional[Union[int, f
2323
@staticmethod
2424
def _headers_job(headers: Optional[Dict], url: str, stealth: bool) -> Dict:
2525
"""Adds useragent to headers if it doesn't exist, generates real headers and append it to current headers, and
26-
finally generates a referer header that looks like if this request came from Google's search of the current URL's domain.
26+
finally generates a referer header that looks as if this request came from Google's search of the current URL's domain.
2727
2828
:param headers: Current headers in the request if the user passed any
2929
:param url: The Target URL.
@@ -65,6 +65,7 @@ def _prepare_response(self, response: httpxResponse) -> Response:
6565

6666
def get(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
6767
"""Make basic HTTP GET request for you but with some added flavors.
68+
6869
:param url: Target url.
6970
:param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
7071
create a referer header as if this request had come from Google's search of this URL's domain.
@@ -77,6 +78,7 @@ def get(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict)
7778

7879
def post(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
7980
"""Make basic HTTP POST request for you but with some added flavors.
81+
8082
:param url: Target url.
8183
:param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
8284
create a referer header as if this request had come from Google's search of this URL's domain.
@@ -89,6 +91,7 @@ def post(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict
8991

9092
def delete(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
9193
"""Make basic HTTP DELETE request for you but with some added flavors.
94+
9295
:param url: Target url.
9396
:param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
9497
create a referer header as if this request had come from Google's search of this URL's domain.
@@ -101,6 +104,7 @@ def delete(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Di
101104

102105
def put(self, url: str, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
103106
"""Make basic HTTP PUT request for you but with some added flavors.
107+
104108
:param url: Target url.
105109
:param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
106110
create a referer header as if this request had come from Google's search of this URL's domain.

scrapling/fetchers.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class Fetcher(BaseFetcher):
1111
"""
1212
def get(self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
1313
"""Make basic HTTP GET request for you but with some added flavors.
14+
1415
:param url: Target url.
1516
:param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
1617
:param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
@@ -24,6 +25,7 @@ def get(self, url: str, follow_redirects: bool = True, timeout: Optional[Union[i
2425

2526
def post(self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
2627
"""Make basic HTTP POST request for you but with some added flavors.
28+
2729
:param url: Target url.
2830
:param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
2931
:param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
@@ -37,19 +39,22 @@ def post(self, url: str, follow_redirects: bool = True, timeout: Optional[Union[
3739

3840
def put(self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
3941
"""Make basic HTTP PUT request for you but with some added flavors.
42+
4043
:param url: Target url
4144
:param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
4245
:param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
4346
:param stealthy_headers: If enabled (default), Fetcher will create and add real browser's headers and
44-
create a referer header as if this request came from Google's search of this URL's domain.
47+
create a referer header as if this request came from Google's search of this URL's domain.
4548
:param kwargs: Any additional keyword arguments are passed directly to `httpx.put()` function so check httpx documentation for details.
49+
4650
:return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
4751
"""
4852
response_object = StaticEngine(follow_redirects, timeout, adaptor_arguments=self.adaptor_arguments).put(url, stealthy_headers, **kwargs)
4953
return response_object
5054

5155
def delete(self, url: str, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = 10, stealthy_headers: Optional[bool] = True, **kwargs: Dict) -> Response:
5256
"""Make basic HTTP DELETE request for you but with some added flavors.
57+
5358
:param url: Target url
5459
:param follow_redirects: As the name says -- if enabled (default), redirects will be followed.
5560
:param timeout: The time to wait for the request to finish in seconds. The default is 10 seconds.
@@ -77,6 +82,7 @@ def fetch(
7782
) -> Response:
7883
"""
7984
Opens up a browser and do your request based on your chosen options below.
85+
8086
:param url: Target url.
8187
:param headless: Run the browser in headless/hidden (default), 'virtual' screen mode, or headful/visible mode.
8288
:param block_images: Prevent the loading of images through Firefox preferences.
@@ -127,14 +133,15 @@ class PlayWrightFetcher(BaseFetcher):
127133
Using this Fetcher class, you can do requests with:
128134
- Vanilla Playwright without any modifications other than the ones you chose.
129135
- Stealthy Playwright with the stealth mode I wrote for it. It's still a work in progress but it bypasses many online tests like bot.sannysoft.com
130-
Some of the things stealth mode does include:
131-
1) Patches the CDP runtime fingerprint.
132-
2) Mimics some of the real browsers' properties by injecting several JS files and using custom options.
133-
3) Using custom flags on launch to hide Playwright even more and make it faster.
134-
4) Generates real browser's headers of the same type and same user OS then append it to the request.
136+
Some of the things stealth mode does include:
137+
1) Patches the CDP runtime fingerprint.
138+
2) Mimics some of the real browsers' properties by injecting several JS files and using custom options.
139+
3) Using custom flags on launch to hide Playwright even more and make it faster.
140+
4) Generates real browser headers of the same type and same user OS, then appends them to the request.
135141
- Real browsers by passing the CDP URL of your browser to be controlled by the Fetcher and most of the options can be enabled on it.
136142
- NSTBrowser's docker browserless option by passing the CDP URL and enabling `nstbrowser_mode` option.
137-
> Note that these are the main options with PlayWright but it can be mixed together.
143+
144+
> Note that these are the main options with PlayWright, but they can be mixed together.
138145
"""
139146
def fetch(
140147
self, url: str, headless: Union[bool, str] = True, disable_resources: bool = None,
@@ -147,6 +154,7 @@ def fetch(
147154
nstbrowser_mode: bool = False, nstbrowser_config: Optional[Dict] = None,
148155
) -> Response:
149156
"""Opens up a browser and do your request based on your chosen options below.
157+
150158
:param url: Target url.
151159
:param headless: Run the browser in headless/hidden (default), or headful/visible mode.
152160
:param disable_resources: Drop requests of unnecessary resources for speed boost. It depends but it made requests ~25% faster in my tests for some websites.

0 commit comments

Comments
 (0)