Sorting all library imports with isort

D4Vinci · Nov 30, 2024 · 1a17b2c
1 parent 26bfa88
commit 1a17b2c
Show file tree

Hide file tree

Showing 24 changed files with 108 additions and 113 deletions.
diff --git a/benchmarks.py b/benchmarks.py
@@ -1,17 +1,18 @@
+import functools
 import time
 import timeit
-import functools
-import requests
 from statistics import mean
 
-from scrapling import Adaptor
-from parsel import Selector
-from lxml import etree, html
+import requests
+from autoscraper import AutoScraper
 from bs4 import BeautifulSoup
+from lxml import etree, html
+from mechanicalsoup import StatefulBrowser
+from parsel import Selector
 from pyquery import PyQuery as pq
-from autoscraper import AutoScraper
 from selectolax.parser import HTMLParser
-from mechanicalsoup import StatefulBrowser
+
+from scrapling import Adaptor
 
 large_html = '<html><body>' + '<div class="item">' * 5000 + '</div>' * 5000 + '</body></html>'
 

diff --git a/docs/Examples/selectorless_stackoverflow.py b/docs/Examples/selectorless_stackoverflow.py
@@ -4,6 +4,7 @@
 """
 
 import requests
+
 from scrapling import Adaptor
 
 response = requests.get('https://stackoverflow.com/questions/tagged/web-scraping?sort=MostVotes&filters=NoAcceptedAnswer&edited=true&pagesize=50&page=2')
@@ -22,4 +23,3 @@
     # We will get all the rest of the titles/authors in the page depending on the first title and the first author we got above as a starting point
     for i, (title, author) in enumerate(zip(first_question_title.find_similar(), first_question_author.find_similar()), start=1):
         print(i, title.text, author.text)
-
diff --git a/scrapling/__init__.py b/scrapling/__init__.py
@@ -1,7 +1,8 @@
 # Declare top-level shortcuts
-from scrapling.fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher, CustomFetcher
+from scrapling.core.custom_types import AttributesHandler, TextHandler
+from scrapling.fetchers import (CustomFetcher, Fetcher, PlayWrightFetcher,
+                                StealthyFetcher)
 from scrapling.parser import Adaptor, Adaptors
-from scrapling.core.custom_types import TextHandler, AttributesHandler
 
 __author__ = "Karim Shoair ([email protected])"
 __version__ = "0.2.7"

diff --git a/scrapling/core/_types.py b/scrapling/core/_types.py
@@ -2,9 +2,8 @@
 Type definitions for type checking purposes.
 """
 
-from typing import (
-    Dict, Optional, Union, Callable, Any, List, Tuple, Pattern, Generator, Iterable, Type, TYPE_CHECKING, Literal
-)
+from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable,
+                    List, Literal, Optional, Pattern, Tuple, Type, Union)
 
 try:
     from typing import Protocol

diff --git a/scrapling/core/custom_types.py b/scrapling/core/custom_types.py
@@ -1,13 +1,13 @@
 import re
-from types import MappingProxyType
 from collections.abc import Mapping
+from types import MappingProxyType
 
-from scrapling.core.utils import _is_iterable, flatten
-from scrapling.core._types import Dict, List, Union, Pattern, SupportsIndex
-
-from orjson import loads, dumps
+from orjson import dumps, loads
 from w3lib.html import replace_entities as _replace_entities
 
+from scrapling.core._types import Dict, List, Pattern, SupportsIndex, Union
+from scrapling.core.utils import _is_iterable, flatten
+
 
 class TextHandler(str):
     """Extends standard Python string by adding more functionality"""

diff --git a/scrapling/core/storage_adaptors.py b/scrapling/core/storage_adaptors.py
@@ -1,16 +1,16 @@
-import orjson
-import sqlite3
 import logging
+import sqlite3
 import threading
-from hashlib import sha256
 from abc import ABC, abstractmethod
+from hashlib import sha256
 
-from scrapling.core._types import Dict, Optional, Union
-from scrapling.core.utils import _StorageTools, cache
-
+import orjson
 from lxml import html
 from tldextract import extract as tld
 
+from scrapling.core._types import Dict, Optional, Union
+from scrapling.core.utils import _StorageTools, cache
+
 
 class StorageSystemMixin(ABC):
     # If you want to make your own storage system, you have to inherit from this

diff --git a/scrapling/core/translator.py b/scrapling/core/translator.py
@@ -10,15 +10,14 @@
 
 import re
 
-from w3lib.html import HTML5_WHITESPACE
-from scrapling.core.utils import cache
-from scrapling.core._types import Any, Optional, Protocol, Self
-
-from cssselect.xpath import ExpressionError
-from cssselect.xpath import XPathExpr as OriginalXPathExpr
 from cssselect import HTMLTranslator as OriginalHTMLTranslator
 from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement
+from cssselect.xpath import ExpressionError
+from cssselect.xpath import XPathExpr as OriginalXPathExpr
+from w3lib.html import HTML5_WHITESPACE
 
+from scrapling.core._types import Any, Optional, Protocol, Self
+from scrapling.core.utils import cache
 
 regex = f"[{HTML5_WHITESPACE}]+"
 replace_html5_whitespaces = re.compile(regex).sub

diff --git a/scrapling/core/utils.py b/scrapling/core/utils.py
@@ -1,14 +1,17 @@
-import re
 import logging
+import re
 from itertools import chain
-# Using cache on top of a class is brilliant way to achieve Singleton design pattern without much code
-from functools import lru_cache as cache  # functools.cache is available on Python 3.9+ only so let's keep lru_cache
-
-from scrapling.core._types import Dict, Iterable, Any, Union
 
 import orjson
 from lxml import html
 
+from scrapling.core._types import Any, Dict, Iterable, Union
+
+# Using cache on top of a class is brilliant way to achieve Singleton design pattern without much code
+# functools.cache is available on Python 3.9+ only so let's keep lru_cache
+from functools import lru_cache as cache  # isort:skip
+
+
 html_forbidden = {html.HtmlComment, }
 logging.basicConfig(
     level=logging.ERROR,

diff --git a/scrapling/defaults.py b/scrapling/defaults.py
@@ -1,4 +1,4 @@
-from .fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher
+from .fetchers import Fetcher, PlayWrightFetcher, StealthyFetcher
 
 # If you are going to use Fetchers with the default settings, import them from this file instead for a cleaner looking code
 Fetcher = Fetcher()

diff --git a/scrapling/engines/__init__.py b/scrapling/engines/__init__.py
@@ -1,7 +1,7 @@
 from .camo import CamoufoxEngine
-from .static import StaticEngine
-from .pw import PlaywrightEngine
 from .constants import DEFAULT_DISABLED_RESOURCES, DEFAULT_STEALTH_FLAGS
+from .pw import PlaywrightEngine
+from .static import StaticEngine
 from .toolbelt import check_if_engine_usable
 
 __all__ = ['CamoufoxEngine', 'PlaywrightEngine']
diff --git a/scrapling/engines/camo.py b/scrapling/engines/camo.py
@@ -1,20 +1,16 @@
 import logging
-from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
-
-from scrapling.engines.toolbelt import (
-    Response,
-    do_nothing,
-    StatusText,
-    get_os_name,
-    intercept_route,
-    check_type_validity,
-    construct_proxy_dict,
-    generate_convincing_referer,
-)
 
 from camoufox import DefaultAddons
 from camoufox.sync_api import Camoufox
 
+from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
+                                   Union)
+from scrapling.engines.toolbelt import (Response, StatusText,
+                                        check_type_validity,
+                                        construct_proxy_dict, do_nothing,
+                                        generate_convincing_referer,
+                                        get_os_name, intercept_route)
+
 
 class CamoufoxEngine:
     def __init__(

diff --git a/scrapling/engines/pw.py b/scrapling/engines/pw.py
@@ -1,20 +1,15 @@
 import json
 import logging
-from scrapling.core._types import Union, Callable, Optional, List, Dict
 
-from scrapling.engines.constants import DEFAULT_STEALTH_FLAGS, NSTBROWSER_DEFAULT_QUERY
-from scrapling.engines.toolbelt import (
-    Response,
-    do_nothing,
-    StatusText,
-    js_bypass_path,
-    intercept_route,
-    generate_headers,
-    construct_cdp_url,
-    check_type_validity,
-    construct_proxy_dict,
-    generate_convincing_referer,
-)
+from scrapling.core._types import Callable, Dict, List, Optional, Union
+from scrapling.engines.constants import (DEFAULT_STEALTH_FLAGS,
+                                         NSTBROWSER_DEFAULT_QUERY)
+from scrapling.engines.toolbelt import (Response, StatusText,
+                                        check_type_validity, construct_cdp_url,
+                                        construct_proxy_dict, do_nothing,
+                                        generate_convincing_referer,
+                                        generate_headers, intercept_route,
+                                        js_bypass_path)
 
 
 class PlaywrightEngine:

diff --git a/scrapling/engines/static.py b/scrapling/engines/static.py
@@ -1,11 +1,12 @@
 import logging
 
-from scrapling.core._types import Union, Optional, Dict
-from .toolbelt import Response, generate_convincing_referer, generate_headers
-
 import httpx
 from httpx._models import Response as httpxResponse
 
+from scrapling.core._types import Dict, Optional, Union
+
+from .toolbelt import Response, generate_convincing_referer, generate_headers
+
 
 class StaticEngine:
     def __init__(self, follow_redirects: bool = True, timeout: Optional[Union[int, float]] = None, adaptor_arguments: Dict = None):

diff --git a/scrapling/engines/toolbelt/__init__.py b/scrapling/engines/toolbelt/__init__.py
@@ -1,20 +1,6 @@
-from .fingerprints import (
-    get_os_name,
-    generate_headers,
-    generate_convincing_referer,
-)
-from .custom import (
-    Response,
-    do_nothing,
-    StatusText,
-    BaseFetcher,
-    get_variable_name,
-    check_type_validity,
-    check_if_engine_usable,
-)
-from .navigation import (
-    js_bypass_path,
-    intercept_route,
-    construct_cdp_url,
-    construct_proxy_dict,
-)
+from .custom import (BaseFetcher, Response, StatusText, check_if_engine_usable,
+                     check_type_validity, do_nothing, get_variable_name)
+from .fingerprints import (generate_convincing_referer, generate_headers,
+                           get_os_name)
+from .navigation import (construct_cdp_url, construct_proxy_dict,
+                         intercept_route, js_bypass_path)
diff --git a/scrapling/engines/toolbelt/custom.py b/scrapling/engines/toolbelt/custom.py
@@ -5,10 +5,11 @@
 import logging
 from email.message import Message
 
+from scrapling.core._types import (Any, Callable, Dict, List, Optional, Tuple,
+                                   Type, Union)
 from scrapling.core.custom_types import MappingProxyType
+from scrapling.core.utils import cache, setup_basic_logging
 from scrapling.parser import Adaptor, SQLiteStorageSystem
-from scrapling.core.utils import setup_basic_logging, cache
-from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable, Tuple
 
 
 class ResponseEncoding:

diff --git a/scrapling/engines/toolbelt/fingerprints.py b/scrapling/engines/toolbelt/fingerprints.py
@@ -4,12 +4,12 @@
 
 import platform
 
-from scrapling.core.utils import cache
-from scrapling.core._types import Union, Dict
-
+from browserforge.fingerprints import Fingerprint, FingerprintGenerator
+from browserforge.headers import Browser, HeaderGenerator
 from tldextract import extract
-from browserforge.headers import HeaderGenerator, Browser
-from browserforge.fingerprints import FingerprintGenerator, Fingerprint
+
+from scrapling.core._types import Dict, Union
+from scrapling.core.utils import cache
 
 
 @cache(None, typed=True)

diff --git a/scrapling/engines/toolbelt/navigation.py b/scrapling/engines/toolbelt/navigation.py
@@ -2,16 +2,16 @@
 Functions related to files and URLs
 """
 
-import os
 import logging
-from urllib.parse import urlparse, urlencode
+import os
+from urllib.parse import urlencode, urlparse
+
+from playwright.sync_api import Route
 
+from scrapling.core._types import Dict, Optional, Union
 from scrapling.core.utils import cache
-from scrapling.core._types import Union, Dict, Optional
 from scrapling.engines.constants import DEFAULT_DISABLED_RESOURCES
 
-from playwright.sync_api import Route
-
 
 def intercept_route(route: Route) -> Union[Route, None]:
     """This is just a route handler but it drops requests that its type falls in `DEFAULT_DISABLED_RESOURCES`

diff --git a/scrapling/fetchers.py b/scrapling/fetchers.py
@@ -1,7 +1,8 @@
-from scrapling.core._types import Dict, Optional, Union, Callable, List, Literal
-
-from scrapling.engines.toolbelt import Response, BaseFetcher, do_nothing
-from scrapling.engines import CamoufoxEngine, PlaywrightEngine, StaticEngine, check_if_engine_usable
+from scrapling.core._types import (Callable, Dict, List, Literal, Optional,
+                                   Union)
+from scrapling.engines import (CamoufoxEngine, PlaywrightEngine, StaticEngine,
+                               check_if_engine_usable)
+from scrapling.engines.toolbelt import BaseFetcher, Response, do_nothing
 
 
 class Fetcher(BaseFetcher):

diff --git a/scrapling/parser.py b/scrapling/parser.py
@@ -1,16 +1,23 @@
+import inspect
 import os
 import re
-import inspect
 from difflib import SequenceMatcher
 
-from scrapling.core.translator import HTMLTranslator
-from scrapling.core.mixins import SelectorsGeneration
-from scrapling.core.custom_types import TextHandler, TextHandlers, AttributesHandler
-from scrapling.core.storage_adaptors import SQLiteStorageSystem, StorageSystemMixin, _StorageTools
-from scrapling.core.utils import setup_basic_logging, logging, clean_spaces, flatten, html_forbidden, is_jsonable
-from scrapling.core._types import Any, Dict, List, Tuple, Optional, Pattern, Union, Callable, Generator, SupportsIndex, Iterable
+from cssselect import SelectorError, SelectorSyntaxError
+from cssselect import parse as split_selectors
 from lxml import etree, html
-from cssselect import SelectorError, SelectorSyntaxError, parse as split_selectors
+
+from scrapling.core._types import (Any, Callable, Dict, Generator, Iterable,
+                                   List, Optional, Pattern, SupportsIndex,
+                                   Tuple, Union)
+from scrapling.core.custom_types import (AttributesHandler, TextHandler,
+                                         TextHandlers)
+from scrapling.core.mixins import SelectorsGeneration
+from scrapling.core.storage_adaptors import (SQLiteStorageSystem,
+                                             StorageSystemMixin, _StorageTools)
+from scrapling.core.translator import HTMLTranslator
+from scrapling.core.utils import (clean_spaces, flatten, html_forbidden,
+                                  is_jsonable, logging, setup_basic_logging)
 
 
 class Adaptor(SelectorsGeneration):

diff --git a/setup.py b/setup.py
@@ -1,4 +1,4 @@
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()

diff --git a/tests/fetchers/test_camoufox.py b/tests/fetchers/test_camoufox.py
@@ -1,4 +1,5 @@
 import unittest
+
 import pytest_httpbin
 
 from scrapling import StealthyFetcher

diff --git a/tests/fetchers/test_httpx.py b/tests/fetchers/test_httpx.py
@@ -1,4 +1,5 @@
 import unittest
+
 import pytest_httpbin
 
 from scrapling import Fetcher

diff --git a/tests/fetchers/test_playwright.py b/tests/fetchers/test_playwright.py
@@ -1,4 +1,5 @@
 import unittest
+
 import pytest_httpbin
 
 from scrapling import PlayWrightFetcher