-
Notifications
You must be signed in to change notification settings - Fork 739
fix: Update push_data and user_data annotation with JsonSerializable instead of Any
#1889
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
| import logging | ||
| from abc import ABC | ||
| from datetime import timedelta | ||
| from typing import TYPE_CHECKING, Any, Generic | ||
| from typing import TYPE_CHECKING, Generic | ||
|
|
||
| from more_itertools import partition | ||
| from pydantic import ValidationError | ||
|
|
@@ -26,7 +26,7 @@ | |
| from typing_extensions import Unpack | ||
|
|
||
| from crawlee import RequestTransformAction | ||
| from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, ExtractLinksFunction | ||
| from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, ExtractLinksFunction, JsonSerializable | ||
|
|
||
| from ._abstract_http_parser import AbstractHttpParser | ||
|
|
||
|
|
@@ -200,7 +200,7 @@ async def extract_links( | |
| selector: str = 'a', | ||
| attribute: str = 'href', | ||
| label: str | None = None, | ||
| user_data: dict[str, Any] | None = None, | ||
| user_data: dict[str, JsonSerializable] | None = None, | ||
|
vdusek marked this conversation as resolved.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| transform_request_function: Callable[[RequestOptions], RequestOptions | RequestTransformAction] | ||
| | None = None, | ||
| **kwargs: Unpack[EnqueueLinksKwargs], | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -80,7 +80,7 @@ | |
|
|
||
| if TYPE_CHECKING: | ||
| import re | ||
| from collections.abc import Iterator | ||
| from collections.abc import Iterator, Mapping | ||
| from contextlib import AbstractAsyncContextManager | ||
|
|
||
| from crawlee._types import ( | ||
|
|
@@ -941,7 +941,7 @@ async def export_data( | |
|
|
||
| async def _push_data( | ||
| self, | ||
| data: list[dict[str, Any]] | dict[str, Any], | ||
| data: Sequence[Mapping[str, JsonSerializable]] | Mapping[str, JsonSerializable], | ||
|
vdusek marked this conversation as resolved.
|
||
| dataset_id: str | None = None, | ||
| dataset_name: str | None = None, | ||
| dataset_alias: str | None = None, | ||
|
|
@@ -1015,7 +1015,7 @@ async def enqueue_links( | |
| selector: str | None = None, | ||
| attribute: str | None = None, | ||
| label: str | None = None, | ||
| user_data: dict[str, Any] | None = None, | ||
| user_data: dict[str, JsonSerializable] | None = None, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| transform_request_function: Callable[[RequestOptions], RequestOptions | RequestTransformAction] | ||
| | None = None, | ||
| requests: Sequence[str | Request] | None = None, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,7 @@ | |
| HttpHeaders, | ||
| HttpMethod, | ||
| HttpPayload, | ||
| JsonSerializable, | ||
| ) | ||
| from crawlee.browsers._types import BrowserType | ||
|
|
||
|
|
@@ -384,7 +385,7 @@ async def extract_links( | |
| selector: str = 'a', | ||
| attribute: str = 'href', | ||
| label: str | None = None, | ||
| user_data: dict | None = None, | ||
| user_data: dict[str, JsonSerializable] | None = None, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| transform_request_function: Callable[[RequestOptions], RequestOptions | RequestTransformAction] | ||
| | None = None, | ||
| **kwargs: Unpack[EnqueueLinksKwargs], | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,8 @@ | |
| computed_field, | ||
| ) | ||
|
|
||
| from crawlee._types import JsonSerializable | ||
|
|
||
| from ._cookies import CookieParam | ||
| from ._session import Session | ||
|
|
||
|
|
@@ -24,7 +26,7 @@ class SessionModel(BaseModel): | |
|
|
||
| id: Annotated[str, Field(alias='id')] | ||
| max_age: Annotated[timedelta, Field(alias='maxAge')] | ||
| user_data: Annotated[dict, Field(alias='userData')] | ||
| user_data: Annotated[dict[str, JsonSerializable], Field(alias='userData')] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| max_error_score: Annotated[float, Field(alias='maxErrorScore')] | ||
| error_score_decrement: Annotated[float, Field(alias='errorScoreDecrement')] | ||
| created_at: Annotated[datetime, Field(alias='createdAt')] | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -11,8 +11,10 @@ | |||||
| from crawlee.sessions._cookies import CookieParam, SessionCookies | ||||||
|
|
||||||
| if TYPE_CHECKING: | ||||||
| from collections.abc import Mapping, MutableMapping | ||||||
| from http.cookiejar import CookieJar | ||||||
|
|
||||||
| from crawlee._types import JsonSerializable | ||||||
| from crawlee.sessions._models import SessionModel | ||||||
|
|
||||||
| logger = getLogger(__name__) | ||||||
|
|
@@ -36,7 +38,7 @@ def __init__( | |||||
| *, | ||||||
| id: str | None = None, | ||||||
| max_age: timedelta = timedelta(minutes=50), | ||||||
| user_data: dict | None = None, | ||||||
| user_data: Mapping[str, JsonSerializable] | None = None, | ||||||
| max_error_score: float = 3.0, | ||||||
| error_score_decrement: float = 0.5, | ||||||
| created_at: datetime | None = None, | ||||||
|
|
@@ -63,7 +65,7 @@ def __init__( | |||||
| """ | ||||||
| self._id = id or crypto_random_object_id(length=10) | ||||||
| self._max_age = max_age | ||||||
| self._user_data = user_data or {} | ||||||
| self._user_data: dict[str, JsonSerializable] = dict(user_data) if user_data is not None else {} | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. no?
Suggested change
|
||||||
| self._max_error_score = max_error_score | ||||||
| self._error_score_decrement = error_score_decrement | ||||||
| self._created_at = created_at or datetime.now(timezone.utc) | ||||||
|
|
@@ -117,7 +119,7 @@ def id(self) -> str: | |||||
| return self._id | ||||||
|
|
||||||
| @property | ||||||
| def user_data(self) -> dict: | ||||||
| def user_data(self) -> MutableMapping[str, JsonSerializable]: | ||||||
| """Get the user data.""" | ||||||
| return self._user_data | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,9 +4,11 @@ | |
| from typing import TYPE_CHECKING | ||
|
|
||
| if TYPE_CHECKING: | ||
| from collections.abc import AsyncIterator | ||
| from typing import Any | ||
| from collections.abc import AsyncIterator, Mapping, Sequence | ||
|
|
||
| from typing_extensions import TypeIs | ||
|
|
||
| from crawlee._types import JsonSerializable | ||
| from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata | ||
|
|
||
|
|
||
|
|
@@ -42,7 +44,7 @@ async def purge(self) -> None: | |
| """ | ||
|
|
||
| @abstractmethod | ||
| async def push_data(self, data: list[Any] | dict[str, Any]) -> None: | ||
| async def push_data(self, data: Sequence[Mapping[str, JsonSerializable]] | Mapping[str, JsonSerializable]) -> None: | ||
| """Push data to the dataset. | ||
|
|
||
| The backend method for the `Dataset.push_data` call. | ||
|
|
@@ -82,7 +84,7 @@ async def iterate_items( | |
| unwind: list[str] | None = None, | ||
| skip_empty: bool = False, | ||
| skip_hidden: bool = False, | ||
| ) -> AsyncIterator[dict[str, Any]]: | ||
| ) -> AsyncIterator[Mapping[str, JsonSerializable]]: | ||
| """Iterate over the dataset items with filtering options. | ||
|
|
||
| The backend method for the `Dataset.iterate_items` call. | ||
|
|
@@ -91,3 +93,9 @@ async def iterate_items( | |
| raise NotImplementedError | ||
| if False: | ||
| yield {} | ||
|
|
||
| @staticmethod | ||
| def _is_list_of_items( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: maybe we can name it "is sequence" instead of "is list"? 🙂 |
||
| data: Sequence[Mapping[str, JsonSerializable]] | Mapping[str, JsonSerializable], | ||
| ) -> TypeIs[Sequence[Mapping[str, JsonSerializable]]]: | ||
| return isinstance(data, list) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just wondering whether we could now use Pydantic's type for type checking as well — have you tried it? 🙂