diff --git a/sources/asana_dlt/__init__.py b/sources/asana_dlt/__init__.py index ebf9d10a6..23bc2dba3 100644 --- a/sources/asana_dlt/__init__.py +++ b/sources/asana_dlt/__init__.py @@ -7,10 +7,11 @@ """ import typing as t -from typing import Any, Iterable +from typing import Any, Iterable, Sequence import dlt from dlt.common.typing import TDataItem +from dlt.extract.resource import DltResource from .helpers import get_client from .settings import ( @@ -28,7 +29,7 @@ @dlt.source -def asana_source() -> Any: # should be Sequence[DltResource]: +def asana_source(access_token: str = dlt.secrets.value) -> Sequence[DltResource]: """ The main function that runs all the other functions to fetch data from Asana. Returns: diff --git a/sources/bing_webmaster/__init__.py b/sources/bing_webmaster/__init__.py index 8025e6fba..540583807 100644 --- a/sources/bing_webmaster/__init__.py +++ b/sources/bing_webmaster/__init__.py @@ -6,11 +6,11 @@ """ import time -from typing import Iterable, Iterator, List, Sequence +from typing import Dict, Iterator, List, Sequence import dlt from dlt.common import logger -from dlt.common.typing import DictStrAny, DictStrStr +from dlt.common.typing import DictStrAny from dlt.sources import DltResource from .helpers import get_stats_with_retry, parse_response @@ -18,14 +18,15 @@ @dlt.source(name="bing_webmaster") def source( - site_urls: List[str] = None, site_url_pages: Iterable[DictStrStr] = None + site_urls: List[str] = dlt.config.value, + site_url_pages: List[Dict[str, str]] = dlt.config.value, ) -> Sequence[DltResource]: """ A dlt source for the Bing Webmaster api. It groups resources for the APIs which return organic search traffic statistics Args: site_urls: List[str]: A list of site_urls, e.g, ["dlthub.com", "dlthub.de"]. Use this if you need the weekly traffic per site_url and page - site_url_pages: Iterable[DictStrStr]: A list of pairs of site_url and page. 
Use this if you need the weekly traffic per site_url, page, and query + site_url_pages: List[Dict[str, str]]: A list of pairs of site_url and page. Use this if you need the weekly traffic per site_url, page, and query Returns: Sequence[DltResource]: A sequence of resources that can be selected from including page_stats and page_query_stats. """ @@ -70,7 +71,7 @@ def page_stats( table_name="bing_page_query_stats", ) def page_query_stats( - site_url_pages: Iterable[DictStrStr], + site_url_pages: List[Dict[str, str]], api_key: str = dlt.secrets.value, ) -> Iterator[Iterator[DictStrAny]]: """ @@ -80,7 +81,7 @@ def page_query_stats( https://learn.microsoft.com/en-us/dotnet/api/microsoft.bing.webmaster.api.interfaces.iwebmasterapi.getpagequerystats Args: - site_url_page (Iterable[DictStrStr]): Iterable of site_url and pages to retrieve statistics for. Can be result of a SQL query, a parsed sitemap, etc. + site_url_pages (List[Dict[str, str]]): List of site_url and pages to retrieve statistics for. Can be result of a SQL query, a parsed sitemap, etc. Yields: Iterator[Dict[str, Any]]: An iterator over list of organic traffic statistics. """ diff --git a/sources/chess/__init__.py b/sources/chess/__init__.py index 3915abe5c..f0004f053 100644 --- a/sources/chess/__init__.py +++ b/sources/chess/__init__.py @@ -14,7 +14,9 @@ @dlt.source(name="chess") def source( - players: List[str], start_month: str = None, end_month: str = None + players: List[str] = dlt.config.value, + start_month: str = None, + end_month: str = None, ) -> Sequence[DltResource]: """ A dlt source for the chess.com api.
It groups several resources (in this case chess.com API endpoints) containing diff --git a/sources/github/__init__.py b/sources/github/__init__.py index 4a1834528..459d3cc4d 100644 --- a/sources/github/__init__.py +++ b/sources/github/__init__.py @@ -12,8 +12,8 @@ @dlt.source def github_reactions( - owner: str, - name: str, + owner: str = dlt.config.value, + name: str = dlt.config.value, access_token: str = dlt.secrets.value, items_per_page: int = 100, max_items: Optional[int] = None, diff --git a/sources/hubspot/__init__.py b/sources/hubspot/__init__.py index 1edbc1591..16cdac90c 100644 --- a/sources/hubspot/__init__.py +++ b/sources/hubspot/__init__.py @@ -477,8 +477,8 @@ def fetch_props( @dlt.resource def hubspot_events_for_objects( - object_type: THubspotObjectType, - object_ids: List[str], + object_type: THubspotObjectType = dlt.config.value, + object_ids: List[str] = dlt.config.value, api_key: str = dlt.secrets.value, start_date: pendulum.DateTime = STARTDATE, ) -> DltResource: diff --git a/sources/kafka/__init__.py b/sources/kafka/__init__.py index 846b35621..5036fd20e 100644 --- a/sources/kafka/__init__.py +++ b/sources/kafka/__init__.py @@ -27,7 +27,7 @@ standalone=True, ) def kafka_consumer( - topics: Union[str, List[str]], + topics: List[str] = dlt.config.value, credentials: Union[KafkaCredentials, Consumer] = dlt.secrets.value, msg_processor: Optional[ Callable[[Message], Dict[str, Any]] @@ -60,9 +60,6 @@ def kafka_consumer( Yields: Iterable[TDataItem]: Kafka messages. 
""" - if not isinstance(topics, list): - topics = [topics] - if isinstance(credentials, Consumer): consumer = credentials elif isinstance(credentials, KafkaCredentials): diff --git a/sources/kafka_pipeline.py b/sources/kafka_pipeline.py index af9476cad..0dc08b304 100644 --- a/sources/kafka_pipeline.py +++ b/sources/kafka_pipeline.py @@ -47,7 +47,7 @@ def custom_msg_processor(msg: confluent_kafka.Message) -> Dict[str, Any]: "data": msg.value().decode("utf-8"), } - data = kafka_consumer("books", msg_processor=custom_msg_processor) + data = kafka_consumer(["books"], msg_processor=custom_msg_processor) info = pipeline.run(data) print(info) @@ -63,7 +63,7 @@ def load_starting_from_date() -> None: ) from_date = datetime(2023, 12, 15) - data = kafka_consumer("books", start_from=from_date) + data = kafka_consumer(["books"], start_from=from_date) info = pipeline.run(data) print(info) diff --git a/sources/pipedrive/__init__.py b/sources/pipedrive/__init__.py index 4e9bb3001..1d54fb9a5 100644 --- a/sources/pipedrive/__init__.py +++ b/sources/pipedrive/__init__.py @@ -174,7 +174,7 @@ def parsed_mapping( @dlt.resource(primary_key="id", write_disposition="merge") -def leads( +def leads( pipedrive_api_key: str = dlt.secrets.value, update_time: dlt.sources.incremental[str] = dlt.sources.incremental( "update_time", "1970-01-01 00:00:00"