Share the flaky-connection retry predicate between the Crossref and
Semantic Scholar clients.

The Crossref-only helper is_flaky_crossref_ssl_error is removed in favour of
a host-parameterised factory, make_flaky_ssl_error_predicate, defined in
paperqa/clients/exceptions.py. Both Crossref retry decorators now use the
factory, and get_s2_doc_details_from_doi gains the same tenacity retry
(stop after 3 attempts, warning logged before each sleep).

diff --git a/paperqa/clients/crossref.py b/paperqa/clients/crossref.py
index fc51d06c..9a9d76ae 100644
--- a/paperqa/clients/crossref.py
+++ b/paperqa/clients/crossref.py
@@ -30,7 +30,7 @@
 )
 
 from .client_models import DOIOrTitleBasedProvider, DOIQuery, TitleAuthorQuery
-from .exceptions import DOINotFoundError
+from .exceptions import DOINotFoundError, make_flaky_ssl_error_predicate
 
 logger = logging.getLogger(__name__)
 
@@ -132,16 +132,8 @@ def get_crossref_mailto() -> str:
     return "example@papercrow.ai"
 
 
-def is_flaky_crossref_ssl_error(exc: BaseException) -> bool:
-    """Get if we should retry upon known flaky Crossref failures."""
-    # > aiohttp.client_exceptions.ClientConnectorError:
-    # > Cannot connect to host api.crossref.org:443 ssl:default [nodename nor servname provided, or not known]
-    # SEE: https://github.com/aio-libs/aiohttp/blob/v3.10.5/aiohttp/client_exceptions.py#L193-L196
-    return isinstance(exc, aiohttp.ClientConnectorError) and exc.host == CROSSREF_HOST
-
-
 @retry(
-    retry=retry_if_exception(is_flaky_crossref_ssl_error),
+    retry=retry_if_exception(make_flaky_ssl_error_predicate(CROSSREF_HOST)),
     before_sleep=before_sleep_log(logger, logging.WARNING),
     stop=stop_after_attempt(3),
 )
@@ -273,7 +265,7 @@ async def parse_crossref_to_doc_details(
 
 
 @retry(
-    retry=retry_if_exception(is_flaky_crossref_ssl_error),
+    retry=retry_if_exception(make_flaky_ssl_error_predicate(CROSSREF_HOST)),
     before_sleep=before_sleep_log(logger, logging.WARNING),
     stop=stop_after_attempt(3),
 )
diff --git a/paperqa/clients/exceptions.py b/paperqa/clients/exceptions.py
index f4f3dd8a..780c8ddf 100644
--- a/paperqa/clients/exceptions.py
+++ b/paperqa/clients/exceptions.py
@@ -1,4 +1,19 @@
+from collections.abc import Callable
+
+import aiohttp
+
+
 class DOINotFoundError(Exception):
     def __init__(self, message="DOI not found") -> None:
         self.message = message
         super().__init__(self.message)
+
+
+def make_flaky_ssl_error_predicate(host: str) -> Callable[[BaseException], bool]:
+    def predicate(exc: BaseException) -> bool:
+        # > aiohttp.client_exceptions.ClientConnectorError:
+        # > Cannot connect to host api.host.org:443 ssl:default [nodename nor servname provided, or not known]
+        # SEE: https://github.com/aio-libs/aiohttp/blob/v3.10.5/aiohttp/client_exceptions.py#L193-L196
+        return isinstance(exc, aiohttp.ClientConnectorError) and exc.host == host
+
+    return predicate
diff --git a/paperqa/clients/semantic_scholar.py b/paperqa/clients/semantic_scholar.py
index 5b770cd7..d6a74171 100644
--- a/paperqa/clients/semantic_scholar.py
+++ b/paperqa/clients/semantic_scholar.py
@@ -10,6 +10,7 @@
 from typing import Any
 
 import aiohttp
+from tenacity import before_sleep_log, retry, retry_if_exception, stop_after_attempt
 
 from paperqa.types import DocDetails
 from paperqa.utils import (
@@ -21,7 +22,7 @@
 
 from .client_models import DOIOrTitleBasedProvider, DOIQuery, TitleAuthorQuery
 from .crossref import doi_to_bibtex
-from .exceptions import DOINotFoundError
+from .exceptions import DOINotFoundError, make_flaky_ssl_error_predicate
 
 logger = logging.getLogger(__name__)
 
@@ -47,7 +48,8 @@
 SEMANTIC_SCHOLAR_API_FIELDS: str = ",".join(
     union_collections_to_ordered_list(SEMANTIC_SCHOLAR_API_MAPPING.values())
 )
-SEMANTIC_SCHOLAR_BASE_URL = "https://api.semanticscholar.org"
+SEMANTIC_SCHOLAR_HOST = "api.semanticscholar.org"
+SEMANTIC_SCHOLAR_BASE_URL = f"https://{SEMANTIC_SCHOLAR_HOST}"
 SEMANTIC_SCHOLAR_HEADER_KEY = "x-api-key"
 
 
@@ -249,6 +251,11 @@ async def s2_title_search(
     return await parse_s2_to_doc_details(data, session)
 
 
+@retry(
+    retry=retry_if_exception(make_flaky_ssl_error_predicate(SEMANTIC_SCHOLAR_HOST)),
+    before_sleep=before_sleep_log(logger, logging.WARNING),
+    stop=stop_after_attempt(3),
+)
 async def get_s2_doc_details_from_doi(
     doi: str | None,
     session: aiohttp.ClientSession,