From df1e3083a4dce2ffbc1c28475c14c1301a417d6d Mon Sep 17 00:00:00 2001
From: Ryan Marten
Date: Mon, 16 Dec 2024 14:25:21 -0800
Subject: [PATCH] default timeout at 10 minutes

---
 .../request_processor/base_online_request_processor.py    | 2 ++
 .../request_processor/litellm_online_request_processor.py | 5 ++---
 .../request_processor/openai_online_request_processor.py  | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/bespokelabs/curator/request_processor/base_online_request_processor.py b/src/bespokelabs/curator/request_processor/base_online_request_processor.py
index 1aee4682..3d8f9af0 100644
--- a/src/bespokelabs/curator/request_processor/base_online_request_processor.py
+++ b/src/bespokelabs/curator/request_processor/base_online_request_processor.py
@@ -26,6 +26,7 @@
 DEFAULT_MAX_TOKENS_PER_MINUTE = 100_000
 DEFAULT_MAX_RETRIES = 10
 SECONDS_TO_PAUSE_ON_RATE_LIMIT = 10
+DEFAULT_REQUEST_TIMEOUT = 10 * 60  # 10 minutes
 
 
 @dataclass
@@ -144,6 +145,7 @@ def __init__(
             self.max_retries = DEFAULT_MAX_RETRIES
         else:
             self.max_retries = max_retries
+        self.timeout = DEFAULT_REQUEST_TIMEOUT
 
     @property
     def max_requests_per_minute(self) -> int:
diff --git a/src/bespokelabs/curator/request_processor/litellm_online_request_processor.py b/src/bespokelabs/curator/request_processor/litellm_online_request_processor.py
index 86bfef8a..df64e665 100644
--- a/src/bespokelabs/curator/request_processor/litellm_online_request_processor.py
+++ b/src/bespokelabs/curator/request_processor/litellm_online_request_processor.py
@@ -18,7 +18,6 @@
 logger = logging.getLogger(__name__)
 
 litellm.suppress_debug_info = True
-REQUEST_TIMEOUT = 10 * 60.0  # same as openai python sdk
 
 
 class LiteLLMOnlineRequestProcessor(BaseOnlineRequestProcessor):
@@ -269,7 +268,7 @@ async def call_single_request(
                     await self.client.chat.completions.create_with_completion(
                         **request.api_specific_request,
                         response_model=request.prompt_formatter.response_format,
-                        timeout=REQUEST_TIMEOUT,
+                        timeout=self.timeout,
                     )
                 )
                 response_message = (
@@ -277,7 +276,7 @@ async def call_single_request(
                 )
             else:
                 completion_obj = await litellm.acompletion(
-                    **request.api_specific_request, timeout=REQUEST_TIMEOUT
+                    **request.api_specific_request, timeout=self.timeout
                 )
                 response_message = completion_obj["choices"][0]["message"]["content"]
         except litellm.RateLimitError as e:
diff --git a/src/bespokelabs/curator/request_processor/openai_online_request_processor.py b/src/bespokelabs/curator/request_processor/openai_online_request_processor.py
index 33731f6d..a8416906 100644
--- a/src/bespokelabs/curator/request_processor/openai_online_request_processor.py
+++ b/src/bespokelabs/curator/request_processor/openai_online_request_processor.py
@@ -272,7 +272,7 @@ async def call_single_request(
             self.url,
             headers=request_header,
             json=request.api_specific_request,
-            timeout=60.0,
+            timeout=self.timeout,
         ) as response_obj:
             response = await response_obj.json()
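
Reviewer note (not part of the patch): the change replaces two divergent per-backend timeouts, the module-level REQUEST_TIMEOUT = 10 * 60.0 in the LiteLLM processor and the hard-coded timeout=60.0 in the OpenAI processor's aiohttp call, with a single self.timeout set once in the shared base class. A minimal sketch of the resulting behavior follows; the class names are hypothetical stand-ins, and only DEFAULT_MAX_RETRIES, DEFAULT_REQUEST_TIMEOUT, self.max_retries, and self.timeout come from the patch itself.

    DEFAULT_MAX_RETRIES = 10
    DEFAULT_REQUEST_TIMEOUT = 10 * 60  # 10 minutes, per the patched base module

    class BaseProcessorSketch:
        """Stand-in for BaseOnlineRequestProcessor (hypothetical name)."""

        def __init__(self, max_retries=None):
            # Mirrors the patched __init__: retries remain configurable,
            # while the request timeout is taken from the new shared default.
            self.max_retries = DEFAULT_MAX_RETRIES if max_retries is None else max_retries
            self.timeout = DEFAULT_REQUEST_TIMEOUT

    class BackendProcessorSketch(BaseProcessorSketch):
        """Stand-in for a concrete backend processor (hypothetical name)."""

        async def call_single_request(self, request):
            # After the patch, every backend forwards the same inherited value, e.g.
            #   await litellm.acompletion(**request.api_specific_request, timeout=self.timeout)
            # instead of consulting its own module constant or hard-coded literal.
            ...

One practical consequence visible in the diff: the OpenAI processor's effective timeout rises from 60 seconds to 600 seconds, matching what the LiteLLM path already used, and centralizing the value in the base class leaves room for a later change to expose timeout as a constructor argument the same way max_retries already is.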