diff --git a/async_openai/schemas/chat.py b/async_openai/schemas/chat.py
index 953c75e..5dfc2d8 100644
--- a/async_openai/schemas/chat.py
+++ b/async_openai/schemas/chat.py
@@ -12,7 +12,7 @@
 
 from async_openai.types.resources import BaseResource, Usage
 from async_openai.types.responses import BaseResponse
 from async_openai.types.routes import BaseRoute
-from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded
+from async_openai.types.errors import RateLimitError, InvalidMaxTokens, InvalidRequestError, APIError, MaxRetriesExceeded
 from async_openai.utils import logger, get_max_chat_tokens, get_chat_tokens_count, parse_stream, aparse_stream
 
@@ -719,20 +719,9 @@ def create(
             return super().create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             time.sleep(sleep_interval)
             current_attempt += 1
             return self.create(
@@ -743,10 +732,12 @@ def create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
+
+
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts=current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -757,11 +748,14 @@ def create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
-
+
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -893,20 +887,9 @@ async def async_create(
             return await super().async_create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             await asyncio.sleep(sleep_interval)
             current_attempt += 1
             return await self.async_create(
@@ -919,8 +902,8 @@ async def async_create(
             )
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
@@ -932,10 +915,13 @@
                 _current_attempt = current_attempt,
                 **kwargs
             )
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
diff --git a/async_openai/schemas/completions.py b/async_openai/schemas/completions.py
index 6417ecf..02b26e7 100644
--- a/async_openai/schemas/completions.py
+++ b/async_openai/schemas/completions.py
@@ -12,7 +12,7 @@
 
 from async_openai.types.resources import BaseResource, Usage
 from async_openai.types.responses import BaseResponse
 from async_openai.types.routes import BaseRoute
-from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded
+from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded, InvalidMaxTokens, InvalidRequestError
 from async_openai.utils import logger, get_max_tokens, get_token_count, parse_stream, aparse_stream
 
@@ -463,20 +463,9 @@ def create(
             return super().create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             time.sleep(sleep_interval)
             current_attempt += 1
             return self.create(
@@ -489,8 +478,8 @@ def create(
             )
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -502,10 +491,13 @@
                 _current_attempt = current_attempt,
                 **kwargs
             )
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -647,20 +639,9 @@ async def async_create(
             return await super().async_create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             await asyncio.sleep(sleep_interval)
             current_attempt += 1
             return await self.async_create(
@@ -671,10 +652,11 @@ async def async_create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
+
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
@@ -686,10 +668,13 @@
                 _current_attempt = current_attempt,
                 **kwargs
             )
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
diff --git a/async_openai/types/errors.py b/async_openai/types/errors.py
index 5d2e69f..a02d527 100644
--- a/async_openai/types/errors.py
+++ b/async_openai/types/errors.py
@@ -1,6 +1,7 @@
 import json
 import aiohttpx
+import contextlib
 
 from typing import Any, Optional, Union, Dict
 from lazyops.types import BaseModel, lazyproperty
 
@@ -112,6 +113,10 @@ def __init__(
             should_retry=should_retry,
             **kwargs
         )
+        self.post_init(**kwargs)
+
+    def post_init(self, **kwargs):
+        pass
 
     def __str__(self):
         msg = self.exc.error_message or ""
@@ -161,7 +166,15 @@ class PermissionError(OpenAIError):
 
 
 class RateLimitError(OpenAIError):
-    pass
+
+    def post_init(self, **kwargs):
+        """
+        Gets the rate limit reset time
+        """
+        self.retry_after_seconds: Optional[float] = None
+        with contextlib.suppress(Exception):
+            self.retry_after_seconds = float(self.exc.error_message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
+
 
 
 class ServiceUnavailableError(OpenAIError):
@@ -172,16 +185,37 @@ class InvalidAPIType(OpenAIError):
     pass
 
 
+class InvalidMaxTokens(InvalidRequestError):
+    pass
+
+    def post_init(self, **kwargs):
+        """
+        Gets the maximum context length and requested max tokens
+        """
+        self.maximum_context_length: Optional[int] = None
+        self.requested_max_tokens: Optional[int] = None
+        with contextlib.suppress(Exception):
+            self.maximum_context_length = int(self.exc.error_message.split("maximum context length is", 1)[1].split(" ", 1)[0].strip())
+            self.requested_max_tokens = int(self.exc.error_message.split("requested", 1)[1].split(" ", 1)[0].strip())
+
+
 def fatal_exception(exc):
-    if isinstance(exc, OpenAIError):
-        # retry on server errors and client errors
-        # with 429 status code (rate limited),
-        # with 400, 404, 415 status codes (invalid request),
-        # don't retry on other client errors
-        return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415]
-    else:
+    """
+    Checks if the exception is fatal.
+    """
+    if not isinstance(exc, OpenAIError):
         # retry on all other errors (eg. network)
         return False
+
+    # retry on server errors and client errors
+    # with 429 status code (rate limited),
+    # with 400, 404, 415 status codes (invalid request),
+    # 400 can include invalid parameters, such as invalid `max_tokens`
+    # don't retry on other client errors
+    if isinstance(exc, (InvalidMaxTokens, InvalidRequestError)):
+        return True
+
+    return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415] # [429, 400, 404, 415]
 
 
 def error_handler(
@@ -207,6 +241,13 @@ def error_handler(
             **kwargs
         )
     if response.status_code in [400, 404, 415]:
+        if 'maximum context length' in response.text:
+            return InvalidMaxTokens(
+                response = response,
+                data = data,
+                should_retry = False,
+                **kwargs
+            )
         return InvalidRequestError(
             response = response,
             data = data,
@@ -249,12 +290,14 @@ def __init__(
         self,
         attempts: int,
         base_exception: OpenAIError,
+        name: Optional[str] = None,
     ):
+        self.name = name
         self.attempts = attempts
         self.ex = base_exception
 
     def __str__(self):
-        return f"Max {self.attempts} retries exceeded: {str(self.ex)}"
+        return f"[{self.name}] Max {self.attempts} retries exceeded: {str(self.ex)}"
 
 
     @property
@@ -262,11 +305,11 @@ def user_message(self):
         """
        Returns the error message.
         """
-        return f"Max {self.attempts} retries exceeded: {self.ex.user_message}"
+        return f"[{self.name}] Max {self.attempts} retries exceeded: {self.ex.user_message}"
 
 
     def __repr__(self):
         """
         Returns the string representation of the error.
         """
-        return f"{repr(self.ex)} (attempts={self.attempts})"
+        return f"[{self.name}] {repr(self.ex)} (attempts={self.attempts})"
\ No newline at end of file
diff --git a/async_openai/version.py b/async_openai/version.py
index 179cc67..cb454ec 100644
--- a/async_openai/version.py
+++ b/async_openai/version.py
@@ -1 +1 @@
-VERSION = '0.0.41rc2'
\ No newline at end of file
+VERSION = '0.0.41rc3'
\ No newline at end of file