
Commit 38c36a4

better error handling on auto-retry
- disables retry for invalid requests and maximum tokens
1 parent cfa5192 commit 38c36a4

File tree (4 files changed: +101 -87 lines)

  async_openai/schemas/chat.py
  async_openai/schemas/completions.py
  async_openai/types/errors.py
  async_openai/version.py

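For context, a rough caller-side sketch of what this change means (not part of the commit; the `OpenAI` client entry point, the `auto_retry` flag, and the sample messages are assumptions about typical usage of this library):

    # Hypothetical usage sketch: with auto-retry enabled, rate limits and transient
    # API errors are retried, while invalid requests now surface immediately.
    from async_openai import OpenAI
    from async_openai.types.errors import (
        InvalidMaxTokens,
        InvalidRequestError,
        MaxRetriesExceeded,
    )

    try:
        result = OpenAI.chat.create(
            messages = [{"role": "user", "content": "Hello"}],
            auto_retry = True,
        )
    except (InvalidMaxTokens, InvalidRequestError) as e:
        # As of this commit these are re-raised right away instead of being retried.
        print(f"Invalid request, fix the inputs: {e}")
    except MaxRetriesExceeded as e:
        # Retryable errors (rate limits, transient API errors) that still failed
        # after the configured number of attempts.
        print(f"Gave up after {e.attempts} attempts: {e}")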

async_openai/schemas/chat.py

Lines changed: 24 additions & 38 deletions

@@ -12,7 +12,7 @@
 from async_openai.types.resources import BaseResource, Usage
 from async_openai.types.responses import BaseResponse
 from async_openai.types.routes import BaseRoute
-from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded
+from async_openai.types.errors import RateLimitError, InvalidMaxTokens, InvalidRequestError, APIError, MaxRetriesExceeded
 from async_openai.utils import logger, get_max_chat_tokens, get_chat_tokens_count, parse_stream, aparse_stream
 
 
@@ -719,20 +719,9 @@ def create(
             return super().create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             time.sleep(sleep_interval)
             current_attempt += 1
             return self.create(
@@ -743,10 +732,12 @@ def create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
+
+
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts=current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -757,11 +748,14 @@ def create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
-
+
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -893,20 +887,9 @@ async def async_create(
             return await super().async_create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             await asyncio.sleep(sleep_interval)
             current_attempt += 1
             return await self.async_create(
@@ -919,8 +902,8 @@ async def async_create(
             )
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
@@ -932,10 +915,13 @@ async def async_create(
                 **kwargs
             )
 
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
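The rate-limit branches above now lean on `RateLimitError.retry_after_seconds` (populated in errors.py below) instead of re-parsing the message inline. A tiny standalone sketch of that backoff decision (the helper name is made up for illustration):

    def next_sleep_interval(retry_after_seconds):
        # Prefer the server-supplied hint, padded by 50%; otherwise fall back
        # to a flat 15-second wait, matching the logic in create()/async_create().
        return retry_after_seconds * 1.5 if retry_after_seconds else 15.0

    assert next_sleep_interval(20.0) == 30.0
    assert next_sleep_interval(None) == 15.0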

async_openai/schemas/completions.py

Lines changed: 22 additions & 37 deletions

@@ -12,7 +12,7 @@
 from async_openai.types.resources import BaseResource, Usage
 from async_openai.types.responses import BaseResponse
 from async_openai.types.routes import BaseRoute
-from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded
+from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded, InvalidMaxTokens, InvalidRequestError
 from async_openai.utils import logger, get_max_tokens, get_token_count, parse_stream, aparse_stream
 
 
@@ -463,20 +463,9 @@ def create(
             return super().create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             time.sleep(sleep_interval)
             current_attempt += 1
             return self.create(
@@ -489,8 +478,8 @@ def create(
             )
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -502,10 +491,13 @@ def create(
                 **kwargs
             )
 
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -647,20 +639,9 @@ async def async_create(
             return await super().async_create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             await asyncio.sleep(sleep_interval)
             current_attempt += 1
             return await self.async_create(
@@ -671,10 +652,11 @@ async def async_create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
+
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
@@ -686,10 +668,13 @@ async def async_create(
                 **kwargs
             )
 
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(

async_openai/types/errors.py

Lines changed: 54 additions & 11 deletions

@@ -1,6 +1,7 @@
 
 import json
 import aiohttpx
+import contextlib
 from typing import Any, Optional, Union, Dict
 from lazyops.types import BaseModel, lazyproperty
 
@@ -112,6 +113,10 @@ def __init__(
             should_retry=should_retry,
             **kwargs
         )
+        self.post_init(**kwargs)
+
+    def post_init(self, **kwargs):
+        pass
 
     def __str__(self):
         msg = self.exc.error_message or "<empty message>"
@@ -161,7 +166,15 @@ class PermissionError(OpenAIError):
 
 
 class RateLimitError(OpenAIError):
-    pass
+
+    def post_init(self, **kwargs):
+        """
+        Gets the rate limit reset time
+        """
+        self.retry_after_seconds: Optional[float] = None
+        with contextlib.suppress(Exception):
+            self.retry_after_seconds = float(self.exc.error_message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
+
 
 
 class ServiceUnavailableError(OpenAIError):
@@ -172,16 +185,37 @@ class InvalidAPIType(OpenAIError):
     pass
 
 
+class InvalidMaxTokens(InvalidRequestError):
+    pass
+
+    def post_init(self, **kwargs):
+        """
+        Gets the maximum context length and requested max tokens
+        """
+        self.maximum_context_length: Optional[int] = None
+        self.requested_max_tokens: Optional[int] = None
+        with contextlib.suppress(Exception):
+            self.maximum_context_length = int(self.exc.error_message.split("maximum context length is", 1)[1].split(" ", 1)[0].strip())
+            self.requested_max_tokens = int(self.exc.error_message.split("requested", 1)[1].split(" ", 1)[0].strip())
+
+
 def fatal_exception(exc):
-    if isinstance(exc, OpenAIError):
-        # retry on server errors and client errors
-        # with 429 status code (rate limited),
-        # with 400, 404, 415 status codes (invalid request),
-        # don't retry on other client errors
-        return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415]
-    else:
+    """
+    Checks if the exception is fatal.
+    """
+    if not isinstance(exc, OpenAIError):
         # retry on all other errors (eg. network)
         return False
+
+    # retry on server errors and client errors
+    # with 429 status code (rate limited),
+    # with 400, 404, 415 status codes (invalid request),
+    # 400 can include invalid parameters, such as invalid `max_tokens`
+    # don't retry on other client errors
+    if isinstance(exc, (InvalidMaxTokens, InvalidRequestError)):
+        return True
+
+    return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415] # [429, 400, 404, 415]
 
 
 def error_handler(
@@ -207,6 +241,13 @@ def error_handler(
             **kwargs
         )
     if response.status_code in [400, 404, 415]:
+        if 'maximum context length' in response.text:
+            return InvalidMaxTokens(
+                response = response,
+                data = data,
+                should_retry = False,
+                **kwargs
+            )
         return InvalidRequestError(
             response = response,
             data = data,
@@ -249,24 +290,26 @@
         self,
         attempts: int,
         base_exception: OpenAIError,
+        name: Optional[str] = None,
     ):
+        self.name = name
         self.attempts = attempts
         self.ex = base_exception
 
     def __str__(self):
-        return f"Max {self.attempts} retries exceeded: {str(self.ex)}"
+        return f"[{self.name}] Max {self.attempts} retries exceeded: {str(self.ex)}"
 
 
     @property
    def user_message(self):
        """
        Returns the error message.
        """
-        return f"Max {self.attempts} retries exceeded: {self.ex.user_message}"
+        return f"[{self.name}] Max {self.attempts} retries exceeded: {self.ex.user_message}"
 
     def __repr__(self):
         """
         Returns the string representation of the error.
         """
-        return f"{repr(self.ex)} (attempts={self.attempts})"
+        return f"[{self.name}] {repr(self.ex)} (attempts={self.attempts})"
 
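To make the new `post_init` hooks concrete, here is a standalone sketch of the same message parsing applied to representative error strings (the helper names and sample messages are illustrative, not part of the library):

    import contextlib
    from typing import Optional, Tuple

    def parse_retry_after(message: str) -> Optional[float]:
        # Same idea as RateLimitError.post_init: pull the wait hint out of
        # a message shaped like "... Please retry after 20 seconds."
        retry_after: Optional[float] = None
        with contextlib.suppress(Exception):
            retry_after = float(message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
        return retry_after

    def parse_context_limits(message: str) -> Tuple[Optional[int], Optional[int]]:
        # Same idea as InvalidMaxTokens.post_init: pull the model limit and the
        # requested token count out of a message shaped like
        # "This model's maximum context length is 4097 tokens, however you requested 5000 tokens ..."
        # (here the first whitespace-separated token after each phrase is taken).
        maximum: Optional[int] = None
        requested: Optional[int] = None
        with contextlib.suppress(Exception):
            maximum = int(message.split("maximum context length is", 1)[1].split()[0])
            requested = int(message.split("requested", 1)[1].split()[0])
        return maximum, requested

    print(parse_retry_after("Rate limit reached. Please retry after 20 seconds."))   # 20.0
    print(parse_context_limits(
        "This model's maximum context length is 4097 tokens, however you requested 5000 tokens."
    ))  # (4097, 5000)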

async_openai/version.py

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-VERSION = '0.0.41rc2'
+VERSION = '0.0.41rc3'
