
Commit 38c36a4

better error handling on auto-retry
- disables retry for invalid requests and maximum tokens
1 parent cfa5192 commit 38c36a4

File tree (4 files changed: +101 -87 lines)

  async_openai/schemas/chat.py
  async_openai/schemas/completions.py
  async_openai/types/errors.py
  async_openai/version.py

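For context, a rough caller-side sketch of what this change means (not part of the commit; the `OpenAI` client entry point, the `auto_retry` flag, and the sample messages are assumptions about typical usage of this library):

    # Hypothetical usage sketch: with auto-retry enabled, rate limits and transient
    # API errors are retried, while invalid requests now surface immediately.
    from async_openai import OpenAI
    from async_openai.types.errors import (
        InvalidMaxTokens,
        InvalidRequestError,
        MaxRetriesExceeded,
    )

    try:
        result = OpenAI.chat.create(
            messages = [{"role": "user", "content": "Hello"}],
            auto_retry = True,
        )
    except (InvalidMaxTokens, InvalidRequestError) as e:
        # As of this commit these are re-raised right away instead of being retried.
        print(f"Invalid request, fix the inputs: {e}")
    except MaxRetriesExceeded as e:
        # Retryable errors (rate limits, transient API errors) that still failed
        # after the configured number of attempts.
        print(f"Gave up after {e.attempts} attempts: {e}")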

async_openai/schemas/chat.py

Lines changed: 24 additions & 38 deletions

@@ -12,7 +12,7 @@
 from async_openai.types.resources import BaseResource, Usage
 from async_openai.types.responses import BaseResponse
 from async_openai.types.routes import BaseRoute
-from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded
+from async_openai.types.errors import RateLimitError, InvalidMaxTokens, InvalidRequestError, APIError, MaxRetriesExceeded
 from async_openai.utils import logger, get_max_chat_tokens, get_chat_tokens_count, parse_stream, aparse_stream
 
 
@@ -719,20 +719,9 @@ def create(
             return super().create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             time.sleep(sleep_interval)
             current_attempt += 1
             return self.create(
@@ -743,10 +732,12 @@ def create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
+
+
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts=current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -757,11 +748,14 @@ def create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
-
+
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -893,20 +887,9 @@ async def async_create(
             return await super().async_create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             await asyncio.sleep(sleep_interval)
             current_attempt += 1
             return await self.async_create(
@@ -919,8 +902,8 @@ async def async_create(
             )
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
@@ -932,10 +915,13 @@ async def async_create(
                 **kwargs
             )
 
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
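The rate-limit branches above now lean on `RateLimitError.retry_after_seconds` (populated in errors.py below) instead of re-parsing the message inline. A tiny standalone sketch of that backoff decision (the helper name is made up for illustration):

    def next_sleep_interval(retry_after_seconds):
        # Prefer the server-supplied hint, padded by 50%; otherwise fall back
        # to a flat 15-second wait, matching the logic in create()/async_create().
        return retry_after_seconds * 1.5 if retry_after_seconds else 15.0

    assert next_sleep_interval(20.0) == 30.0
    assert next_sleep_interval(None) == 15.0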

async_openai/schemas/completions.py

Lines changed: 22 additions & 37 deletions

@@ -12,7 +12,7 @@
 from async_openai.types.resources import BaseResource, Usage
 from async_openai.types.responses import BaseResponse
 from async_openai.types.routes import BaseRoute
-from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded
+from async_openai.types.errors import RateLimitError, APIError, MaxRetriesExceeded, InvalidMaxTokens, InvalidRequestError
 from async_openai.utils import logger, get_max_tokens, get_token_count, parse_stream, aparse_stream
 
 
@@ -463,20 +463,9 @@ def create(
             return super().create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             time.sleep(sleep_interval)
             current_attempt += 1
             return self.create(
@@ -489,8 +478,8 @@ def create(
             )
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -502,10 +491,13 @@ def create(
                 **kwargs
             )
 
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             time.sleep(10.0)
             current_attempt += 1
             return self.create(
@@ -647,20 +639,9 @@ async def async_create(
             return await super().async_create(input_object = input_object, parse_stream = parse_stream, **kwargs)
         except RateLimitError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            sleep_interval = 15.0
-            with contextlib.suppress(Exception):
-                if 'Please retry after' in str(e):
-                    sleep_interval = (
-                        float(
-                            str(e)
-                            .split("Please retry after")[1]
-                            .split("second")[0]
-                            .strip()
-                        )
-                        * 1.5
-                    )
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            sleep_interval = e.retry_after_seconds * 1.5 if e.retry_after_seconds else 15.0
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Rate Limit Error. Sleeping for {sleep_interval} seconds')
             await asyncio.sleep(sleep_interval)
             current_attempt += 1
             return await self.async_create(
@@ -671,10 +652,11 @@ async def async_create(
                 _current_attempt = current_attempt,
                 **kwargs
             )
+
         except APIError as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] API Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(
@@ -686,10 +668,13 @@ async def async_create(
                 **kwargs
             )
 
+        except (InvalidMaxTokens, InvalidRequestError) as e:
+            raise e
+
         except Exception as e:
             if current_attempt >= auto_retry_limit:
-                raise MaxRetriesExceeded(attempts=current_attempt, base_exception=e) from e
-            logger.warning(f'[{current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
+                raise MaxRetriesExceeded(name = self.name, attempts = current_attempt, base_exception = e) from e
+            logger.warning(f'[{self.name}: {current_attempt}/{auto_retry_limit}] Unknown Error: {e}. Sleeping for 10 seconds')
             await asyncio.sleep(10.0)
             current_attempt += 1
             return await self.async_create(

async_openai/types/errors.py

Lines changed: 54 additions & 11 deletions

@@ -1,6 +1,7 @@
 
 import json
 import aiohttpx
+import contextlib
 from typing import Any, Optional, Union, Dict
 from lazyops.types import BaseModel, lazyproperty
 
@@ -112,6 +113,10 @@ def __init__(
             should_retry=should_retry,
             **kwargs
         )
+        self.post_init(**kwargs)
+
+    def post_init(self, **kwargs):
+        pass
 
     def __str__(self):
         msg = self.exc.error_message or "<empty message>"
@@ -161,7 +166,15 @@ class PermissionError(OpenAIError):
 
 
 class RateLimitError(OpenAIError):
-    pass
+
+    def post_init(self, **kwargs):
+        """
+        Gets the rate limit reset time
+        """
+        self.retry_after_seconds: Optional[float] = None
+        with contextlib.suppress(Exception):
+            self.retry_after_seconds = float(self.exc.error_message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
+
 
 
 class ServiceUnavailableError(OpenAIError):
@@ -172,16 +185,37 @@ class InvalidAPIType(OpenAIError):
     pass
 
 
+class InvalidMaxTokens(InvalidRequestError):
+    pass
+
+    def post_init(self, **kwargs):
+        """
+        Gets the maximum context length and requested max tokens
+        """
+        self.maximum_context_length: Optional[int] = None
+        self.requested_max_tokens: Optional[int] = None
+        with contextlib.suppress(Exception):
+            self.maximum_context_length = int(self.exc.error_message.split("maximum context length is", 1)[1].split(" ", 1)[0].strip())
+            self.requested_max_tokens = int(self.exc.error_message.split("requested", 1)[1].split(" ", 1)[0].strip())
+
+
 def fatal_exception(exc):
-    if isinstance(exc, OpenAIError):
-        # retry on server errors and client errors
-        # with 429 status code (rate limited),
-        # with 400, 404, 415 status codes (invalid request),
-        # don't retry on other client errors
-        return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415]
-    else:
+    """
+    Checks if the exception is fatal.
+    """
+    if not isinstance(exc, OpenAIError):
         # retry on all other errors (eg. network)
         return False
+
+    # retry on server errors and client errors
+    # with 429 status code (rate limited),
+    # with 400, 404, 415 status codes (invalid request),
+    # 400 can include invalid parameters, such as invalid `max_tokens`
+    # don't retry on other client errors
+    if isinstance(exc, (InvalidMaxTokens, InvalidRequestError)):
+        return True
+
+    return (400 <= exc.status < 500) and exc.status not in [429, 400, 404, 415] # [429, 400, 404, 415]
 
 
 def error_handler(
@@ -207,6 +241,13 @@ def error_handler(
             **kwargs
         )
     if response.status_code in [400, 404, 415]:
+        if 'maximum context length' in response.text:
+            return InvalidMaxTokens(
+                response = response,
+                data = data,
+                should_retry = False,
+                **kwargs
+            )
         return InvalidRequestError(
             response = response,
             data = data,
@@ -249,24 +290,26 @@
         self,
         attempts: int,
         base_exception: OpenAIError,
+        name: Optional[str] = None,
     ):
+        self.name = name
         self.attempts = attempts
         self.ex = base_exception
 
     def __str__(self):
-        return f"Max {self.attempts} retries exceeded: {str(self.ex)}"
+        return f"[{self.name}] Max {self.attempts} retries exceeded: {str(self.ex)}"
 
 
     @property
    def user_message(self):
        """
        Returns the error message.
        """
-        return f"Max {self.attempts} retries exceeded: {self.ex.user_message}"
+        return f"[{self.name}] Max {self.attempts} retries exceeded: {self.ex.user_message}"
 
     def __repr__(self):
         """
         Returns the string representation of the error.
         """
-        return f"{repr(self.ex)} (attempts={self.attempts})"
+        return f"[{self.name}] {repr(self.ex)} (attempts={self.attempts})"
 
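To make the new `post_init` hooks concrete, here is a standalone sketch of the same message parsing applied to representative error strings (the helper names and sample messages are illustrative, not part of the library):

    import contextlib
    from typing import Optional, Tuple

    def parse_retry_after(message: str) -> Optional[float]:
        # Same idea as RateLimitError.post_init: pull the wait hint out of
        # a message shaped like "... Please retry after 20 seconds."
        retry_after: Optional[float] = None
        with contextlib.suppress(Exception):
            retry_after = float(message.split("Please retry after", 1)[1].split("second", 1)[0].strip())
        return retry_after

    def parse_context_limits(message: str) -> Tuple[Optional[int], Optional[int]]:
        # Same idea as InvalidMaxTokens.post_init: pull the model limit and the
        # requested token count out of a message shaped like
        # "This model's maximum context length is 4097 tokens, however you requested 5000 tokens ..."
        # (here the first whitespace-separated token after each phrase is taken).
        maximum: Optional[int] = None
        requested: Optional[int] = None
        with contextlib.suppress(Exception):
            maximum = int(message.split("maximum context length is", 1)[1].split()[0])
            requested = int(message.split("requested", 1)[1].split()[0])
        return maximum, requested

    print(parse_retry_after("Rate limit reached. Please retry after 20 seconds."))   # 20.0
    print(parse_context_limits(
        "This model's maximum context length is 4097 tokens, however you requested 5000 tokens."
    ))  # (4097, 5000)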

async_openai/version.py

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-VERSION = '0.0.41rc2'
+VERSION = '0.0.41rc3'
