Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,28 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
return inner


@pytest.fixture
def get_rate_limit_model_response():
    """Factory fixture: build a mocked HTTP 429 (rate-limited) model response.

    The returned callable accepts optional request headers and yields an
    ``HttpxResponse`` with status 429 attached to a POST ``/responses``
    request, suitable for patching an HTTP client's ``send`` in tests.
    """

    def inner(request_headers=None):
        # Default to an empty header mapping when the caller passes nothing.
        headers = {} if request_headers is None else request_headers

        rate_limited_request = HttpxRequest(
            "POST",
            "/responses",
            headers=headers,
        )

        # 429 = Too Many Requests; the request object is required so client
        # code can raise a proper rate-limit error from the response.
        return HttpxResponse(429, request=rate_limited_request)

    return inner


@pytest.fixture
def streaming_chat_completions_model_response():
return [
Expand Down
33 changes: 20 additions & 13 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,9 @@ def test_embeddings_no_pii(
assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]


def test_exception_handling(sentry_init, capture_events):
def test_exception_handling(
reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
):
sentry_init(
integrations=[LiteLLMIntegration()],
traces_sample_rate=1.0,
Expand All @@ -474,19 +476,24 @@ def test_exception_handling(sentry_init, capture_events):

messages = [{"role": "user", "content": "Hello!"}]

with start_transaction(name="litellm test"):
kwargs = {
"model": "gpt-3.5-turbo",
"messages": messages,
}
client = OpenAI(api_key="z")

_input_callback(kwargs)
_failure_callback(
kwargs,
Exception("API rate limit reached"),
datetime.now(),
datetime.now(),
)
model_response = get_rate_limit_model_response()

with mock.patch.object(
client.embeddings._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
with pytest.raises(litellm.RateLimitError):
litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
client=client,
)

litellm_utils.executor.shutdown(wait=True)

# Should have error event and transaction
assert len(events) >= 1
Expand Down
Loading