Show users a special message for context length errors (#1337)

* Show users a special message for context length errors
* More helpful error message
* More helpful error message
Azure-Samples · Feb 28, 2024 · 840f639
1 parent ad6fa14
commit 840f639
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 0 deletions.
diff --git a/app/backend/error.py b/app/backend/error.py
@@ -9,10 +9,14 @@
 """
 ERROR_MESSAGE_FILTER = """Your message contains content that was flagged by the OpenAI content filter."""
 
+ERROR_MESSAGE_LENGTH = """Your message exceeded the context length limit for this OpenAI model. Please shorten your message or change your settings to retrieve fewer search results."""
+
 
 def error_dict(error: Exception) -> dict:
     if isinstance(error, APIError) and error.code == "content_filter":
         return {"error": ERROR_MESSAGE_FILTER}
+    if isinstance(error, APIError) and error.code == "context_length_exceeded":
+        return {"error": ERROR_MESSAGE_LENGTH}
     return {"error": ERROR_MESSAGE.format(error_type=type(error))}
 
 

diff --git a/tests/snapshots/test_app/test_ask_handle_exception_contextlength/client0/result.json b/tests/snapshots/test_app/test_ask_handle_exception_contextlength/client0/result.json
@@ -0,0 +1,3 @@
+{
+    "error": "Your message exceeded the context length limit for this OpenAI model. Please shorten your message or change your settings to retrieve fewer search results."
+}
diff --git a/tests/snapshots/test_app/test_ask_handle_exception_contextlength/client1/result.json b/tests/snapshots/test_app/test_ask_handle_exception_contextlength/client1/result.json
@@ -0,0 +1,3 @@
+{
+    "error": "Your message exceeded the context length limit for this OpenAI model. Please shorten your message or change your settings to retrieve fewer search results."
+}
diff --git a/tests/test_app.py b/tests/test_app.py
@@ -30,6 +30,16 @@ def fake_response(http_code):
     ),
 )
 
+contextlength_response = BadRequestError(
+    message="This model's maximum context length is 4096 tokens. However, your messages resulted in 5069 tokens. Please reduce the length of the messages.",
+    body={
+        "message": "This model's maximum context length is 4096 tokens. However, your messages resulted in 5069 tokens. Please reduce the length of the messages.",
+        "code": "context_length_exceeded",
+        "status": 400,
+    },
+    response=Response(400, request=Request(method="get", url="https://foo.bar/"), json={"error": {"code": "429"}}),
+)
+
 
 def thoughts_contains_text(thoughts, text):
     found = False
@@ -115,6 +125,26 @@ async def test_ask_handle_exception_contentsafety(client, monkeypatch, snapshot,
     snapshot.assert_match(json.dumps(result, indent=4), "result.json")
 
 
+@pytest.mark.asyncio
+async def test_ask_handle_exception_contextlength(client, monkeypatch, snapshot, caplog):
+    monkeypatch.setattr(
+        "approaches.retrievethenread.RetrieveThenReadApproach.run",
+        mock.Mock(side_effect=contextlength_response),
+    )
+
+    response = await client.post(
+        "/ask",
+        json={"messages": [{"content": "Super long message with lots of sources.", "role": "user"}]},
+    )
+    assert response.status_code == 500
+    result = await response.get_json()
+    assert (
+        "Exception in /ask: This model's maximum context length is 4096 tokens. However, your messages resulted in 5069 tokens. Please reduce the length of the messages."
+        in caplog.text
+    )
+    snapshot.assert_match(json.dumps(result, indent=4), "result.json")
+
+
 @pytest.mark.asyncio
 async def test_ask_rtr_text(client, snapshot):
     response = await client.post(