@@ -1308,3 +1308,107 @@ def test_openai_gpt_5_codex_reasoning():
13081308 print ("response: " , response )
13091309 for chunk in response :
13101310 print ("chunk: " , chunk )
1311+
1312+
1313+ # Tests moved from test_streaming_n_with_tools.py
1314+ # Regression test for: https://github.com/BerriAI/litellm/issues/8977
@pytest.mark.parametrize("model", ["gpt-4o", "gpt-4-turbo"])
def test_streaming_tool_calls_with_n_greater_than_1(model):
    """
    Test that the index field in a choice object is correctly populated
    when using streaming mode with n>1 and tool calls.

    Regression test for: https://github.com/BerriAI/litellm/issues/8977

    NOTE: this test is synchronous — ``litellm.completion`` is a blocking
    call and the original ``async def`` + ``@pytest.mark.asyncio`` wrapper
    never awaited anything, so it has been removed.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "strict": True,
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                        },
                    },
                    "required": ["location", "unit"],
                    "additionalProperties": False,
                },
            },
        }
    ]

    response = litellm.completion(
        model=model,
        messages=[
            {
                "role": "user",
                "content": "What is the weather in San Francisco?",
            },
        ],
        tools=tools,
        stream=True,
        n=3,
    )

    # Collect the choice index carried by every streamed chunk.
    indices_seen = []
    for chunk in response:
        assert len(chunk.choices) == 1, "Each streaming chunk should have exactly 1 choice"
        assert hasattr(chunk.choices[0], "index"), "Choice should have an index attribute"
        indices_seen.append(chunk.choices[0].index)

    # With n=3 the stream must interleave chunks for all three completions,
    # so exactly the indices {0, 1, 2} should appear.
    unique_indices = set(indices_seen)
    assert unique_indices == {0, 1, 2}, f"Should have indices 0, 1, 2 for n=3, got {unique_indices}"

    print("✓ Test passed: streaming with n=3 and tool calls correctly populates index field")
    print(f"  Indices seen: {indices_seen}")
    print(f"  Unique indices: {unique_indices}")
1378+
1379+
@pytest.mark.parametrize("model", ["gpt-4o"])
def test_streaming_content_with_n_greater_than_1(model):
    """
    Test that the index field is correctly populated for regular content
    streaming (not tool calls) with n>1.

    NOTE: this test is synchronous — ``litellm.completion`` is a blocking
    call and the original ``async def`` + ``@pytest.mark.asyncio`` wrapper
    never awaited anything, so it has been removed.
    """
    response = litellm.completion(
        model=model,
        messages=[
            {
                "role": "user",
                "content": "Say hello in one word",
            },
        ],
        stream=True,
        n=2,
        max_tokens=10,
    )

    # Collect the choice index carried by every streamed chunk.
    indices_seen = []
    for chunk in response:
        assert len(chunk.choices) == 1, "Each streaming chunk should have exactly 1 choice"
        assert hasattr(chunk.choices[0], "index"), "Choice should have an index attribute"
        indices_seen.append(chunk.choices[0].index)

    # With n=2 both completions must be represented, so exactly {0, 1}
    # should appear across the stream.
    unique_indices = set(indices_seen)
    assert unique_indices == {0, 1}, f"Should have indices 0, 1 for n=2, got {unique_indices}"

    print("✓ Test passed: streaming with n=2 and regular content correctly populates index field")
    print(f"  Indices seen: {indices_seen}")
    print(f"  Unique indices: {unique_indices}")
0 commit comments