feat(openai_agents): forward previous_response_id from SDK kwarg

x · x · commit 3d8f266e5a49 · 2026-04-30T17:38:26.000-04:00
The OpenAI Agents SDK's abstract `Model.get_response` method declares
three keyword-only parameters: `previous_response_id`,
`conversation_id`, and `prompt`. The SDK threads them down through
`_ServerConversationTracker` when callers use
`Runner.run(..., previous_response_id=X)` or set `RunConfig` with
`auto_previous_response_id=True`.

`TemporalStreamingModel.get_response` was declared with
`**kwargs # noqa: ARG002`, which silently swallowed all three. Callers
who used the SDK's official chaining API saw their `previous_response_id`
disappear and got no stateful behavior — without an error.

This commit:

- Replaces `**kwargs` with explicit `previous_response_id`,
  `conversation_id`, `prompt` params, matching the abstract.
- Forwards `previous_response_id` to `responses.create` via
  `_non_null_or_not_given` (so `None` resolves to `NOT_GIVEN` and the
  field is omitted from the request body — identical behavior to today
  for callers that don't opt in).
- Accepts `conversation_id` and `prompt` for SDK contract compliance
  but does not forward them yet (marked `# noqa: ARG002`); they can be
  wired through later if a use case appears.

## Compatibility with non-OpenAI backends

This follows the same opt-in pattern as `prompt_cache_key`.
`TemporalStreamingModel` calls `responses.create`, but the underlying
client can be pointed at any OpenAI-compatible server (LiteLLM proxy,
Foundry, vLLM, etc.), and some of those backends don't recognize
`previous_response_id`. Because we forward it only when explicitly set,
callers who don't opt in see no change in the wire request: the
argument resolves to `NOT_GIVEN`, which is filtered out before the
request body is serialized. Callers who opt in are responsible for
knowing whether their backend supports it.

## Test housekeeping

The 27 existing tests that passed `task_id=sample_task_id` to
`get_response` were relying on `**kwargs` to silently swallow it.
Production reads `task_id` from a ContextVar (set by
`ContextInterceptor` in real Temporal flows, set by the
`_streaming_context_vars` fixture in tests), not from a function
argument. The kwarg was vestigial cruft. Removed.
diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
@@ -51,6 +51,7 @@
     ResponseReasoningSummaryTextDeltaEvent,
     ResponseFunctionCallArgumentsDeltaEvent,
 )
+from openai.types.responses.response_prompt_param import ResponsePromptParam
 
 # AgentEx SDK imports
 from agentex.lib import adk
@@ -465,12 +466,25 @@ async def get_response(
         output_schema: Optional[AgentOutputSchemaBase],
         handoffs: list[Handoff],
         tracing: ModelTracing,  # noqa: ARG002
-        **kwargs,  # noqa: ARG002
+        *,
+        previous_response_id: Optional[str] = None,
+        conversation_id: Optional[str] = None,  # noqa: ARG002
+        prompt: Optional[ResponsePromptParam] = None,  # noqa: ARG002
     ) -> ModelResponse:
         """Get a non-streaming response from the model with streaming to Redis.
 
         This method is used by Temporal activities and needs to return a complete
         response, but we stream the response to Redis while generating it.
+
+        ``previous_response_id`` enables stateful multi-turn chaining on the
+        Responses API: when set, the server retains the prior response's
+        chain-of-thought and only the new input items need to be sent. Forwarded
+        only when explicitly set — not all OpenAI-compatible backends support
+        this parameter, so the default is omitted from the request body via
+        ``NOT_GIVEN``.
+
+        ``conversation_id`` and ``prompt`` are accepted to satisfy the
+        ``Model.get_response`` abstract contract but not currently forwarded.
         """
         
         task_id = streaming_task_id.get()
@@ -595,6 +609,7 @@ async def get_response(
                     extra_query=model_settings.extra_query,
                     extra_body=model_settings.extra_body,
                     prompt_cache_key=prompt_cache_key,
+                    previous_response_id=self._non_null_or_not_given(previous_response_id),
                     # Any additional parameters from extra_args
                     **extra_args,
                 )
diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
@@ -43,7 +43,6 @@ async def test_temperature_setting(self, streaming_model, _streaming_context_var
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
             # Verify temperature was passed correctly
@@ -73,7 +72,6 @@ async def test_top_p_setting(self, streaming_model, _streaming_context_vars, sam
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
             create_call = streaming_model.client.responses.create.call_args
@@ -101,7 +99,6 @@ async def test_max_tokens_setting(self, streaming_model, _streaming_context_vars
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -131,7 +128,6 @@ async def test_reasoning_effort_settings(self, streaming_model, _streaming_conte
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
             create_call = streaming_model.client.responses.create.call_args
@@ -161,7 +157,6 @@ async def test_reasoning_summary_settings(self, streaming_model, _streaming_cont
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
             create_call = streaming_model.client.responses.create.call_args
@@ -199,7 +194,6 @@ async def test_tool_choice_variations(self, streaming_model, _streaming_context_
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
             create_call = streaming_model.client.responses.create.call_args
@@ -227,7 +221,6 @@ async def test_parallel_tool_calls(self, streaming_model, _streaming_context_var
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
             create_call = streaming_model.client.responses.create.call_args
@@ -255,7 +248,6 @@ async def test_truncation_strategy(self, streaming_model, _streaming_context_var
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -284,7 +276,6 @@ async def test_response_include(self, streaming_model, _streaming_context_vars,
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -314,7 +305,6 @@ async def test_verbosity(self, streaming_model, _streaming_context_vars, sample_
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -347,7 +337,6 @@ async def test_metadata_and_store(self, streaming_model, _streaming_context_vars
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -383,7 +372,6 @@ async def test_extra_headers_and_body(self, streaming_model, _streaming_context_
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -412,7 +400,6 @@ async def test_top_logprobs(self, streaming_model, _streaming_context_vars, samp
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -445,7 +432,6 @@ async def test_function_tool(self, streaming_model, _streaming_context_vars, sam
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -475,7 +461,6 @@ async def test_web_search_tool(self, streaming_model, _streaming_context_vars, s
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -502,7 +487,6 @@ async def test_file_search_tool(self, streaming_model, _streaming_context_vars,
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -531,7 +515,6 @@ async def test_computer_tool(self, streaming_model, _streaming_context_vars, sam
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -563,7 +546,6 @@ async def test_multiple_computer_tools_error(self, streaming_model, _streaming_c
                 output_schema=None,
                 handoffs=[],
                 tracing=None,
-                task_id=sample_task_id
             )
 
     @pytest.mark.asyncio
@@ -585,7 +567,6 @@ async def test_hosted_mcp_tool(self, streaming_model, _streaming_context_vars, s
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -613,7 +594,6 @@ async def test_image_generation_tool(self, streaming_model, _streaming_context_v
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -640,7 +620,6 @@ async def test_code_interpreter_tool(self, streaming_model, _streaming_context_v
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -667,7 +646,6 @@ async def test_local_shell_tool(self, streaming_model, _streaming_context_vars,
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -695,7 +673,6 @@ async def test_handoffs(self, streaming_model, _streaming_context_vars, sample_t
             output_schema=None,
             handoffs=[sample_handoff],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -725,7 +702,6 @@ async def test_mixed_tools(self, streaming_model, _streaming_context_vars, sampl
             output_schema=None,
             handoffs=[sample_handoff],
             tracing=None,
-            task_id=sample_task_id
         )
 
         create_call = streaming_model.client.responses.create.call_args
@@ -770,7 +746,6 @@ async def test_responses_api_streaming(self, streaming_model, mock_adk_streaming
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         # Verify streaming context was created
@@ -845,7 +820,6 @@ async def test_redis_context_creation(self, streaming_model, mock_adk_streaming,
             output_schema=None,
             handoffs=[],
             tracing=None,
-            task_id=sample_task_id
         )
 
         # Should create at least one context for reasoning
@@ -1113,4 +1087,90 @@ async def test_prompt_cache_key_forwarded_when_opted_in(
         kwargs = model.client.responses.create.call_args.kwargs
         assert kwargs["prompt_cache_key"] == "my-key"
         # Must be popped from extra_args so the SDK doesn't see it twice.
-        assert list(kwargs).count("prompt_cache_key") == 1
+        assert list(kwargs).count("prompt_cache_key") == 1
+
+    @pytest.mark.asyncio
+    async def test_previous_response_id_not_sent_by_default(
+        self,
+        streaming_model_with_mock_tracer,
+        _streaming_context_vars,  # noqa: ARG002
+    ):
+        """Without an opt-in, previous_response_id resolves to NOT_GIVEN.
+
+        Critical for non-Responses-API-native backends (e.g. Claude-via-LiteLLM)
+        where unknown fields on the request body could be rejected. NOT_GIVEN
+        is filtered before serialization, so the field is omitted entirely.
+        """
+        model = streaming_model_with_mock_tracer
+        completed = self._make_response_completed_event()
+        model.client.responses.create = AsyncMock(return_value=self._async_iter([completed]))
+
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=None,
+        )
+
+        kwargs = model.client.responses.create.call_args.kwargs
+        assert kwargs["previous_response_id"] is NOT_GIVEN
+
+    @pytest.mark.asyncio
+    async def test_previous_response_id_forwarded_via_sdk_kwarg(
+        self,
+        streaming_model_with_mock_tracer,
+        _streaming_context_vars,  # noqa: ARG002
+    ):
+        """The SDK threads previous_response_id as a keyword arg per Model.get_response
+        abstract contract. Verify it reaches responses.create instead of being silently
+        swallowed (which was the prior behavior under **kwargs)."""
+        model = streaming_model_with_mock_tracer
+        completed = self._make_response_completed_event()
+        model.client.responses.create = AsyncMock(return_value=self._async_iter([completed]))
+
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=None,
+            previous_response_id="resp_prior_turn",
+        )
+
+        kwargs = model.client.responses.create.call_args.kwargs
+        assert kwargs["previous_response_id"] == "resp_prior_turn"
+
+    @pytest.mark.asyncio
+    async def test_conversation_id_and_prompt_accepted_but_not_forwarded(
+        self,
+        streaming_model_with_mock_tracer,
+        _streaming_context_vars,  # noqa: ARG002
+    ):
+        """conversation_id and prompt are accepted to satisfy the SDK abstract
+        contract but not currently forwarded to responses.create."""
+        model = streaming_model_with_mock_tracer
+        completed = self._make_response_completed_event()
+        model.client.responses.create = AsyncMock(return_value=self._async_iter([completed]))
+
+        # Should not raise — both kwargs are accepted by the signature.
+        await model.get_response(
+            system_instructions=None,
+            input="hi",
+            model_settings=ModelSettings(),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=None,
+            conversation_id="conv_test",
+            prompt=None,
+        )
+
+        kwargs = model.client.responses.create.call_args.kwargs
+        # Neither should appear in the outgoing request kwargs.
+        assert "conversation_id" not in kwargs
+        assert "prompt" not in kwargs