Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 63 additions & 2 deletions src/agents/run_internal/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"local_shell_call": "local_shell_call_output",
"tool_search_call": "tool_search_output",
}
_CALL_OUTPUT_TYPES: frozenset[str] = frozenset(_TOOL_CALL_TO_OUTPUT_TYPE.values())

__all__ = [
"ReasoningItemIdPolicy",
Expand All @@ -37,6 +38,7 @@
"TOOL_CALL_SESSION_TITLE_KEY",
"copy_input_items",
"drop_orphan_function_calls",
"drop_orphaned_messages_after_consumed_reasoning",
"ensure_input_item_format",
"prepare_model_input_items",
"run_item_to_input_item",
Expand Down Expand Up @@ -179,6 +181,63 @@ def _drop_reasoning_items_preceding_dropped_calls(
return [entry for idx, entry in enumerate(items) if idx not in excluded]


def drop_orphaned_messages_after_consumed_reasoning(
    items: list[TResponseInputItem],
) -> list[TResponseInputItem]:
    """Remove assistant messages whose reasoning item was already claimed by a tool call.

    The Responses API requires a message item to be paired with its own reasoning item. Once
    a tool call (function_call, computer_call, shell_call, etc.) follows a reasoning item, that
    reasoning item counts as consumed by the call. An assistant message appearing afterwards
    (e.g. a handoff agent's closing message) no longer has a paired reasoning item, and some
    providers answer with a 400:
    ``Item 'msg_...' of type 'message' was provided without its required 'reasoning' item``.

    Dropping starts at the tool call that consumes a pending reasoning item and continues for
    every assistant message until either a call-output item or a new reasoning item is seen.
    Messages with any other role, items of any other type, and non-dict items are always kept.

    This is the inverse of :func:`drop_orphan_function_calls`, which removes function calls
    without outputs and their preceding reasoning items.

    Args:
        items: Input items in conversation order. The list itself is not modified.

    Returns:
        A new list holding the surviving items in their original relative order.
    """
    reasoning_pending = False  # a reasoning item was seen and no tool call has claimed it yet
    awaiting_call_output = False  # a tool call claimed the reasoning; its output is not seen yet
    kept: list[TResponseInputItem] = []

    for entry in items:
        if isinstance(entry, dict):
            kind = entry.get("type")

            if kind == "reasoning":
                reasoning_pending, awaiting_call_output = True, False
            elif kind in _TOOL_CALL_TO_OUTPUT_TYPE:
                if reasoning_pending:
                    # The pending reasoning item is now consumed by this call.
                    reasoning_pending, awaiting_call_output = False, True
            elif kind in _CALL_OUTPUT_TYPES:
                # Any call output (function_call_output, computer_call_output, etc.) marks the
                # end of its call sequence. The SDK appends call outputs after all model output
                # items, so an orphaned message has already been dropped by this point.
                # Resetting here keeps a turn with no trailing message from leaking the
                # consumed state into the next agent's responses.
                awaiting_call_output = False
            elif (
                kind == "message"
                and awaiting_call_output
                and entry.get("role") == "assistant"
            ):
                # Orphaned assistant message: skip it and leave the state untouched so any
                # further assistant messages in the same turn are skipped too. Only assistant
                # messages can be orphaned; a user follow-up after a stale, unanswered call
                # must survive, otherwise the next request would silently lose that input.
                continue

        kept.append(entry)

    return kept


def ensure_input_item_format(item: TResponseInputItem) -> TResponseInputItem:
"""Ensure a single item is normalized for model input."""
coerced = _coerce_to_dict(item)
Expand Down Expand Up @@ -213,7 +272,8 @@ def prepare_model_input_items(
return normalized_caller_items

normalized_generated_items = normalize_input_items_for_api(list(generated_items))
filtered_generated_items = drop_orphan_function_calls(normalized_generated_items)
filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(normalized_generated_items)
filtered_generated_items = drop_orphan_function_calls(filtered_generated_items)
return normalized_caller_items + filtered_generated_items


Expand All @@ -223,7 +283,8 @@ def normalize_resumed_input(
"""Normalize resumed list inputs and drop orphan tool calls."""
if isinstance(raw_input, list):
normalized = normalize_input_items_for_api(raw_input)
return drop_orphan_function_calls(normalized)
filtered = drop_orphaned_messages_after_consumed_reasoning(normalized)
return drop_orphan_function_calls(filtered)
return raw_input


Expand Down
4 changes: 3 additions & 1 deletion src/agents/run_internal/oai_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .items import (
ReasoningItemIdPolicy,
drop_orphan_function_calls,
drop_orphaned_messages_after_consumed_reasoning,
fingerprint_input_item,
normalize_input_items_for_api,
prepare_model_input_items,
Expand Down Expand Up @@ -501,7 +502,8 @@ def prepare_input(
normalized_generated_items, prepared_generated_items, strict=False
)
}
filtered_generated_items = drop_orphan_function_calls(normalized_generated_items)
filtered_generated_items = drop_orphaned_messages_after_consumed_reasoning(normalized_generated_items)
filtered_generated_items = drop_orphan_function_calls(filtered_generated_items)
for item in filtered_generated_items:
prepared_source_item = normalized_generated_sources.get(id(item))
if prepared_source_item is not None:
Expand Down
16 changes: 14 additions & 2 deletions src/agents/run_internal/session_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
copy_input_items,
deduplicate_input_items_preferring_latest,
drop_orphan_function_calls,
drop_orphaned_messages_after_consumed_reasoning,
ensure_input_item_format,
fingerprint_input_item,
normalize_input_items_for_api,
Expand Down Expand Up @@ -176,9 +177,20 @@ async def prepare_input_with_session(
prune_history_indexes,
)
prepared_as_inputs = [ensure_input_item_format(item) for item in prepared_items_raw]
# Snapshot which prepared items are history items by object identity before the filtering
# pass. drop_orphaned_messages_after_consumed_reasoning may remove items and shift positions,
# so prune_history_indexes (built from pre-filter offsets) would be wrong for the subsequent
# drop_orphan_function_calls call. Rebuild the index set from surviving item identities.
history_ids_in_prepared = {
id(prepared_as_inputs[i]) for i in prune_history_indexes if i < len(prepared_as_inputs)
}
filtered = drop_orphaned_messages_after_consumed_reasoning(prepared_as_inputs)
adjusted_prune_indexes = {
idx for idx, item in enumerate(filtered) if id(item) in history_ids_in_prepared
}
filtered = drop_orphan_function_calls(
prepared_as_inputs,
pruning_indexes=prune_history_indexes,
filtered,
pruning_indexes=adjusted_prune_indexes,
)
normalized = normalize_input_items_for_api(filtered)
deduplicated = deduplicate_input_items_preferring_latest(normalized)
Expand Down
Loading