diff --git a/pkg/session/sanitize_orphan_test.go b/pkg/session/sanitize_orphan_test.go new file mode 100644 index 000000000..4b21a7944 --- /dev/null +++ b/pkg/session/sanitize_orphan_test.go @@ -0,0 +1,116 @@ +package session + +import ( + "testing" + + "github.com/docker/docker-agent/pkg/agent" + "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/tools" +) + +// assertToolPairingInvariant fails if any tool-result message lacks a matching +// tool_call ID in a preceding assistant message. This is exactly the invariant +// AWS Bedrock's ConverseStream enforces: "The number of toolResult blocks ... +// exceeds the number of toolUse blocks of previous turn." (issue #1676) +func assertToolPairingInvariant(t *testing.T, messages []chat.Message) { + t.Helper() + seen := map[string]bool{} + for i, m := range messages { + if m.Role == chat.MessageRoleAssistant { + for _, tc := range m.ToolCalls { + seen[tc.ID] = true + } + } + if m.Role == chat.MessageRoleTool { + if !seen[m.ToolCallID] { + t.Errorf("orphaned tool result at index %d: tool_call_id %q has no preceding toolUse (Bedrock would reject this)", + i, m.ToolCallID) + } + } + } +} + +// TestSanitizeToolCalls_DropsOrphanedToolResult is a regression test for issue +// #1676 (session resume fails on AWS Bedrock with a toolResult/toolUse count +// mismatch). +// +// Scenario: a session is compacted, and the kept-tail boundary (startIndex / +// FirstKeptEntry) lands *after* the assistant message that issued a tool call +// but *before* its tool result. On resume, buildSessionSummaryMessages emits +// the conversation from that boundary, so the reconstructed history begins with +// a tool-result message whose assistant (toolUse) was left behind the summary. +// +// sanitizeToolCalls is the final guard before the request hits the provider and +// must drop that orphaned result so the request stays valid. +func TestSanitizeToolCalls_DropsOrphanedToolResult(t *testing.T) { + // History as reconstructed on resume after compaction: the toolUse-bearing + // assistant message is gone (folded into the summary), only its result and + // the following turn survive. + reconstructed := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Session Summary: ..."}, + // <-- assistant message with ToolCalls{ID:"tc1"} was dropped here + {Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "file contents"}, + {Role: chat.MessageRoleAssistant, Content: "Here is the file."}, + {Role: chat.MessageRoleUser, Content: "thanks, now resume"}, + } + + out := sanitizeToolCalls(reconstructed) + + // The orphaned tool result must be gone, leaving a Bedrock-valid sequence. + assertToolPairingInvariant(t, out) + for i, m := range out { + if m.Role == chat.MessageRoleTool && m.ToolCallID == "tc1" { + t.Fatalf("orphaned tool result tc1 should have been dropped, still present at index %d", i) + } + } +} + +// TestSanitizeToolCalls_KeepsMissingResultBalanced pins the opposite direction: +// a dangling toolUse (no result) gets a synthetic result and stays balanced. +func TestSanitizeToolCalls_KeepsMissingResultBalanced(t *testing.T) { + messages := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "hi"}, + {Role: chat.MessageRoleAssistant, ToolCalls: []tools.ToolCall{{ID: "tc1"}}}, + // result missing + {Role: chat.MessageRoleUser, Content: "next"}, + } + out := sanitizeToolCalls(messages) + assertToolPairingInvariant(t, out) // synthetic result injected +} + +// TestGetMessages_ResumeAfterCompaction_NoOrphanedToolResult exercises the full +// GetMessages reconstruction path that runs on the first turn after a session +// is resumed (runtime/loop.go calls sess.GetMessages before the loop starts). +// +// It reproduces the exact #1676 condition: a compaction summary whose +// FirstKeptEntry boundary lands between an assistant tool_use and its +// tool_result, so buildSessionSummaryMessages emits the conversation starting at +// the orphaned tool_result. GetMessages must return a provider-valid sequence. +func TestGetMessages_ResumeAfterCompaction_NoOrphanedToolResult(t *testing.T) { + testAgent := &agent.Agent{} + s := New() + + // items[0]: old user turn (summarized away) + s.AddMessage(NewAgentMessage("", &chat.Message{Role: chat.MessageRoleUser, Content: "old question"})) + // items[1]: assistant that issued tool call tc1 (summarized away) + s.AddMessage(NewAgentMessage("", &chat.Message{ + Role: chat.MessageRoleAssistant, + Content: "calling tool", + ToolCalls: []tools.ToolCall{{ID: "tc1"}}, + })) + // items[2]: the tool result — KEPT, but its assistant (items[1]) is not + s.AddMessage(NewAgentMessage("", &chat.Message{ + Role: chat.MessageRoleTool, + ToolCallID: "tc1", + Content: "tool output", + })) + // items[3]: assistant follow-up — kept + s.AddMessage(NewAgentMessage("", &chat.Message{Role: chat.MessageRoleAssistant, Content: "answer"})) + // items[4]: summary whose kept-tail boundary (index 2) splits the tc1 pair + s.Messages = append(s.Messages, Item{Summary: "summary", FirstKeptEntry: 2}) + + messages := s.GetMessages(testAgent) + + // The reconstructed history must not contain the orphaned tc1 result. + assertToolPairingInvariant(t, messages) +} diff --git a/pkg/session/session.go b/pkg/session/session.go index f65f87a21..30303f030 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -1250,17 +1250,29 @@ func normalizeMessageContent(messages []chat.Message) []chat.Message { return out } -// sanitizeToolCalls ensures every tool call in assistant messages has a -// corresponding tool-result message. It walks the message list tracking -// pending tool calls; when a tool-result message arrives its ID is marked -// fulfilled. When the next assistant or user message is encountered (or the -// end of the list is reached), any still-pending tool calls receive synthetic -// error results injected just before that boundary. This guarantees the -// provider always sees a valid request/response pair for every tool call. +// sanitizeToolCalls ensures the tool_use/tool_result blocks sent to the +// provider are always balanced, in both directions: +// +// - Every tool call in an assistant message gets a corresponding tool-result +// message. It walks the message list tracking pending tool calls; when a +// tool-result message arrives its ID is marked fulfilled. When the next +// assistant or user message is encountered (or the end of the list is +// reached), any still-pending tool calls receive synthetic error results +// injected just before that boundary. +// - Every tool-result message has a matching tool_use in the preceding +// assistant message. Orphaned tool results — a tool-result whose +// ToolCallID was never issued by the preceding assistant message — are +// dropped. This happens when compaction's kept-tail boundary lands between +// an assistant tool_use and its result, leaving the result at the head of +// the resumed history with no matching tool_use. Providers such as AWS +// Bedrock reject the request outright in that case ("The number of +// toolResult blocks ... exceeds the number of toolUse blocks of previous +// turn"), so we must strip these before the request goes out. func sanitizeToolCalls(messages []chat.Message) []chat.Message { var ( out []chat.Message pendingToolCalls []tools.ToolCall + pendingIDs = make(map[string]bool) resultIDs = make(map[string]bool) ) @@ -1276,20 +1288,30 @@ func sanitizeToolCalls(messages []chat.Message) []chat.Message { } } pendingToolCalls = nil + pendingIDs = make(map[string]bool) resultIDs = make(map[string]bool) } for _, msg := range messages { switch { case msg.Role == chat.MessageRoleTool: - if msg.ToolCallID != "" { - resultIDs[msg.ToolCallID] = true + // Drop orphaned tool results: a tool_result with no matching + // tool_use in the preceding assistant message violates the + // provider contract. + if msg.ToolCallID == "" || !pendingIDs[msg.ToolCallID] { + continue } + resultIDs[msg.ToolCallID] = true case msg.Role == chat.MessageRoleAssistant && len(msg.ToolCalls) > 0: flushPending() out = append(out, msg) pendingToolCalls = msg.ToolCalls + for _, tc := range msg.ToolCalls { + if tc.ID != "" { + pendingIDs[tc.ID] = true + } + } continue case msg.Role == chat.MessageRoleUser || msg.Role == chat.MessageRoleAssistant: