-
Notifications
You must be signed in to change notification settings - Fork 375
fix: drop orphaned tool results on session resume #3001
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+147
−9
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,116 @@ | ||
| package session | ||
|
|
||
| import ( | ||
| "testing" | ||
|
|
||
| "github.com/docker/docker-agent/pkg/agent" | ||
| "github.com/docker/docker-agent/pkg/chat" | ||
| "github.com/docker/docker-agent/pkg/tools" | ||
| ) | ||
|
|
||
| // assertToolPairingInvariant fails if any tool-result message lacks a matching | ||
| // tool_call ID in a preceding assistant message. This is exactly the invariant | ||
| // AWS Bedrock's ConverseStream enforces: "The number of toolResult blocks ... | ||
| // exceeds the number of toolUse blocks of previous turn." (issue #1676) | ||
| func assertToolPairingInvariant(t *testing.T, messages []chat.Message) { | ||
| t.Helper() | ||
| seen := map[string]bool{} | ||
| for i, m := range messages { | ||
| if m.Role == chat.MessageRoleAssistant { | ||
| for _, tc := range m.ToolCalls { | ||
| seen[tc.ID] = true | ||
| } | ||
| } | ||
| if m.Role == chat.MessageRoleTool { | ||
| if !seen[m.ToolCallID] { | ||
| t.Errorf("orphaned tool result at index %d: tool_call_id %q has no preceding toolUse (Bedrock would reject this)", | ||
| i, m.ToolCallID) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // TestSanitizeToolCalls_DropsOrphanedToolResult is a regression test for issue | ||
| // #1676 (session resume fails on AWS Bedrock with a toolResult/toolUse count | ||
| // mismatch). | ||
| // | ||
| // Scenario: a session is compacted, and the kept-tail boundary (startIndex / | ||
| // FirstKeptEntry) lands *after* the assistant message that issued a tool call | ||
| // but *before* its tool result. On resume, buildSessionSummaryMessages emits | ||
| // the conversation from that boundary, so the reconstructed history begins with | ||
| // a tool-result message whose assistant (toolUse) was left behind the summary. | ||
| // | ||
| // sanitizeToolCalls is the final guard before the request hits the provider and | ||
| // must drop that orphaned result so the request stays valid. | ||
| func TestSanitizeToolCalls_DropsOrphanedToolResult(t *testing.T) { | ||
| // History as reconstructed on resume after compaction: the toolUse-bearing | ||
| // assistant message is gone (folded into the summary), only its result and | ||
| // the following turn survive. | ||
| reconstructed := []chat.Message{ | ||
| {Role: chat.MessageRoleUser, Content: "Session Summary: ..."}, | ||
| // <-- assistant message with ToolCalls{ID:"tc1"} was dropped here | ||
| {Role: chat.MessageRoleTool, ToolCallID: "tc1", Content: "file contents"}, | ||
| {Role: chat.MessageRoleAssistant, Content: "Here is the file."}, | ||
| {Role: chat.MessageRoleUser, Content: "thanks, now resume"}, | ||
| } | ||
|
|
||
| out := sanitizeToolCalls(reconstructed) | ||
|
|
||
| // The orphaned tool result must be gone, leaving a Bedrock-valid sequence. | ||
| assertToolPairingInvariant(t, out) | ||
| for i, m := range out { | ||
| if m.Role == chat.MessageRoleTool && m.ToolCallID == "tc1" { | ||
| t.Fatalf("orphaned tool result tc1 should have been dropped, still present at index %d", i) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // TestSanitizeToolCalls_KeepsMissingResultBalanced pins the opposite direction: | ||
| // a dangling toolUse (no result) gets a synthetic result and stays balanced. | ||
| func TestSanitizeToolCalls_KeepsMissingResultBalanced(t *testing.T) { | ||
| messages := []chat.Message{ | ||
| {Role: chat.MessageRoleUser, Content: "hi"}, | ||
| {Role: chat.MessageRoleAssistant, ToolCalls: []tools.ToolCall{{ID: "tc1"}}}, | ||
| // result missing | ||
| {Role: chat.MessageRoleUser, Content: "next"}, | ||
| } | ||
| out := sanitizeToolCalls(messages) | ||
| assertToolPairingInvariant(t, out) // synthetic result injected | ||
| } | ||
|
|
||
| // TestGetMessages_ResumeAfterCompaction_NoOrphanedToolResult exercises the full | ||
| // GetMessages reconstruction path that runs on the first turn after a session | ||
| // is resumed (runtime/loop.go calls sess.GetMessages before the loop starts). | ||
| // | ||
| // It reproduces the exact #1676 condition: a compaction summary whose | ||
| // FirstKeptEntry boundary lands between an assistant tool_use and its | ||
| // tool_result, so buildSessionSummaryMessages emits the conversation starting at | ||
| // the orphaned tool_result. GetMessages must return a provider-valid sequence. | ||
| func TestGetMessages_ResumeAfterCompaction_NoOrphanedToolResult(t *testing.T) { | ||
| testAgent := &agent.Agent{} | ||
| s := New() | ||
|
|
||
| // items[0]: old user turn (summarized away) | ||
| s.AddMessage(NewAgentMessage("", &chat.Message{Role: chat.MessageRoleUser, Content: "old question"})) | ||
| // items[1]: assistant that issued tool call tc1 (summarized away) | ||
| s.AddMessage(NewAgentMessage("", &chat.Message{ | ||
| Role: chat.MessageRoleAssistant, | ||
| Content: "calling tool", | ||
| ToolCalls: []tools.ToolCall{{ID: "tc1"}}, | ||
| })) | ||
| // items[2]: the tool result — KEPT, but its assistant (items[1]) is not | ||
| s.AddMessage(NewAgentMessage("", &chat.Message{ | ||
| Role: chat.MessageRoleTool, | ||
| ToolCallID: "tc1", | ||
| Content: "tool output", | ||
| })) | ||
| // items[3]: assistant follow-up — kept | ||
| s.AddMessage(NewAgentMessage("", &chat.Message{Role: chat.MessageRoleAssistant, Content: "answer"})) | ||
| // items[4]: summary whose kept-tail boundary (index 2) splits the tc1 pair | ||
| s.Messages = append(s.Messages, Item{Summary: "summary", FirstKeptEntry: 2}) | ||
|
|
||
| messages := s.GetMessages(testAgent) | ||
|
|
||
| // The reconstructed history must not contain the orphaned tc1 result. | ||
| assertToolPairingInvariant(t, messages) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[LOW] Behavioral change: tool results with empty
ToolCallIDare now silently droppedUnder the old logic the condition was
if msg.ToolCallID != ""which only gated theresultIDsbook-keeping — a tool-result with an empty ID still fell through toout = append(out, msg)and was forwarded to the provider. Under the new condition:an empty-ID tool result is now silently dropped. In practice this is likely fine — a tool result with no ID cannot be matched to any
tool_useand most providers mandate non-empty IDs — but it is a silent behavioral regression from the previous behavior. If any in-flight session persisted with empty-ID tool results they would be discarded on the next resume.Consider documenting the intentional drop (e.g.,
// empty ToolCallID is malformed; drop it) so the behavior is explicit rather than a side-effect of the orphan check.