diff --git a/.changeset/chat-no-duplicate-tool-call-end.md b/.changeset/chat-no-duplicate-tool-call-end.md new file mode 100644 index 000000000..957caa6af --- /dev/null +++ b/.changeset/chat-no-duplicate-tool-call-end.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai': patch +--- + +Fix duplicate `TOOL_CALL_END` for server-executed tools. The adapter already streams `START`/`ARGS`/`END` for each tool call, but `chat()` emitted a second `END` afterwards with no matching `START` — an orphan event that AG-UI-strict consumers (e.g. `@ag-ui/client`'s `verifyEvents`) reject. The post-execution phase now only adds `TOOL_CALL_RESULT`. Fixes #519. diff --git a/.changeset/devtools-tool-result-from-result-event.md b/.changeset/devtools-tool-result-from-result-event.md new file mode 100644 index 000000000..8336731ce --- /dev/null +++ b/.changeset/devtools-tool-result-from-result-event.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-event-client': patch +--- + +Surface server-executed tool results in devtools from the `TOOL_CALL_RESULT` event. The devtools middleware previously read results only off `TOOL_CALL_END`, which the adapter emits before the tool runs (so it carries no result). Now that `chat()` no longer re-emits a post-execution `TOOL_CALL_END` (see the `@tanstack/ai` #519 fix), results travel on the spec-compliant `TOOL_CALL_RESULT` event — the middleware now handles it so devtools keeps showing server-tool output. diff --git a/packages/ai-event-client/src/devtools-middleware.ts b/packages/ai-event-client/src/devtools-middleware.ts index 56736f9c4..a5446d3fb 100644 --- a/packages/ai-event-client/src/devtools-middleware.ts +++ b/packages/ai-event-client/src/devtools-middleware.ts @@ -39,6 +39,11 @@ interface DevtoolsToolCallEndChunk { toolCallId: string result?: string } +interface DevtoolsToolCallResultChunk { + type: 'TOOL_CALL_RESULT' + toolCallId: string + content?: string +} interface DevtoolsRunFinishedChunk { type: 'RUN_FINISHED' finishReason?: 'stop' | 'length' | 'content_filter' | 'tool_calls' | null @@ -60,6 +65,7 @@ type DevtoolsKnownChunk = | DevtoolsToolCallStartChunk | DevtoolsToolCallArgsChunk | DevtoolsToolCallEndChunk + | DevtoolsToolCallResultChunk | DevtoolsRunFinishedChunk | DevtoolsRunErrorChunk | DevtoolsStepFinishedChunk @@ -75,6 +81,7 @@ const KNOWN_CHUNK_TYPES: ReadonlySet = new Set([ 'TOOL_CALL_START', 'TOOL_CALL_ARGS', 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', 'RUN_FINISHED', 'RUN_ERROR', 'STEP_FINISHED', @@ -385,6 +392,21 @@ export function devtoolsMiddleware(): DevtoolsChatMiddleware { }) break } + case 'TOOL_CALL_RESULT': { + // Server-executed tool results arrive on the spec-compliant + // TOOL_CALL_RESULT event (the adapter's TOOL_CALL_END carries only + // the parsed input). Surface them to devtools from here so results + // still show up now that the post-execution END is no longer + // re-emitted (#519). + safeEmit('text:chunk:tool-result', { + ...base, + messageId: localMessageId || undefined, + toolCallId: chunk.toolCallId, + result: chunk.content || '', + timestamp: Date.now(), + }) + break + } case 'RUN_FINISHED': { safeEmit('text:chunk:done', { ...base, diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index a678a6ae9..b4fe99aba 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -1639,8 +1639,9 @@ class TextEngine< const wireContent = typeof content === 'string' ? content : JSON.stringify(content) - // Emit TOOL_CALL_START + TOOL_CALL_ARGS before TOOL_CALL_END so that - // the client can reconstruct the full tool call during continuations. + // argsMap is set only on continuation re-executions, where the adapter + // never streamed these calls. Otherwise it already emitted END, so a + // second one here would be an orphan that fails verifyEvents (#519). if (argsMap) { chunks.push({ type: 'TOOL_CALL_START', @@ -1660,18 +1661,18 @@ class TextEngine< delta: args, args, } as StreamChunk) - } - chunks.push({ - type: 'TOOL_CALL_END', - timestamp: Date.now(), - model: finishEvent.model, - toolCallId: result.toolCallId, - toolCallName: result.toolName, - toolName: result.toolName, - result: wireContent, - ...(result.state !== undefined && { state: result.state }), - } as StreamChunk) + chunks.push({ + type: 'TOOL_CALL_END', + timestamp: Date.now(), + model: finishEvent.model, + toolCallId: result.toolCallId, + toolCallName: result.toolName, + toolName: result.toolName, + result: wireContent, + ...(result.state !== undefined && { state: result.state }), + } as StreamChunk) + } // AG-UI spec TOOL_CALL_RESULT event (content is string-only per spec) chunks.push({ diff --git a/packages/ai/tests/chat.test.ts b/packages/ai/tests/chat.test.ts index 4a07dc353..c7267f8b5 100644 --- a/packages/ai/tests/chat.test.ts +++ b/packages/ai/tests/chat.test.ts @@ -252,6 +252,7 @@ describe('chat()', () => { ev.runStarted(), ev.toolStart('call_1', 'failTool'), ev.toolArgs('call_1', '{}'), + ev.toolEnd('call_1', 'failTool', { input: {} }), ev.runFinished('tool_calls'), ], [ @@ -286,10 +287,71 @@ describe('chat()', () => { const contentStr = (toolResultChunks[0] as any).content expect(contentStr).toContain('error') - const toolCallEnd = chunks.find( + // Error state rides on TOOL_CALL_RESULT, not the END + const toolResultErr = chunks.find( + (c) => c.type === 'TOOL_CALL_RESULT' && c.toolCallId === 'call_1', + ) + expect(toolResultErr).toMatchObject({ state: 'output-error' }) + + // No duplicate END (#519) + const endChunks = chunks.filter( (c) => c.type === 'TOOL_CALL_END' && c.toolCallId === 'call_1', ) - expect(toolCallEnd).toMatchObject({ state: 'output-error' }) + expect(endChunks).toHaveLength(1) + }) + + // #519: post-execution must not duplicate the END the adapter already streamed + it('should emit exactly one TOOL_CALL_END per server-executed tool', async () => { + const { adapter } = createMockAdapter({ + iterations: [ + [ + ev.runStarted(), + ev.toolStart('call_1', 'getWeather'), + ev.toolArgs('call_1', '{"city":"NYC"}'), + ev.toolEnd('call_1', 'getWeather', { input: { city: 'NYC' } }), + ev.runFinished('tool_calls'), + ], + [ + ev.runStarted(), + ev.textStart(), + ev.textContent('72F in NYC.'), + ev.textEnd(), + ev.runFinished('stop'), + ], + ], + }) + + const stream = chat({ + adapter, + messages: [{ role: 'user', content: 'Weather?' }], + tools: [serverTool('getWeather', () => ({ temp: 72 }))], + }) + + const chunks = await collectChunks(stream as AsyncIterable) + + const starts = chunks.filter( + (c) => c.type === 'TOOL_CALL_START' && c.toolCallId === 'call_1', + ) + const ends = chunks.filter( + (c) => c.type === 'TOOL_CALL_END' && c.toolCallId === 'call_1', + ) + const results = chunks.filter( + (c) => c.type === 'TOOL_CALL_RESULT' && c.toolCallId === 'call_1', + ) + + // pre-fix `ends` was 2 + expect(starts).toHaveLength(1) + expect(ends).toHaveLength(1) + expect(results).toHaveLength(1) + + // Every END has a matching START (the verifyEvents invariant) + const open = new Set() + for (const c of chunks) { + if (c.type === 'TOOL_CALL_START') open.add(c.toolCallId) + if (c.type === 'TOOL_CALL_END') { + expect(open.has(c.toolCallId)).toBe(true) + } + } }) }) @@ -1528,6 +1590,9 @@ describe('chat()', () => { 'call_disc', JSON.stringify({ toolNames: ['getWeather'] }), ) + yield ev.toolEnd('call_disc', '__lazy__tool__discovery__', { + input: { toolNames: ['getWeather'] }, + }) yield ev.runFinished('tool_calls') })() } else if (callCount === 2 && toolNames.includes('getWeather')) { @@ -1536,6 +1601,9 @@ describe('chat()', () => { yield ev.runStarted() yield ev.toolStart('call_weather', 'getWeather') yield ev.toolArgs('call_weather', '{"city":"NYC"}') + yield ev.toolEnd('call_weather', 'getWeather', { + input: { city: 'NYC' }, + }) yield ev.runFinished('tool_calls') })() } else { diff --git a/packages/ai/tests/middleware.test.ts b/packages/ai/tests/middleware.test.ts index 03b792f12..e5a5bb90e 100644 --- a/packages/ai/tests/middleware.test.ts +++ b/packages/ai/tests/middleware.test.ts @@ -1449,8 +1449,7 @@ describe('chat() middleware', () => { // Tool execution phase 'onBeforeToolCall:myTool', 'onAfterToolCall:myTool:true', - // Tool result events (piped through middleware) - 'onChunk:TOOL_CALL_END', + // Only the result — the adapter already streamed END above (#519) 'onChunk:TOOL_CALL_RESULT', // Second model call (beforeModel phase) 'onConfig:beforeModel', diff --git a/testing/e2e/src/routeTree.gen.ts b/testing/e2e/src/routeTree.gen.ts index b0e11446b..351d0c138 100644 --- a/testing/e2e/src/routeTree.gen.ts +++ b/testing/e2e/src/routeTree.gen.ts @@ -24,6 +24,7 @@ import { Route as ApiVideoRouteImport } from './routes/api.video' import { Route as ApiTtsRouteImport } from './routes/api.tts' import { Route as ApiTranscriptionRouteImport } from './routes/api.transcription' import { Route as ApiToolsTestRouteImport } from './routes/api.tools-test' +import { Route as ApiToolCallLifecycleWireRouteImport } from './routes/api.tool-call-lifecycle-wire' import { Route as ApiSummarizeRouteImport } from './routes/api.summarize' import { Route as ApiOpenrouterWebToolsWireRouteImport } from './routes/api.openrouter-web-tools-wire' import { Route as ApiOpenrouterCostRouteImport } from './routes/api.openrouter-cost' @@ -117,6 +118,12 @@ const ApiToolsTestRoute = ApiToolsTestRouteImport.update({ path: '/api/tools-test', getParentRoute: () => rootRouteImport, } as any) +const ApiToolCallLifecycleWireRoute = + ApiToolCallLifecycleWireRouteImport.update({ + id: '/api/tool-call-lifecycle-wire', + path: '/api/tool-call-lifecycle-wire', + getParentRoute: () => rootRouteImport, + } as any) const ApiSummarizeRoute = ApiSummarizeRouteImport.update({ id: '/api/summarize', path: '/api/summarize', @@ -228,6 +235,7 @@ export interface FileRoutesByFullPath { '/api/openrouter-cost': typeof ApiOpenrouterCostRoute '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute '/api/summarize': typeof ApiSummarizeRoute + '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute '/api/tools-test': typeof ApiToolsTestRoute '/api/transcription': typeof ApiTranscriptionRouteWithChildren '/api/tts': typeof ApiTtsRouteWithChildren @@ -262,6 +270,7 @@ export interface FileRoutesByTo { '/api/openrouter-cost': typeof ApiOpenrouterCostRoute '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute '/api/summarize': typeof ApiSummarizeRoute + '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute '/api/tools-test': typeof ApiToolsTestRoute '/api/transcription': typeof ApiTranscriptionRouteWithChildren '/api/tts': typeof ApiTtsRouteWithChildren @@ -297,6 +306,7 @@ export interface FileRoutesById { '/api/openrouter-cost': typeof ApiOpenrouterCostRoute '/api/openrouter-web-tools-wire': typeof ApiOpenrouterWebToolsWireRoute '/api/summarize': typeof ApiSummarizeRoute + '/api/tool-call-lifecycle-wire': typeof ApiToolCallLifecycleWireRoute '/api/tools-test': typeof ApiToolsTestRoute '/api/transcription': typeof ApiTranscriptionRouteWithChildren '/api/tts': typeof ApiTtsRouteWithChildren @@ -333,6 +343,7 @@ export interface FileRouteTypes { | '/api/openrouter-cost' | '/api/openrouter-web-tools-wire' | '/api/summarize' + | '/api/tool-call-lifecycle-wire' | '/api/tools-test' | '/api/transcription' | '/api/tts' @@ -367,6 +378,7 @@ export interface FileRouteTypes { | '/api/openrouter-cost' | '/api/openrouter-web-tools-wire' | '/api/summarize' + | '/api/tool-call-lifecycle-wire' | '/api/tools-test' | '/api/transcription' | '/api/tts' @@ -401,6 +413,7 @@ export interface FileRouteTypes { | '/api/openrouter-cost' | '/api/openrouter-web-tools-wire' | '/api/summarize' + | '/api/tool-call-lifecycle-wire' | '/api/tools-test' | '/api/transcription' | '/api/tts' @@ -436,6 +449,7 @@ export interface RootRouteChildren { ApiOpenrouterCostRoute: typeof ApiOpenrouterCostRoute ApiOpenrouterWebToolsWireRoute: typeof ApiOpenrouterWebToolsWireRoute ApiSummarizeRoute: typeof ApiSummarizeRoute + ApiToolCallLifecycleWireRoute: typeof ApiToolCallLifecycleWireRoute ApiToolsTestRoute: typeof ApiToolsTestRoute ApiTranscriptionRoute: typeof ApiTranscriptionRouteWithChildren ApiTtsRoute: typeof ApiTtsRouteWithChildren @@ -550,6 +564,13 @@ declare module '@tanstack/react-router' { preLoaderRoute: typeof ApiToolsTestRouteImport parentRoute: typeof rootRouteImport } + '/api/tool-call-lifecycle-wire': { + id: '/api/tool-call-lifecycle-wire' + path: '/api/tool-call-lifecycle-wire' + fullPath: '/api/tool-call-lifecycle-wire' + preLoaderRoute: typeof ApiToolCallLifecycleWireRouteImport + parentRoute: typeof rootRouteImport + } '/api/summarize': { id: '/api/summarize' path: '/api/summarize' @@ -753,6 +774,7 @@ const rootRouteChildren: RootRouteChildren = { ApiOpenrouterCostRoute: ApiOpenrouterCostRoute, ApiOpenrouterWebToolsWireRoute: ApiOpenrouterWebToolsWireRoute, ApiSummarizeRoute: ApiSummarizeRoute, + ApiToolCallLifecycleWireRoute: ApiToolCallLifecycleWireRoute, ApiToolsTestRoute: ApiToolsTestRoute, ApiTranscriptionRoute: ApiTranscriptionRouteWithChildren, ApiTtsRoute: ApiTtsRouteWithChildren, diff --git a/testing/e2e/src/routes/api.tool-call-lifecycle-wire.ts b/testing/e2e/src/routes/api.tool-call-lifecycle-wire.ts new file mode 100644 index 000000000..e822cdcc0 --- /dev/null +++ b/testing/e2e/src/routes/api.tool-call-lifecycle-wire.ts @@ -0,0 +1,164 @@ +import { createFileRoute } from '@tanstack/react-router' +import { + EventType, + chat, + createChatOptions, + maxIterations, + toolDefinition, +} from '@tanstack/ai' +import { z } from 'zod' +import type { AnyTextAdapter, StreamChunk } from '@tanstack/ai' + +/** + * Wire-format regression for issue #519. + * + * Real adapters stream `TOOL_CALL_START` / `TOOL_CALL_ARGS` / `TOOL_CALL_END` + * for every tool call. Pre-fix, `chat()`'s post-execution phase pushed a + * *second* `TOOL_CALL_END` (with no matching `START`) after running a + * server tool, so the stream carried an orphan `END` that AG-UI-strict + * consumers (`@ag-ui/client`'s `verifyEvents`) reject. + * + * The self-contained adapter below mirrors a real one: it streams its own + * START/ARGS/END for the tool call, then a text answer once the tool result + * comes back. The companion spec drains the emitted chunks and asserts the + * lifecycle is balanced — exactly one END per tool call, each preceded by a + * matching START. + */ +function createServerToolAdapter(): AnyTextAdapter { + return { + kind: 'text', + name: 'tool-lifecycle-test', + model: 'tool-lifecycle-test', + '~types': { + providerOptions: {}, + inputModalities: ['text'], + messageMetadataByModality: {}, + toolCapabilities: [], + toolCallMetadata: undefined, + systemPromptMetadata: undefined, + }, + async *chatStream(options): AsyncGenerator { + const model = 'tool-lifecycle-test' + const runId = options.runId ?? 'tool-lifecycle-run' + const threadId = options.threadId ?? 'tool-lifecycle-thread' + const messageId = `${runId}-message` + const toolCallId = 'call_weather' + const hasToolResult = options.messages.some((m) => m.role === 'tool') + + yield { + type: EventType.RUN_STARTED, + runId, + threadId, + model, + timestamp: Date.now(), + } + + // Second iteration: the tool ran, answer with text and finish. + if (hasToolResult) { + yield { + type: EventType.TEXT_MESSAGE_START, + messageId, + role: 'assistant', + model, + timestamp: Date.now(), + } + yield { + type: EventType.TEXT_MESSAGE_CONTENT, + messageId, + delta: 'It is 72F in NYC.', + model, + timestamp: Date.now(), + } + yield { + type: EventType.TEXT_MESSAGE_END, + messageId, + model, + timestamp: Date.now(), + } + yield { + type: EventType.RUN_FINISHED, + runId, + threadId, + model, + finishReason: 'stop', + timestamp: Date.now(), + } + return + } + + // First iteration: stream the full tool-call lifecycle, as a real + // adapter does, then finish with `tool_calls`. + yield { + type: EventType.TOOL_CALL_START, + toolCallId, + toolCallName: 'getWeather', + toolName: 'getWeather', + model, + timestamp: Date.now(), + } + yield { + type: EventType.TOOL_CALL_ARGS, + toolCallId, + delta: '{"city":"NYC"}', + model, + timestamp: Date.now(), + } + yield { + type: EventType.TOOL_CALL_END, + toolCallId, + toolCallName: 'getWeather', + toolName: 'getWeather', + input: { city: 'NYC' }, + model, + timestamp: Date.now(), + } + yield { + type: EventType.RUN_FINISHED, + runId, + threadId, + model, + finishReason: 'tool_calls', + timestamp: Date.now(), + } + }, + } +} + +export const Route = createFileRoute('/api/tool-call-lifecycle-wire')({ + server: { + handlers: { + POST: async () => { + const getWeather = toolDefinition({ + name: 'getWeather', + description: 'Get the weather for a city', + inputSchema: z.object({ city: z.string() }), + }).server(async ({ city }) => ({ city, tempC: 22 })) + + const chunks: Array = [] + try { + for await (const chunk of chat({ + ...createChatOptions({ adapter: createServerToolAdapter() }), + tools: [getWeather], + messages: [{ role: 'user', content: "What's the weather in NYC?" }], + agentLoopStrategy: maxIterations(3), + })) { + chunks.push(chunk) + } + } catch (error) { + return new Response( + JSON.stringify({ + chunks, + error: error instanceof Error ? error.message : String(error), + }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ) + } + + return new Response(JSON.stringify({ chunks, error: null }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }) + }, + }, + }, +}) diff --git a/testing/e2e/tests/anthropic-server-tool.spec.ts b/testing/e2e/tests/anthropic-server-tool.spec.ts index b26bea74e..90b5865ea 100644 --- a/testing/e2e/tests/anthropic-server-tool.spec.ts +++ b/testing/e2e/tests/anthropic-server-tool.spec.ts @@ -34,9 +34,7 @@ test.describe('anthropic — webFetchTool() streaming (#604)', () => { expect(error).toBeNull() const toolCallStarts = chunks.filter((c) => c.type === 'TOOL_CALL_START') - // `TOOL_CALL_END` is emitted twice per call: once by the adapter with - // the parsed `input`, and once by the tool runner with the execution - // `result`. The bug shape lived in the first one — that's where the + // The adapter's `TOOL_CALL_END` carries the parsed `input` — where the // pre-fix `JSON.parse(concatenatedArgs)` blew up. const toolCallArgEnds = chunks.filter( (c) => diff --git a/testing/e2e/tests/tool-call-lifecycle.spec.ts b/testing/e2e/tests/tool-call-lifecycle.spec.ts new file mode 100644 index 000000000..e34390577 --- /dev/null +++ b/testing/e2e/tests/tool-call-lifecycle.spec.ts @@ -0,0 +1,56 @@ +import { test, expect } from './fixtures' + +/** + * Issue #519 regression — `chat()` must not emit a duplicate `TOOL_CALL_END` + * for server-executed tools. + * + * The adapter streams its own START/ARGS/END; pre-fix the post-execution phase + * pushed a second END with no matching START, which `@ag-ui/client`'s + * `verifyEvents` rejects. `/api/tool-call-lifecycle-wire` runs the real + * `chat()` engine with a server tool and returns the emitted chunks. + */ +test.describe('chat() tool-call lifecycle (#519)', () => { + test('emits exactly one TOOL_CALL_END per server-executed tool', async ({ + request, + }) => { + const res = await request.post('/api/tool-call-lifecycle-wire') + expect(res.ok()).toBe(true) + const { chunks, error } = (await res.json()) as { + chunks: Array> + error: string | null + } + + expect(error).toBeNull() + + const idOf = (c: Record) => c.toolCallId as string + const starts = chunks.filter( + (c) => c.type === 'TOOL_CALL_START' && idOf(c) === 'call_weather', + ) + const ends = chunks.filter( + (c) => c.type === 'TOOL_CALL_END' && idOf(c) === 'call_weather', + ) + const results = chunks.filter( + (c) => c.type === 'TOOL_CALL_RESULT' && idOf(c) === 'call_weather', + ) + + // Pre-fix `ends` was 2 (adapter + duplicate from buildToolResultChunks). + expect(starts).toHaveLength(1) + expect(ends).toHaveLength(1) + expect(results).toHaveLength(1) + + // The server tool actually ran — its result reached the stream. + expect(results[0]).toMatchObject({ content: expect.stringContaining('22') }) + + // verifyEvents invariant: every END consumes a matching open START, so a + // duplicate END for the same toolCallId finds none and fails here. + const open = new Set() + for (const c of chunks) { + if (c.type === 'TOOL_CALL_START') open.add(idOf(c)) + if (c.type === 'TOOL_CALL_END') { + const id = idOf(c) + expect(open.has(id)).toBe(true) + open.delete(id) + } + } + }) +})