Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions src/api/providers/__tests__/gemini-handler.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ import { GeminiHandler } from "../gemini"
import type { ApiHandlerOptions } from "../../../shared/api"

describe("GeminiHandler backend support", () => {
it("createMessage uses function declarations (URL context and grounding are only for completePrompt)", async () => {
// URL context and grounding are mutually exclusive with function declarations
// in Gemini API, so createMessage only uses function declarations.
// URL context/grounding are only added in completePrompt.
it("createMessage uses function declarations and googleSearch for Gemini 3 models", async () => {
// Gemini 3+ models support combining built-in tools (Google Search) with
// function declarations in a single generation (tool context circulation).
const options = {
apiProvider: "gemini",
enableUrlContext: true,
Expand All @@ -20,9 +19,9 @@ describe("GeminiHandler backend support", () => {
handler["client"].models.generateContentStream = stub
await handler.createMessage("instr", [] as any).next()
const config = stub.mock.calls[0][0].config
// createMessage always uses function declarations only
// (tools are always present from ALWAYS_AVAILABLE_TOOLS)
expect(config.tools).toEqual([{ functionDeclarations: expect.any(Array) }])
// Default model is gemini-3.1-pro-preview, a Gemini 3 model,
// so tools should include both function declarations and googleSearch.
expect(config.tools).toEqual([{ functionDeclarations: expect.any(Array) }, { googleSearch: {} }])
})

it("completePrompt passes config overrides without tools when URL context and grounding disabled", async () => {
Expand Down
200 changes: 200 additions & 0 deletions src/api/providers/__tests__/gemini.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,206 @@ describe("GeminiHandler", () => {
})
})

describe("Gemini 3 tool context circulation", () => {
	const systemPrompt = "You are a helpful assistant"
	const mockMessages: Anthropic.Messages.MessageParam[] = [
		{ role: "user", content: "Search the web for the latest API docs" },
	]

	/** Builds a fresh usage-metadata chunk; every fake stream below ends with one. */
	const usageChunk = () => ({ usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5 } })

	/** Wraps a single response part in the `candidates[0].content.parts` envelope. */
	const partChunk = (part: Record<string, unknown>) => ({
		candidates: [{ content: { parts: [part] } }],
	})

	/** Async-iterable stand-in for the value `generateContentStream` resolves to. */
	const fakeStream = (...responseChunks: unknown[]) => ({
		[Symbol.asyncIterator]: async function* () {
			for (const responseChunk of responseChunks) {
				yield responseChunk
			}
		},
	})

	/** Creates a handler for `apiModelId` whose client is swapped for the given stub. */
	const handlerWithStub = (apiModelId: string, generateContentStream: ReturnType<typeof vitest.fn>) => {
		const handler = new GeminiHandler({
			apiKey: "test-key",
			apiModelId,
			geminiApiKey: "test-key",
		})
		handler["client"] = {
			models: {
				generateContentStream,
				generateContent: vitest.fn(),
			},
		} as any
		return handler
	}

	/** Runs one `createMessage` request to completion, discarding the emitted chunks. */
	const drain = async (handler: GeminiHandler) => {
		for await (const _chunk of handler.createMessage(systemPrompt, mockMessages)) {
			// consume
		}
	}

	/** Reads `config.tools` from the first recorded `generateContentStream` call. */
	const toolsSentTo = (generateContentStream: ReturnType<typeof vitest.fn>) =>
		generateContentStream.mock.calls[0][0].config.tools

	it("should include googleSearch tool for Gemini 3 models", async () => {
		const streamStub = vitest.fn().mockResolvedValue(fakeStream({ text: "Hello" }, usageChunk()))
		const handler = handlerWithStub("gemini-3-pro-preview", streamStub)

		await drain(handler)

		// Function declarations come first, followed by the built-in search tool.
		const sentTools = toolsSentTo(streamStub)
		expect(sentTools).toHaveLength(2)
		expect(sentTools[0]).toHaveProperty("functionDeclarations")
		expect(sentTools[1]).toEqual({ googleSearch: {} })
	})

	it("should NOT include googleSearch tool for pre-Gemini 3 models", async () => {
		// Pin a 2.5 model explicitly so the request takes the pre-Gemini 3 path.
		const streamStub = vitest.fn().mockResolvedValue(fakeStream({ text: "Hello" }, usageChunk()))
		const handler = handlerWithStub("gemini-2.5-pro", streamStub)

		await drain(handler)

		const sentTools = toolsSentTo(streamStub)
		expect(sentTools).toHaveLength(1)
		expect(sentTools[0]).toHaveProperty("functionDeclarations")
	})

	it("should handle executableCode parts in streaming response", async () => {
		const streamStub = vitest.fn().mockResolvedValue(
			fakeStream(
				partChunk({
					executableCode: {
						code: 'print("hello")',
						language: "python",
					},
				}),
				partChunk({
					codeExecutionResult: {
						output: "hello",
						outcome: "OUTCOME_OK",
					},
				}),
				usageChunk(),
			),
		)
		const handler = handlerWithStub("gemini-3-pro-preview", streamStub)

		const emitted = []
		for await (const piece of handler.createMessage(systemPrompt, mockMessages)) {
			emitted.push(piece)
		}

		// One text chunk is expected per server-side part: the code, then its result.
		const textChunks = emitted.filter((piece) => piece.type === "text")
		expect(textChunks.length).toBe(2)
		expect(textChunks[0].text).toContain('print("hello")')
		expect(textChunks[1].text).toContain("hello")

		// The raw parts must also be retained so they can be round-tripped in history.
		const storedParts = handler.getServerSideToolParts()
		expect(storedParts).toHaveLength(2)
		expect(storedParts![0].type).toBe("executableCode")
		expect(storedParts![0].data).toEqual({ code: 'print("hello")', language: "python" })
		expect(storedParts![1].type).toBe("codeExecutionResult")
		expect(storedParts![1].data).toEqual({ output: "hello", outcome: "OUTCOME_OK" })
	})

	it("should reset server-side tool parts between requests", async () => {
		const streamStub = vitest.fn()
		const handler = handlerWithStub("gemini-3-pro-preview", streamStub)

		// Request #1 carries a server-side tool part, so one part is stored.
		streamStub.mockResolvedValueOnce(
			fakeStream(partChunk({ executableCode: { code: "x = 1", language: "python" } }), usageChunk()),
		)
		await drain(handler)
		expect(handler.getServerSideToolParts()).toHaveLength(1)

		// Request #2 is plain text only, so the stored parts must be cleared.
		streamStub.mockResolvedValueOnce(fakeStream({ text: "plain text" }, usageChunk()))
		await drain(handler)
		expect(handler.getServerSideToolParts()).toBeUndefined()
	})
})

describe("error telemetry", () => {
const mockMessages: Anthropic.Messages.MessageParam[] = [
{
Expand Down
96 changes: 84 additions & 12 deletions src/api/providers/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,27 @@ import { getModelParams } from "../transform/model-params"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { BaseProvider } from "./base-provider"

/** Kinds of server-side tool part that a {@link ServerSideToolPart} can carry. */
type ServerSideToolPartKind =
	| "serverSideToolCall"
	| "serverSideToolResponse"
	| "executableCode"
	| "codeExecutionResult"

/**
 * A server-side tool part emitted by Gemini 3 when built-in tools (Google
 * Search, Code Execution, URL Context) are used together with custom function
 * declarations. Such parts have to be kept and sent back as part of the
 * conversation history so that the model retains its context.
 */
export type ServerSideToolPart = {
	/** Which kind of server-side part `data` holds. */
	type: ServerSideToolPartKind
	/** The part payload exactly as received from the Gemini API, kept for round-tripping. */
	data: Record<string, unknown>
}

/**
 * Returns true if the model ID corresponds to a Gemini 3+ model, i.e. one that
 * supports combining server-side built-in tools (Google Search, URL Context,
 * Code Execution) with client-side function declarations in a single
 * generation.
 *
 * The major version is parsed from the `gemini-<major>` prefix and compared
 * numerically, so later major versions (e.g. `gemini-4-...`) are recognised
 * as well instead of silently falling back to the pre-Gemini 3 behaviour.
 *
 * @param modelId - Model identifier, e.g. `"gemini-3-pro-preview"`.
 * @returns `true` when the ID starts with `gemini-<major>` and `<major>` is at
 *   least 3; `false` for older versions and for IDs without a numeric major
 *   version directly after the `gemini-` prefix.
 */
function isGemini3Model(modelId: string): boolean {
	const versionMatch = /^gemini-(\d+)/.exec(modelId)
	if (versionMatch === null) {
		return false
	}
	// Only the leading integer matters: "3", "3.1" and "3-pro" all have major version 3.
	return Number(versionMatch[1]) >= 3
}

/**
 * Options accepted by the `GeminiHandler` constructor: every field of
 * `ApiHandlerOptions` plus the optional `isVertex` flag, which the constructor
 * destructures separately from the remaining options.
 */
type GeminiHandlerOptions = ApiHandlerOptions & {
// NOTE(review): presumably selects the Vertex AI backend instead of the
// Gemini API — the code consuming this flag is not visible here; confirm.
isVertex?: boolean
}
Expand All @@ -39,6 +60,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
private client: GoogleGenAI
private lastThoughtSignature?: string
private lastResponseId?: string
private lastServerSideToolParts?: ServerSideToolPart[]
private readonly providerName = "Gemini"

constructor({ isVertex, ...options }: GeminiHandlerOptions) {
Expand Down Expand Up @@ -80,6 +102,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
// Reset per-request metadata that we persist into apiConversationHistory.
this.lastThoughtSignature = undefined
this.lastResponseId = undefined
this.lastServerSideToolParts = undefined

// For hybrid/budget reasoning models (e.g. Gemini 2.5 Pro), respect user-configured
// modelMaxTokens so the ThinkingBudget slider can control the cap. For effort-only or
Expand Down Expand Up @@ -129,18 +152,30 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
.flat()

// Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS).
// Google built-in tools (Grounding, URL Context) are mutually exclusive
// with function declarations in the Gemini API, so we always use
// function declarations when tools are provided.
const tools: GenerateContentConfig["tools"] = [
{
functionDeclarations: (metadata?.tools ?? []).map((tool) => ({
name: (tool as any).function.name,
description: (tool as any).function.description,
parametersJsonSchema: (tool as any).function.parameters,
})),
},
]
// For pre-Gemini 3 models, Google built-in tools (Grounding, URL Context)
// are mutually exclusive with function declarations.
// For Gemini 3+, we can combine them, enabling "tool context circulation"
// where the model can use both server-side built-in tools and client-side
// function declarations in a single generation.
const isGemini3 = isGemini3Model(model)

const functionDeclarationsTool = {
functionDeclarations: (metadata?.tools ?? []).map((tool) => ({
name: (tool as any).function.name,
description: (tool as any).function.description,
parametersJsonSchema: (tool as any).function.parameters,
})),
}

const tools: GenerateContentConfig["tools"] = isGemini3
? [
functionDeclarationsTool,
// Enable Google Search as a built-in tool alongside custom function declarations.
// The model can invoke this server-side, and the results will be circulated back
// as context for subsequent turns.
{ googleSearch: {} },
]
: [functionDeclarationsTool]

// Determine temperature respecting model capabilities and defaults:
// - If supportsTemperature is explicitly false, ignore user overrides
Expand Down Expand Up @@ -235,6 +270,8 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
text?: string
thoughtSignature?: string
functionCall?: { name: string; args: Record<string, unknown> }
executableCode?: { code: string; language?: string }
codeExecutionResult?: { output: string; outcome?: string }
}>) {
// Capture thought signatures so they can be persisted into API history.
const thoughtSignature = part.thoughtSignature
Expand Down Expand Up @@ -277,6 +314,37 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
}

toolCallCounter++
} else if (part.executableCode) {
// Server-side code execution part (Gemini 3 built-in tool).
// Surface the code to the user as informational text and
// store the raw part for round-tripping in conversation history.
hasContent = true
const lang = part.executableCode.language ?? "python"
yield {
type: "text",
text: `\n\`\`\`${lang}\n${part.executableCode.code}\n\`\`\`\n`,
}
if (!this.lastServerSideToolParts) {
this.lastServerSideToolParts = []
}
this.lastServerSideToolParts.push({
type: "executableCode",
data: part.executableCode as unknown as Record<string, unknown>,
})
} else if (part.codeExecutionResult) {
// Server-side code execution result (Gemini 3 built-in tool).
hasContent = true
yield {
type: "text",
text: `\n**Code Execution Result:**\n\`\`\`\n${part.codeExecutionResult.output}\n\`\`\`\n`,
}
if (!this.lastServerSideToolParts) {
this.lastServerSideToolParts = []
}
this.lastServerSideToolParts.push({
type: "codeExecutionResult",
data: part.codeExecutionResult as unknown as Record<string, unknown>,
})
} else {
// This is regular content
if (part.text) {
Expand Down Expand Up @@ -463,6 +531,10 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
return this.lastResponseId
}

/**
 * Exposes the server-side tool parts (e.g. `executableCode`,
 * `codeExecutionResult`) recorded for the most recent request.
 *
 * @returns The stored parts in the order they were recorded, or `undefined`
 *   when none have been recorded. The handler's own array is returned, not a
 *   copy.
 */
public getServerSideToolParts(): ServerSideToolPart[] | undefined {
	const { lastServerSideToolParts: recordedParts } = this
	return recordedParts
}

public calculateCost({
info,
inputTokens,
Expand Down
Loading
Loading