From f3c2e88500fa0f9f0d960c40fb65a24828950e7e Mon Sep 17 00:00:00 2001 From: Gregor Martynus <39992+gr2m@users.noreply.github.com> Date: Sun, 23 Nov 2025 15:11:21 -0800 Subject: [PATCH 1/4] Backport conflicts for PR #10462 to release-v5.0 --- .changeset/popular-hounds-boil.md | 7 +++++ .../generate-text/google-image-multi-step.ts | 31 +++++++++++++++++++ .../ai/src/generate-text/generate-text.ts | 3 ++ ...t-to-google-generative-ai-messages.test.ts | 13 -------- ...onvert-to-google-generative-ai-messages.ts | 8 +---- ...oogle-generative-ai-language-model.test.ts | 6 ++++ .../google-generative-ai-language-model.ts | 4 +++ .../v3/language-model-v3-file.ts | 29 +++++++++++++++++ 8 files changed, 81 insertions(+), 20 deletions(-) create mode 100644 .changeset/popular-hounds-boil.md create mode 100644 examples/ai-core/src/generate-text/google-image-multi-step.ts create mode 100644 packages/provider/src/language-model/v3/language-model-v3-file.ts diff --git a/.changeset/popular-hounds-boil.md b/.changeset/popular-hounds-boil.md new file mode 100644 index 000000000000..c7d37c027305 --- /dev/null +++ b/.changeset/popular-hounds-boil.md @@ -0,0 +1,7 @@ +--- +'@ai-sdk/provider': patch +'@ai-sdk/google': patch +'ai': patch +--- + +fix(google): add thought signature to gemini 3 pro image parts diff --git a/examples/ai-core/src/generate-text/google-image-multi-step.ts b/examples/ai-core/src/generate-text/google-image-multi-step.ts new file mode 100644 index 000000000000..15b254b903eb --- /dev/null +++ b/examples/ai-core/src/generate-text/google-image-multi-step.ts @@ -0,0 +1,31 @@ +import { google } from '@ai-sdk/google'; +import { generateText } from 'ai'; + +import { presentImages } from '../lib/present-image'; +import { run } from '../lib/run'; + +import 'dotenv/config'; + +run(async () => { + const step1 = await generateText({ + model: google('gemini-3-pro-image-preview'), + prompt: + 'Create an image of Los Angeles where all car infrastructure has been replaced with bike infrastructure, trains, pedestrian zones, and parks. The image should be photorealistic and vibrant.', + }); + + await presentImages(step1.files); + + const step2 = await generateText({ + model: google('gemini-3-pro-image-preview'), + messages: [ + ...step1.response.messages, + { + role: 'user', + content: + 'Now create a variation of the image, but in the style of a watercolor painting.', + }, + ], + }); + + await presentImages(step2.files); +}); diff --git a/packages/ai/src/generate-text/generate-text.ts b/packages/ai/src/generate-text/generate-text.ts index d65732e26caa..b9a82bb39bc1 100644 --- a/packages/ai/src/generate-text/generate-text.ts +++ b/packages/ai/src/generate-text/generate-text.ts @@ -884,6 +884,9 @@ function asContent({ return { type: 'file' as const, file: new DefaultGeneratedFile(part), + ...(part.providerMetadata != null + ? { providerMetadata: part.providerMetadata } + : {}), }; } diff --git a/packages/google/src/convert-to-google-generative-ai-messages.test.ts b/packages/google/src/convert-to-google-generative-ai-messages.test.ts index d5065a1ed9ed..bec79540a2c2 100644 --- a/packages/google/src/convert-to-google-generative-ai-messages.test.ts +++ b/packages/google/src/convert-to-google-generative-ai-messages.test.ts @@ -323,19 +323,6 @@ describe('assistant messages', () => { }); }); - it('should throw error for non-PNG images in assistant messages', async () => { - expect(() => - convertToGoogleGenerativeAIMessages([ - { - role: 'assistant', - content: [ - { type: 'file', data: 'AAECAw==', mediaType: 'image/jpeg' }, - ], - }, - ]), - ).toThrow('Only PNG images are supported in assistant messages'); - }); - it('should throw error for URL file data in assistant messages', async () => { expect(() => convertToGoogleGenerativeAIMessages([ diff --git a/packages/google/src/convert-to-google-generative-ai-messages.ts b/packages/google/src/convert-to-google-generative-ai-messages.ts index b0b9bbeff1b1..377278de031b 100644 --- a/packages/google/src/convert-to-google-generative-ai-messages.ts +++ b/packages/google/src/convert-to-google-generative-ai-messages.ts @@ -107,13 +107,6 @@ export function convertToGoogleGenerativeAIMessages( } case 'file': { - if (part.mediaType !== 'image/png') { - throw new UnsupportedFunctionalityError({ - functionality: - 'Only PNG images are supported in assistant messages', - }); - } - if (part.data instanceof URL) { throw new UnsupportedFunctionalityError({ functionality: @@ -126,6 +119,7 @@ export function convertToGoogleGenerativeAIMessages( mimeType: part.mediaType, data: convertToBase64(part.data), }, + thoughtSignature, }; } diff --git a/packages/google/src/google-generative-ai-language-model.test.ts b/packages/google/src/google-generative-ai-language-model.test.ts index 7adfbe677bb3..1f837ce24859 100644 --- a/packages/google/src/google-generative-ai-language-model.test.ts +++ b/packages/google/src/google-generative-ai-language-model.test.ts @@ -1410,6 +1410,7 @@ describe('doGenerate', () => { { "data": "base64encodedimagedata", "mediaType": "image/jpeg", + "providerMetadata": undefined, "type": "file", }, { @@ -1420,6 +1421,7 @@ describe('doGenerate', () => { { "data": "anotherbase64encodedimagedata", "mediaType": "image/png", + "providerMetadata": undefined, "type": "file", }, ] @@ -1472,11 +1474,13 @@ describe('doGenerate', () => { { "data": "imagedata1", "mediaType": "image/jpeg", + "providerMetadata": undefined, "type": "file", }, { "data": "imagedata2", "mediaType": "image/png", + "providerMetadata": undefined, "type": "file", }, ] @@ -1591,11 +1595,13 @@ describe('doGenerate', () => { { "data": "validimagedata", "mediaType": "image/jpeg", + "providerMetadata": undefined, "type": "file", }, { "data": "pdfdata", "mediaType": "application/pdf", + "providerMetadata": undefined, "type": "file", }, ] diff --git a/packages/google/src/google-generative-ai-language-model.ts b/packages/google/src/google-generative-ai-language-model.ts index 6e06f6cc0a6e..862b12246bc9 100644 --- a/packages/google/src/google-generative-ai-language-model.ts +++ b/packages/google/src/google-generative-ai-language-model.ts @@ -271,6 +271,9 @@ export class GoogleGenerativeAILanguageModel implements LanguageModelV2 { type: 'file' as const, data: part.inlineData.data, mediaType: part.inlineData.mimeType, + providerMetadata: part.thoughtSignature + ? { google: { thoughtSignature: part.thoughtSignature } } + : undefined, }); } } @@ -809,6 +812,7 @@ const getContentSchema = () => mimeType: z.string(), data: z.string(), }), + thoughtSignature: z.string().nullish(), }), z.object({ executableCode: z diff --git a/packages/provider/src/language-model/v3/language-model-v3-file.ts b/packages/provider/src/language-model/v3/language-model-v3-file.ts new file mode 100644 index 000000000000..27b18b110b9b --- /dev/null +++ b/packages/provider/src/language-model/v3/language-model-v3-file.ts @@ -0,0 +1,29 @@ +import { SharedV3ProviderMetadata } from '../../shared'; + +/** +A file that has been generated by the model. +Generated files as base64 encoded strings or binary data. +The files should be returned without any unnecessary conversion. + */ +export type LanguageModelV3File = { + type: 'file'; + + /** +The IANA media type of the file, e.g. `image/png` or `audio/mp3`. + +@see https://www.iana.org/assignments/media-types/media-types.xhtml + */ + mediaType: string; + + /** +Generated file data as base64 encoded strings or binary data. + +The file data should be returned without any unnecessary conversion. +If the API returns base64 encoded strings, the file data should be returned +as base64 encoded strings. If the API returns binary data, the file data should +be returned as binary data. + */ + data: string | Uint8Array; + + providerMetadata?: SharedV3ProviderMetadata; +}; From ca42a826ada79f1f4be8f787a21c89967d154791 Mon Sep 17 00:00:00 2001 From: Gregor Martynus <39992+gr2m@users.noreply.github.com> Date: Sun, 23 Nov 2025 15:20:36 -0800 Subject: [PATCH 2/4] adapt for v5 --- .../v2/language-model-v2-file.ts | 4 +++ .../v3/language-model-v3-file.ts | 29 ------------------- 2 files changed, 4 insertions(+), 29 deletions(-) delete mode 100644 packages/provider/src/language-model/v3/language-model-v3-file.ts diff --git a/packages/provider/src/language-model/v2/language-model-v2-file.ts b/packages/provider/src/language-model/v2/language-model-v2-file.ts index 60f1120fd87b..ea3d8ecd6f0f 100644 --- a/packages/provider/src/language-model/v2/language-model-v2-file.ts +++ b/packages/provider/src/language-model/v2/language-model-v2-file.ts @@ -1,3 +1,5 @@ +import { SharedV2ProviderMetadata } from "../../shared"; + /** A file that has been generated by the model. Generated files as base64 encoded strings or binary data. @@ -22,4 +24,6 @@ as base64 encoded strings. If the API returns binary data, the file data should be returned as binary data. */ data: string | Uint8Array; + + providerMetadata?: SharedV2ProviderMetadata; }; diff --git a/packages/provider/src/language-model/v3/language-model-v3-file.ts b/packages/provider/src/language-model/v3/language-model-v3-file.ts deleted file mode 100644 index 27b18b110b9b..000000000000 --- a/packages/provider/src/language-model/v3/language-model-v3-file.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { SharedV3ProviderMetadata } from '../../shared'; - -/** -A file that has been generated by the model. -Generated files as base64 encoded strings or binary data. -The files should be returned without any unnecessary conversion. - */ -export type LanguageModelV3File = { - type: 'file'; - - /** -The IANA media type of the file, e.g. `image/png` or `audio/mp3`. - -@see https://www.iana.org/assignments/media-types/media-types.xhtml - */ - mediaType: string; - - /** -Generated file data as base64 encoded strings or binary data. - -The file data should be returned without any unnecessary conversion. -If the API returns base64 encoded strings, the file data should be returned -as base64 encoded strings. If the API returns binary data, the file data should -be returned as binary data. - */ - data: string | Uint8Array; - - providerMetadata?: SharedV3ProviderMetadata; -}; From 18890c1a2ec4fcd5c96d68666acc8c85a706d9ba Mon Sep 17 00:00:00 2001 From: Gregor Martynus <39992+gr2m@users.noreply.github.com> Date: Mon, 24 Nov 2025 07:56:56 -0800 Subject: [PATCH 3/4] jsdoc --- .../provider/src/language-model/v2/language-model-v2-file.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/provider/src/language-model/v2/language-model-v2-file.ts b/packages/provider/src/language-model/v2/language-model-v2-file.ts index ea3d8ecd6f0f..3aad0e87bc9c 100644 --- a/packages/provider/src/language-model/v2/language-model-v2-file.ts +++ b/packages/provider/src/language-model/v2/language-model-v2-file.ts @@ -25,5 +25,8 @@ be returned as binary data. */ data: string | Uint8Array; + /** +Optional provider-specific metadata for the file part. + */ providerMetadata?: SharedV2ProviderMetadata; }; From ea689a31bb1e4b05a9841b03c1772c1b62a9632a Mon Sep 17 00:00:00 2001 From: Gregor Martynus <39992+gr2m@users.noreply.github.com> Date: Mon, 24 Nov 2025 07:57:36 -0800 Subject: [PATCH 4/4] style: prettier --- .../provider/src/language-model/v2/language-model-v2-file.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/provider/src/language-model/v2/language-model-v2-file.ts b/packages/provider/src/language-model/v2/language-model-v2-file.ts index 3aad0e87bc9c..b91f39e2f34f 100644 --- a/packages/provider/src/language-model/v2/language-model-v2-file.ts +++ b/packages/provider/src/language-model/v2/language-model-v2-file.ts @@ -1,4 +1,4 @@ -import { SharedV2ProviderMetadata } from "../../shared"; +import { SharedV2ProviderMetadata } from '../../shared'; /** A file that has been generated by the model. @@ -24,7 +24,7 @@ as base64 encoded strings. If the API returns binary data, the file data should be returned as binary data. */ data: string | Uint8Array; - + /** Optional provider-specific metadata for the file part. */