/**
 * Checks if a content type is an image type.
 *
 * MIME type names are case-insensitive, so the comparison ignores case
 * (`IMAGE/PNG` is treated the same as `image/png`).
 *
 * @param contentType The content type to check.
 * @returns True if the content type is an image type; false for a missing or
 *   empty content type.
 */
function isImageContentType(contentType?: string): boolean {
  if (!contentType) return false;
  return contentType.toLowerCase().startsWith('image/');
}

/**
 * Extracts the base64 data and content type from a data URL.
 *
 * Accepts optional media-type parameters between the MIME type and the
 * `;base64` marker (e.g. `data:text/plain;charset=utf-8;base64,...`). The
 * returned `contentType` is the bare MIME type without those parameters.
 *
 * @param url The data URL to parse.
 * @returns The base64 data and content type, or null if `url` is not a
 *   base64-encoded data URL with an explicit media type.
 */
function extractDataFromBase64Url(url: string): {
  data: string;
  contentType: string;
} | null {
  // Group 1: the MIME type. It must not contain ';' or ',' so that a
  //          non-base64 URL such as `data:text/plain,foo;base64,abc` (whose
  //          payload merely contains ";base64,") is rejected.
  // Non-capturing group: zero or more `;name=value` parameters.
  // Group 2: the base64 payload.
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/.exec(url);
  if (!match) return null;

  const [, contentType, data] = match;
  if (contentType === undefined || data === undefined) return null;
  return { contentType, data };
}

/**
 * Map of (lower-case) content types to file extensions.
 */
const FILE_EXTENSIONS: Record<string, string> = {
  'application/pdf': 'pdf',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
    'docx',
  'text/plain': 'txt',
  'text/csv': 'csv',
};

/**
 * Generates a filename from a content type.
 *
 * Parameters (`; charset=utf-8`) and letter case are ignored when looking up
 * the extension.
 *
 * @param contentType The content type.
 * @returns `file.<ext>` when the content type is listed in FILE_EXTENSIONS,
 *   otherwise the bare name `file`.
 */
function generateFilenameFromContentType(contentType: string): string {
  const mimeType = (contentType.split(';', 1)[0] ?? '').trim().toLowerCase();
  // Own-property check: indexing a plain object with an arbitrary key such as
  // 'constructor' would otherwise return an inherited member.
  const ext = Object.prototype.hasOwnProperty.call(FILE_EXTENSIONS, mimeType)
    ? FILE_EXTENSIONS[mimeType]
    : undefined;
  return ext ? `file.${ext}` : 'file';
}
@@ -115,13 +165,49 @@ export function toOpenAITextAndMedia( text: part.text, }; } else if (part.media) { - return { - type: 'image_url', - image_url: { - url: part.media.url, - detail: visualDetailLevel, - }, - }; + // Determine the content type from the media part or data URL + let contentType = part.media.contentType; + if (!contentType && part.media.url.startsWith('data:')) { + const extracted = extractDataFromBase64Url(part.media.url); + if (extracted) { + contentType = extracted.contentType; + } + } + + // Check if this is an image type + if (isImageContentType(contentType)) { + return { + type: 'image_url', + image_url: { + url: part.media.url, + detail: visualDetailLevel, + }, + }; + } + + // For non-image types (like PDF), use the file type + // OpenAI expects the full data URL (with data: prefix) in file_data + if (part.media.url.startsWith('data:')) { + const extracted = extractDataFromBase64Url(part.media.url); + if (!extracted) { + throw Error( + `Invalid data URL format for media: ${part.media.url.substring(0, 50)}...` + ); + } + return { + type: 'file', + file: { + filename: generateFilenameFromContentType(extracted.contentType), + file_data: part.media.url, // Full data URL with prefix + }, + } as ChatCompletionContentPart; + } + + // If it's a remote URL with non-image content type, this is not supported + // for chat completions according to OpenAI docs + throw Error( + `File URLs are not supported for chat completions. Only base64-encoded files and image URLs are supported. 
Content type: ${contentType}` + ); } throw Error( `Unsupported genkit part fields encountered for current message role: ${JSON.stringify(part)}.` diff --git a/js/plugins/compat-oai/tests/compat_oai_test.ts b/js/plugins/compat-oai/tests/compat_oai_test.ts index 04c3a7cacb..ed2c6ee76b 100644 --- a/js/plugins/compat-oai/tests/compat_oai_test.ts +++ b/js/plugins/compat-oai/tests/compat_oai_test.ts @@ -99,7 +99,7 @@ describe('toOpenAiTextAndMedia', () => { expect(actualOutput).toStrictEqual({ type: 'text', text: 'hi' }); }); - it('should transform media content correctly', () => { + it('should transform image media content correctly', () => { const part: Part = { media: { contentType: 'image/jpeg', @@ -116,6 +116,67 @@ describe('toOpenAiTextAndMedia', () => { }); }); + it('should transform PDF file content correctly with base64 data', () => { + const part: Part = { + media: { + contentType: 'application/pdf', + url: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK', + }, + }; + const actualOutput = toOpenAITextAndMedia(part, 'low'); + expect(actualOutput).toStrictEqual({ + type: 'file', + file: { + filename: 'file.pdf', + file_data: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK', + }, + }); + }); + + it('should transform PDF file without explicit contentType from data URL', () => { + const part: Part = { + media: { + url: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK', + }, + }; + const actualOutput = toOpenAITextAndMedia(part, 'low'); + expect(actualOutput).toStrictEqual({ + type: 'file', + file: { + filename: 'file.pdf', + file_data: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK', + }, + }); + }); + + it('should transform image from data URL without explicit contentType', () => { + const part: Part = { + media: { + url: '', + }, + }; + const actualOutput = toOpenAITextAndMedia(part, 'high'); + expect(actualOutput).toStrictEqual({ + type: 'image_url', + image_url: { + url: '', + detail: 'high', + }, + }); + }); + + it('should throw error for file URLs 
(non-base64 PDFs)', () => { + const part: Part = { + media: { + contentType: 'application/pdf', + url: 'https://example.com/document.pdf', + }, + }; + expect(() => toOpenAITextAndMedia(part, 'low')).toThrowError( + 'File URLs are not supported for chat completions' + ); + }); + it('should throw an error for unknown parts', () => { const part: Part = { data: 'hi' }; expect(() => toOpenAITextAndMedia(part, 'low')).toThrowError( diff --git a/js/testapps/compat-oai/src/index.ts b/js/testapps/compat-oai/src/index.ts index 866fffa2e7..b7e8bb5e72 100644 --- a/js/testapps/compat-oai/src/index.ts +++ b/js/testapps/compat-oai/src/index.ts @@ -358,6 +358,45 @@ async function toWav( }); } +// PDF file input example +ai.defineFlow( + { + name: 'pdf', + inputSchema: z.string().default(''), + outputSchema: z.string(), + }, + async (pdfPath) => { + // Use a provided PDF path or create a minimal test PDF + let pdfBase64: string; + + if (pdfPath && fs.existsSync(pdfPath)) { + pdfBase64 = fs.readFileSync(pdfPath, { encoding: 'base64' }); + } else { + // Minimal valid PDF for testing (just contains "Hello World") + // This is a real PDF that can be parsed + pdfBase64 = + 'JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlL1BhZ2VzL0NvdW50IDEvS2lkc1szIDAgUl0+PgplbmRvYmoKMyAwIG9iago8PC9UeXBlL1BhZ2UvTWVkaWFCb3hbMCAwIDYxMiA3OTJdL1BhcmVudCAyIDAgUi9SZXNvdXJjZXM8PC9Gb250PDwvRjE8PC9UeXBlL0ZvbnQvU3VidHlwZS9UeXBlMS9CYXNlRm9udC9IZWx2ZXRpY2E+Pj4+Pj4vQ29udGVudHMgNCAwIFI+PgplbmRvYmoKNCAwIG9iago8PC9MZW5ndGggNDQ+PgpzdHJlYW0KQlQKL0YxIDI0IFRmCjEwMCA3MDAgVGQKKEhlbGxvIFdvcmxkKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA1CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAxNSAwMDAwMCBuIAowMDAwMDAwMDY0IDAwMDAwIG4gCjAwMDAwMDAxMjEgMDAwMDAgbiAKMDAwMDAwMDI2MCAwMDAwMCBuIAp0cmFpbGVyCjw8L1NpemUgNS9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjM1MgolJUVPRgo='; + } + + const { text } = await ai.generate({ + model: openAI.model('gpt-4o'), + prompt: [ + { + media: { + contentType: 
'application/pdf', + url: `data:application/pdf;base64,${pdfBase64}`, + }, + }, + { + text: 'What text is in this PDF document? Please extract and return all the text you can read.', + }, + ], + }); + + return text; + } +); + startFlowServer({ flows: [jokeFlow, embedFlow], });