Skip to content

Commit 90fb2fc

Browse files
authored
feat(js/plugin/compat-oai): add support to input pdfs or base64 files (#3923)
1 parent cccede2 commit 90fb2fc

File tree

3 files changed

+194
-8
lines changed

3 files changed

+194
-8
lines changed

js/plugins/compat-oai/src/model.ts

Lines changed: 93 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,56 @@ export function toOpenAITool(tool: ToolDefinition): ChatCompletionTool {
9898
};
9999
}
100100

101+
/**
 * Checks if a content type is an image type.
 *
 * Media type names are case-insensitive, so the comparison is done on a
 * trimmed, lowercased copy of the input (e.g. `IMAGE/PNG` is an image type).
 *
 * @param contentType The content type to check; may be undefined or empty.
 * @returns True if the content type starts with `image/`, false otherwise
 *   (including when no content type is given).
 */
function isImageContentType(contentType?: string): boolean {
  if (!contentType) return false;
  return contentType.trim().toLowerCase().startsWith('image/');
}
110+
111+
/**
 * Extracts the base64 data and content type from a base64 `data:` URL.
 *
 * Optional media-type parameters between the content type and the `;base64`
 * marker are accepted (e.g. `data:text/plain;charset=utf-8;base64,...`) and
 * are not included in the returned content type. URLs whose payload starts
 * before any `;base64` marker (e.g. `data:text/plain,hello`) are not base64
 * data URLs and yield null.
 *
 * @param url The data URL to parse.
 * @returns The base64 data and bare content type, or null if `url` is not a
 *   base64 data URL with an explicit content type and a non-empty payload.
 */
function extractDataFromBase64Url(url: string): {
  data: string;
  contentType: string;
} | null {
  // Group 1: media type (no ';' or ','). Then zero or more ';param' segments,
  // matched lazily so the first ';base64,' marker ends the header.
  // Group 2: the non-empty base64 payload.
  const match = /^data:([^;,]+)(?:;[^;,]*)*?;base64,(.+)$/.exec(url);
  if (!match) return null;
  const [, contentType, data] = match;
  if (contentType === undefined || data === undefined) return null;
  return { contentType, data };
}
128+
129+
/**
 * Map of lowercase content types to file extensions (without a leading dot).
 */
const FILE_EXTENSIONS: Record<string, string> = {
  'application/pdf': 'pdf',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
    'docx',
  'text/plain': 'txt',
  'text/csv': 'csv',
};

/**
 * Generates a filename from a content type.
 *
 * The content type is trimmed and lowercased before lookup because media
 * type names are case-insensitive.
 *
 * @param contentType The content type.
 * @returns `file.<ext>` when the content type has a known extension in
 *   FILE_EXTENSIONS, otherwise the bare name `file`.
 */
function generateFilenameFromContentType(contentType: string): string {
  const key = contentType.trim().toLowerCase();
  // Own-property check: a bare lookup on a plain object would resolve
  // inherited keys such as "constructor" or "__proto__" to truthy non-string
  // values and produce a garbage extension.
  const ext = Object.prototype.hasOwnProperty.call(FILE_EXTENSIONS, key)
    ? FILE_EXTENSIONS[key]
    : undefined;
  return ext ? `file.${ext}` : 'file';
}
150+
101151
/**
102152
* Converts a Genkit Part to the corresponding OpenAI ChatCompletionContentPart.
103153
* @param part The Genkit Part to convert.
@@ -115,13 +165,49 @@ export function toOpenAITextAndMedia(
115165
text: part.text,
116166
};
117167
} else if (part.media) {
118-
return {
119-
type: 'image_url',
120-
image_url: {
121-
url: part.media.url,
122-
detail: visualDetailLevel,
123-
},
124-
};
168+
// Determine the content type from the media part or data URL
169+
let contentType = part.media.contentType;
170+
if (!contentType && part.media.url.startsWith('data:')) {
171+
const extracted = extractDataFromBase64Url(part.media.url);
172+
if (extracted) {
173+
contentType = extracted.contentType;
174+
}
175+
}
176+
177+
// Check if this is an image type
178+
if (isImageContentType(contentType)) {
179+
return {
180+
type: 'image_url',
181+
image_url: {
182+
url: part.media.url,
183+
detail: visualDetailLevel,
184+
},
185+
};
186+
}
187+
188+
// For non-image types (like PDF), use the file type
189+
// OpenAI expects the full data URL (with data: prefix) in file_data
190+
if (part.media.url.startsWith('data:')) {
191+
const extracted = extractDataFromBase64Url(part.media.url);
192+
if (!extracted) {
193+
throw Error(
194+
`Invalid data URL format for media: ${part.media.url.substring(0, 50)}...`
195+
);
196+
}
197+
return {
198+
type: 'file',
199+
file: {
200+
filename: generateFilenameFromContentType(extracted.contentType),
201+
file_data: part.media.url, // Full data URL with prefix
202+
},
203+
} as ChatCompletionContentPart;
204+
}
205+
206+
// If it's a remote URL with non-image content type, this is not supported
207+
// for chat completions according to OpenAI docs
208+
throw Error(
209+
`File URLs are not supported for chat completions. Only base64-encoded files and image URLs are supported. Content type: ${contentType}`
210+
);
125211
}
126212
throw Error(
127213
`Unsupported genkit part fields encountered for current message role: ${JSON.stringify(part)}.`

js/plugins/compat-oai/tests/compat_oai_test.ts

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ describe('toOpenAiTextAndMedia', () => {
9999
expect(actualOutput).toStrictEqual({ type: 'text', text: 'hi' });
100100
});
101101

102-
it('should transform media content correctly', () => {
102+
it('should transform image media content correctly', () => {
103103
const part: Part = {
104104
media: {
105105
contentType: 'image/jpeg',
@@ -116,6 +116,67 @@ describe('toOpenAiTextAndMedia', () => {
116116
});
117117
});
118118

119+
it('should transform PDF file content correctly with base64 data', () => {
120+
const part: Part = {
121+
media: {
122+
contentType: 'application/pdf',
123+
url: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK',
124+
},
125+
};
126+
const actualOutput = toOpenAITextAndMedia(part, 'low');
127+
expect(actualOutput).toStrictEqual({
128+
type: 'file',
129+
file: {
130+
filename: 'file.pdf',
131+
file_data: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK',
132+
},
133+
});
134+
});
135+
136+
it('should transform PDF file without explicit contentType from data URL', () => {
137+
const part: Part = {
138+
media: {
139+
url: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK',
140+
},
141+
};
142+
const actualOutput = toOpenAITextAndMedia(part, 'low');
143+
expect(actualOutput).toStrictEqual({
144+
type: 'file',
145+
file: {
146+
filename: 'file.pdf',
147+
file_data: 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK',
148+
},
149+
});
150+
});
151+
152+
it('should transform image from data URL without explicit contentType', () => {
153+
const part: Part = {
154+
media: {
155+
url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA',
156+
},
157+
};
158+
const actualOutput = toOpenAITextAndMedia(part, 'high');
159+
expect(actualOutput).toStrictEqual({
160+
type: 'image_url',
161+
image_url: {
162+
url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA',
163+
detail: 'high',
164+
},
165+
});
166+
});
167+
168+
it('should throw error for file URLs (non-base64 PDFs)', () => {
169+
const part: Part = {
170+
media: {
171+
contentType: 'application/pdf',
172+
url: 'https://example.com/document.pdf',
173+
},
174+
};
175+
expect(() => toOpenAITextAndMedia(part, 'low')).toThrowError(
176+
'File URLs are not supported for chat completions'
177+
);
178+
});
179+
119180
it('should throw an error for unknown parts', () => {
120181
const part: Part = { data: 'hi' };
121182
expect(() => toOpenAITextAndMedia(part, 'low')).toThrowError(

js/testapps/compat-oai/src/index.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,45 @@ async function toWav(
358358
});
359359
}
360360

361+
// PDF file input example
362+
ai.defineFlow(
363+
{
364+
name: 'pdf',
365+
inputSchema: z.string().default(''),
366+
outputSchema: z.string(),
367+
},
368+
async (pdfPath) => {
369+
// Use a provided PDF path or create a minimal test PDF
370+
let pdfBase64: string;
371+
372+
if (pdfPath && fs.existsSync(pdfPath)) {
373+
pdfBase64 = fs.readFileSync(pdfPath, { encoding: 'base64' });
374+
} else {
375+
// Minimal valid PDF for testing (just contains "Hello World")
376+
// This is a real PDF that can be parsed
377+
pdfBase64 =
378+
'JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlL1BhZ2VzL0NvdW50IDEvS2lkc1szIDAgUl0+PgplbmRvYmoKMyAwIG9iago8PC9UeXBlL1BhZ2UvTWVkaWFCb3hbMCAwIDYxMiA3OTJdL1BhcmVudCAyIDAgUi9SZXNvdXJjZXM8PC9Gb250PDwvRjE8PC9UeXBlL0ZvbnQvU3VidHlwZS9UeXBlMS9CYXNlRm9udC9IZWx2ZXRpY2E+Pj4+Pj4vQ29udGVudHMgNCAwIFI+PgplbmRvYmoKNCAwIG9iago8PC9MZW5ndGggNDQ+PgpzdHJlYW0KQlQKL0YxIDI0IFRmCjEwMCA3MDAgVGQKKEhlbGxvIFdvcmxkKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA1CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAxNSAwMDAwMCBuIAowMDAwMDAwMDY0IDAwMDAwIG4gCjAwMDAwMDAxMjEgMDAwMDAgbiAKMDAwMDAwMDI2MCAwMDAwMCBuIAp0cmFpbGVyCjw8L1NpemUgNS9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjM1MgolJUVPRgo=';
379+
}
380+
381+
const { text } = await ai.generate({
382+
model: openAI.model('gpt-4o'),
383+
prompt: [
384+
{
385+
media: {
386+
contentType: 'application/pdf',
387+
url: `data:application/pdf;base64,${pdfBase64}`,
388+
},
389+
},
390+
{
391+
text: 'What text is in this PDF document? Please extract and return all the text you can read.',
392+
},
393+
],
394+
});
395+
396+
return text;
397+
}
398+
);
399+
361400
startFlowServer({
362401
flows: [jokeFlow, embedFlow],
363402
});

0 commit comments

Comments
 (0)