Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 93 additions & 7 deletions js/plugins/compat-oai/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,56 @@ export function toOpenAITool(tool: ToolDefinition): ChatCompletionTool {
};
}

/**
 * Checks whether a content type denotes an image.
 *
 * MIME type names are case-insensitive, so the comparison ignores letter case
 * and surrounding whitespace (e.g. `IMAGE/PNG` is treated like `image/png`).
 *
 * @param contentType The content type to check; may be omitted.
 * @returns True if the content type belongs to the `image/` family.
 */
function isImageContentType(contentType?: string): boolean {
  if (!contentType) return false;
  // Normalize before comparing so that differently-cased or padded values
  // are still routed down the image path.
  return contentType.trim().toLowerCase().startsWith('image/');
}

/**
 * Extracts the base64 payload and content type from a base64 data URL.
 *
 * Accepts the form `data:<mediatype>[;<param>=<value>...];base64,<payload>`.
 * Optional media-type parameters (such as `charset=utf-8` or `name=a.pdf`)
 * are tolerated and dropped, so the returned content type is always the bare
 * MIME type.
 *
 * @param url The data URL to parse.
 * @returns The base64 data and bare content type, or null if `url` is not a
 *     base64 data URL with a non-empty payload.
 */
function extractDataFromBase64Url(url: string): {
  data: string;
  contentType: string;
} | null {
  // Group 1: bare MIME type. The non-capturing group skips any number of
  // `;param` segments that precede the `;base64,` marker. Group 2: payload.
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/.exec(url);
  if (!match) return null;

  const [, contentType, data] = match;
  // Both groups are mandatory in the pattern; the guard only narrows the
  // types for strict indexed-access checking.
  if (contentType === undefined || data === undefined) return null;

  return { contentType, data };
}

/**
 * Map of content types to file extensions.
 *
 * Keys are lowercase bare MIME types (no parameters).
 */
const FILE_EXTENSIONS: Record<string, string> = {
  'application/pdf': 'pdf',
  'application/msword': 'doc',
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
    'docx',
  'text/plain': 'txt',
  'text/csv': 'csv',
};

/**
 * Generates a filename from a content type.
 *
 * The content type is normalized before lookup: parameters after the first
 * `;` are dropped, surrounding whitespace is trimmed and the value is
 * lowercased, so `Application/PDF` and `text/plain; charset=utf-8` resolve
 * like their canonical forms.
 *
 * @param contentType The content type.
 * @returns `file.<ext>` when the content type has a known extension,
 *     otherwise the bare name `file`.
 */
function generateFilenameFromContentType(contentType: string): string {
  const mimeType = (contentType.split(';', 1)[0] ?? '').trim().toLowerCase();
  // Own-property check: a plain object also exposes inherited members such as
  // `toString` or `constructor`, which must never be mistaken for extensions.
  const ext = Object.prototype.hasOwnProperty.call(FILE_EXTENSIONS, mimeType)
    ? FILE_EXTENSIONS[mimeType]
    : undefined;
  return ext ? `file.${ext}` : 'file';
}

/**
* Converts a Genkit Part to the corresponding OpenAI ChatCompletionContentPart.
* @param part The Genkit Part to convert.
Expand All @@ -115,13 +165,49 @@ export function toOpenAITextAndMedia(
text: part.text,
};
} else if (part.media) {
return {
type: 'image_url',
image_url: {
url: part.media.url,
detail: visualDetailLevel,
},
};
// Determine the content type from the media part or data URL
let contentType = part.media.contentType;
if (!contentType && part.media.url.startsWith('data:')) {
const extracted = extractDataFromBase64Url(part.media.url);
if (extracted) {
contentType = extracted.contentType;
}
}

// Check if this is an image type
if (isImageContentType(contentType)) {
return {
type: 'image_url',
image_url: {
url: part.media.url,
detail: visualDetailLevel,
},
};
}

// For non-image types (like PDF), use the file type
// OpenAI expects the full data URL (with data: prefix) in file_data
if (part.media.url.startsWith('data:')) {
const extracted = extractDataFromBase64Url(part.media.url);
if (!extracted) {
throw Error(
`Invalid data URL format for media: ${part.media.url.substring(0, 50)}...`
);
}
return {
type: 'file',
file: {
filename: generateFilenameFromContentType(extracted.contentType),
file_data: part.media.url, // Full data URL with prefix
},
} as ChatCompletionContentPart;
}

// If it's a remote URL with non-image content type, this is not supported
// for chat completions according to OpenAI docs
throw Error(
`File URLs are not supported for chat completions. Only base64-encoded files and image URLs are supported. Content type: ${contentType}`
);
}
throw Error(
`Unsupported genkit part fields encountered for current message role: ${JSON.stringify(part)}.`
Expand Down
63 changes: 62 additions & 1 deletion js/plugins/compat-oai/tests/compat_oai_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ describe('toOpenAiTextAndMedia', () => {
expect(actualOutput).toStrictEqual({ type: 'text', text: 'hi' });
});

it('should transform media content correctly', () => {
it('should transform image media content correctly', () => {
const part: Part = {
media: {
contentType: 'image/jpeg',
Expand All @@ -116,6 +116,67 @@ describe('toOpenAiTextAndMedia', () => {
});
});

it('should transform PDF file content correctly with base64 data', () => {
  // A base64 PDF with an explicit content type must be sent as a `file` part
  // that carries the untouched data URL.
  const pdfDataUrl = 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK';
  const pdfPart: Part = {
    media: { contentType: 'application/pdf', url: pdfDataUrl },
  };

  const converted = toOpenAITextAndMedia(pdfPart, 'low');

  expect(converted).toStrictEqual({
    type: 'file',
    file: { filename: 'file.pdf', file_data: pdfDataUrl },
  });
});

it('should transform PDF file without explicit contentType from data URL', () => {
  // No contentType on the part: it has to be inferred from the data URL.
  const pdfDataUrl = 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK';
  const pdfPart: Part = { media: { url: pdfDataUrl } };

  const converted = toOpenAITextAndMedia(pdfPart, 'low');

  expect(converted).toStrictEqual({
    type: 'file',
    file: { filename: 'file.pdf', file_data: pdfDataUrl },
  });
});

it('should transform image from data URL without explicit contentType', () => {
  // The part carries no contentType, so the image type must be inferred from
  // the data URL itself. An empty URL cannot exercise that path (it is neither
  // a data URL nor an image), so a small base64 image data URL is used.
  const imageDataUrl = 'data:image/png;base64,iVBORw0KGgo=';
  const part: Part = {
    media: {
      url: imageDataUrl,
    },
  };
  const actualOutput = toOpenAITextAndMedia(part, 'high');
  expect(actualOutput).toStrictEqual({
    type: 'image_url',
    image_url: {
      url: imageDataUrl,
      detail: 'high',
    },
  });
});

it('should throw error for file URLs (non-base64 PDFs)', () => {
  // A remote (non data-URL) PDF cannot be sent to chat completions.
  const remotePdfPart: Part = {
    media: {
      contentType: 'application/pdf',
      url: 'https://example.com/document.pdf',
    },
  };
  const convert = () => toOpenAITextAndMedia(remotePdfPart, 'low');

  expect(convert).toThrowError(
    'File URLs are not supported for chat completions'
  );
});

it('should throw an error for unknown parts', () => {
const part: Part = { data: 'hi' };
expect(() => toOpenAITextAndMedia(part, 'low')).toThrowError(
Expand Down
39 changes: 39 additions & 0 deletions js/testapps/compat-oai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,45 @@ async function toWav(
});
}

// PDF file input example: sends a PDF (read from disk, or a built-in sample)
// to the model as a base64 data URL and returns the model's text answer.
ai.defineFlow(
  {
    name: 'pdf',
    inputSchema: z.string().default(''),
    outputSchema: z.string(),
  },
  async (pdfPath) => {
    // Built-in fallback: a minimal, parseable PDF that just contains
    // "Hello World".
    const samplePdfBase64 =
      'JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlL1BhZ2VzL0NvdW50IDEvS2lkc1szIDAgUl0+PgplbmRvYmoKMyAwIG9iago8PC9UeXBlL1BhZ2UvTWVkaWFCb3hbMCAwIDYxMiA3OTJdL1BhcmVudCAyIDAgUi9SZXNvdXJjZXM8PC9Gb250PDwvRjE8PC9UeXBlL0ZvbnQvU3VidHlwZS9UeXBlMS9CYXNlRm9udC9IZWx2ZXRpY2E+Pj4+Pj4vQ29udGVudHMgNCAwIFI+PgplbmRvYmoKNCAwIG9iago8PC9MZW5ndGggNDQ+PgpzdHJlYW0KQlQKL0YxIDI0IFRmCjEwMCA3MDAgVGQKKEhlbGxvIFdvcmxkKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA1CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAxNSAwMDAwMCBuIAowMDAwMDAwMDY0IDAwMDAwIG4gCjAwMDAwMDAxMjEgMDAwMDAgbiAKMDAwMDAwMDI2MCAwMDAwMCBuIAp0cmFpbGVyCjw8L1NpemUgNS9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjM1MgolJUVPRgo=';

    // Use the caller-supplied file only when a path was given and it exists;
    // otherwise fall back to the sample document.
    const hasReadableFile = Boolean(pdfPath) && fs.existsSync(pdfPath);
    const encodedPdf = hasReadableFile
      ? fs.readFileSync(pdfPath, { encoding: 'base64' })
      : samplePdfBase64;

    const response = await ai.generate({
      model: openAI.model('gpt-4o'),
      prompt: [
        {
          media: {
            contentType: 'application/pdf',
            url: `data:application/pdf;base64,${encodedPdf}`,
          },
        },
        {
          text: 'What text is in this PDF document? Please extract and return all the text you can read.',
        },
      ],
    });

    return response.text;
  }
);

// Start the flow server. Only jokeFlow and embedFlow are passed in this call.
// NOTE(review): flows defined without being listed here (e.g. 'pdf') are
// presumably not exposed by this server — confirm whether that is intended.
startFlowServer({
flows: [jokeFlow, embedFlow],
});