feat(js/plugin/compat-oai): add support for PDF and other base64-encoded file inputs (#3923)

xavidop · web-flow · commit 90fb2fcaee6b · 2025-12-15T10:04:54.000-05:00
diff --git a/js/plugins/compat-oai/src/model.ts b/js/plugins/compat-oai/src/model.ts
@@ -98,6 +98,56 @@ export function toOpenAITool(tool: ToolDefinition): ChatCompletionTool {
   };
 }
 
+/**
+ * Tells whether a content type denotes an image, i.e. begins with `image/`.
+ * @param contentType The content type to check; may be undefined or empty.
+ * @returns True for `image/*` content types, false for everything else.
+ */
+function isImageContentType(contentType?: string): boolean {
+  const hasImagePrefix = contentType?.startsWith('image/');
+  return hasImagePrefix === true;
+}
+
+/**
+ * Extracts the base64 payload and content type from a base64 `data:` URL.
+ * Media-type parameters before `;base64` (e.g. `;charset=utf-8`) are skipped
+ * rather than causing the whole URL to be rejected.
+ * @param url The data URL to parse.
+ * @returns The base64 data and content type, or null if invalid.
+ */
+function extractDataFromBase64Url(url: string): {
+  data: string;
+  contentType: string;
+} | null {
+  // `[^;,]+` stops the content type at the first parameter or at the payload.
+  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/.exec(url);
+  if (!match) return null;
+  const [, contentType, data] = match;
+  return { contentType, data };
+}
+
+/** Map of lowercase content types to file extensions (without the dot). */
+const FILE_EXTENSIONS: Record<string, string> = {
+  'application/pdf': 'pdf',
+  'application/msword': 'doc',
+  'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
+    'docx',
+  'text/plain': 'txt',
+  'text/csv': 'csv',
+};
+
+/**
+ * Generates a filename from a content type (case-insensitive, own keys only).
+ * @param contentType The content type.
+ * @returns `file.<ext>` for a mapped content type, otherwise `file`.
+ */
+function generateFilenameFromContentType(contentType: string): string {
+  const key = contentType.trim().toLowerCase();
+  // Own-property check: inherited keys such as `toString` are not extensions.
+  const known = Object.prototype.hasOwnProperty.call(FILE_EXTENSIONS, key);
+  return known ? `file.${FILE_EXTENSIONS[key]}` : 'file';
+}
+
 /**
  * Converts a Genkit Part to the corresponding OpenAI ChatCompletionContentPart.
  * @param part The Genkit Part to convert.
@@ -115,13 +165,49 @@ export function toOpenAITextAndMedia(
       text: part.text,
     };
   } else if (part.media) {
-    return {
-      type: 'image_url',
-      image_url: {
-        url: part.media.url,
-        detail: visualDetailLevel,
-      },
-    };
+    // Determine the content type from the media part or data URL
+    let contentType = part.media.contentType;
+    if (!contentType && part.media.url.startsWith('data:')) {
+      const extracted = extractDataFromBase64Url(part.media.url);
+      if (extracted) {
+        contentType = extracted.contentType;
+      }
+    }
+
+    // Check if this is an image type
+    if (isImageContentType(contentType)) {
+      return {
+        type: 'image_url',
+        image_url: {
+          url: part.media.url,
+          detail: visualDetailLevel,
+        },
+      };
+    }
+
+    // For non-image types (like PDF), use the file type
+    // OpenAI expects the full data URL (with data: prefix) in file_data
+    if (part.media.url.startsWith('data:')) {
+      const extracted = extractDataFromBase64Url(part.media.url);
+      if (!extracted) {
+        throw Error(
+          `Invalid data URL format for media: ${part.media.url.substring(0, 50)}...`
+        );
+      }
+      return {
+        type: 'file',
+        file: {
+          filename: generateFilenameFromContentType(extracted.contentType),
+          file_data: part.media.url, // Full data URL with prefix
+        },
+      } as ChatCompletionContentPart;
+    }
+
+    // If it's a remote URL with non-image content type, this is not supported
+    // for chat completions according to OpenAI docs
+    throw Error(
+      `File URLs are not supported for chat completions. Only base64-encoded files and image URLs are supported. Content type: ${contentType}`
+    );
   }
   throw Error(
     `Unsupported genkit part fields encountered for current message role: ${JSON.stringify(part)}.`
diff --git a/js/plugins/compat-oai/tests/compat_oai_test.ts b/js/plugins/compat-oai/tests/compat_oai_test.ts
@@ -99,7 +99,7 @@ describe('toOpenAiTextAndMedia', () => {
     expect(actualOutput).toStrictEqual({ type: 'text', text: 'hi' });
   });
 
-  it('should transform media content correctly', () => {
+  it('should transform image media content correctly', () => {
     const part: Part = {
       media: {
         contentType: 'image/jpeg',
@@ -116,6 +116,67 @@ describe('toOpenAiTextAndMedia', () => {
     });
   });
 
+  it('should transform PDF file content correctly with base64 data', () => {
+    // The data URL is expected to be passed through unchanged as `file_data`.
+    const pdfDataUrl = 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK';
+    const pdfPart: Part = {
+      media: { contentType: 'application/pdf', url: pdfDataUrl },
+    };
+
+    const converted = toOpenAITextAndMedia(pdfPart, 'low');
+
+    // PDF media is expected to become a `file` part named `file.pdf`.
+    const expectedFilePart = {
+      type: 'file',
+      file: { filename: 'file.pdf', file_data: pdfDataUrl },
+    };
+    expect(converted).toStrictEqual(expectedFilePart);
+  });
+
+  it('should transform PDF file without explicit contentType from data URL', () => {
+    // No `contentType` is given, so the type can only come from the URL prefix.
+    const pdfDataUrl = 'data:application/pdf;base64,JVBERi0xLjQKJeLjz9MK';
+    const untypedPart: Part = { media: { url: pdfDataUrl } };
+
+    const converted = toOpenAITextAndMedia(untypedPart, 'low');
+
+    // The result is a `file` part that carries the original data URL.
+    const expectedFilePart = {
+      type: 'file',
+      file: { filename: 'file.pdf', file_data: pdfDataUrl },
+    };
+
+    expect(converted).toStrictEqual(expectedFilePart);
+  });
+
+  it('should transform image from data URL without explicit contentType', () => {
+    // The `image/png` prefix of the URL is the only content-type hint here.
+    const pngDataUrl = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUA';
+    const untypedPart: Part = { media: { url: pngDataUrl } };
+
+    const converted = toOpenAITextAndMedia(untypedPart, 'high');
+
+    // Images stay `image_url` parts and keep the requested detail level.
+    const expectedImagePart = {
+      type: 'image_url',
+      image_url: { url: pngDataUrl, detail: 'high' },
+    };
+
+    expect(converted).toStrictEqual(expectedImagePart);
+  });
+
+  it('should throw error for file URLs (non-base64 PDFs)', () => {
+    // A PDF referenced by a remote URL instead of an embedded data URL.
+    const remoteUrl = 'https://example.com/document.pdf';
+    const remotePdfPart: Part = {
+      media: { contentType: 'application/pdf', url: remoteUrl },
+    };
+
+    const convert = () => toOpenAITextAndMedia(remotePdfPart, 'low');
+    const expectedError = 'File URLs are not supported for chat completions';
+    expect(convert).toThrowError(expectedError);
+  });
+
   it('should throw an error for unknown parts', () => {
     const part: Part = { data: 'hi' };
     expect(() => toOpenAITextAndMedia(part, 'low')).toThrowError(
diff --git a/js/testapps/compat-oai/src/index.ts b/js/testapps/compat-oai/src/index.ts
@@ -358,6 +358,45 @@ async function toWav(
   });
 }
 
+// Example flow: sends a PDF to the model as a base64 data URL.
+ai.defineFlow(
+  {
+    name: 'pdf',
+    inputSchema: z.string().default(''),
+    outputSchema: z.string(),
+  },
+  async (pdfPath) => {
+    // Fallback document: a minimal, parseable PDF that only contains
+    // "Hello World". Used when no existing file path is supplied.
+    const samplePdfBase64 =
+      'JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PgplbmRvYmoKMiAwIG9iago8PC9UeXBlL1BhZ2VzL0NvdW50IDEvS2lkc1szIDAgUl0+PgplbmRvYmoKMyAwIG9iago8PC9UeXBlL1BhZ2UvTWVkaWFCb3hbMCAwIDYxMiA3OTJdL1BhcmVudCAyIDAgUi9SZXNvdXJjZXM8PC9Gb250PDwvRjE8PC9UeXBlL0ZvbnQvU3VidHlwZS9UeXBlMS9CYXNlRm9udC9IZWx2ZXRpY2E+Pj4+Pj4vQ29udGVudHMgNCAwIFI+PgplbmRvYmoKNCAwIG9iago8PC9MZW5ndGggNDQ+PgpzdHJlYW0KQlQKL0YxIDI0IFRmCjEwMCA3MDAgVGQKKEhlbGxvIFdvcmxkKSBUagpFVAplbmRzdHJlYW0KZW5kb2JqCnhyZWYKMCA1CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDAxNSAwMDAwMCBuIAowMDAwMDAwMDY0IDAwMDAwIG4gCjAwMDAwMDAxMjEgMDAwMDAgbiAKMDAwMDAwMDI2MCAwMDAwMCBuIAp0cmFpbGVyCjw8L1NpemUgNS9Sb290IDEgMCBSPj4Kc3RhcnR4cmVmCjM1MgolJUVPRgo=';
+
+    // An empty path or a path that does not exist selects the sample.
+    const hasReadableFile = Boolean(pdfPath) && fs.existsSync(pdfPath);
+    const pdfBase64: string = hasReadableFile
+      ? fs.readFileSync(pdfPath, { encoding: 'base64' })
+      : samplePdfBase64;
+
+    // The PDF part comes first, followed by the question about it.
+    const response = await ai.generate({
+      model: openAI.model('gpt-4o'),
+      prompt: [
+        {
+          media: {
+            contentType: 'application/pdf',
+            url: `data:application/pdf;base64,${pdfBase64}`,
+          },
+        },
+        {
+          text: 'What text is in this PDF document? Please extract and return all the text you can read.',
+        },
+      ],
+    });
+
+    return response.text;
+  }
+);
+
 startFlowServer({
   flows: [jokeFlow, embedFlow],
 });