From 997ea630095b4f8ee14573c0ba051600c72c203f Mon Sep 17 00:00:00 2001
From: Dens Sumesh <dens.sumesh79@gmail.com>
Date: Thu, 9 Jan 2025 20:59:27 -0800
Subject: [PATCH] feature: use images with rag over chunks

---
 clients/search-component/package.json         |  2 +-
 .../src/utils/hooks/chat-context.tsx          | 12 +++++-----
 clients/ts-sdk/openapi.json                   |  8 +++++++
 clients/ts-sdk/src/types.gen.ts               |  4 ++++
 server/src/handlers/chunk_handler.rs          | 23 ++++++++++++++++++-
 5 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/clients/search-component/package.json b/clients/search-component/package.json
index d84dac30d..762742495 100644
--- a/clients/search-component/package.json
+++ b/clients/search-component/package.json
@@ -19,7 +19,7 @@
       "import": "./dist/vanilla/index.js"
     }
   },
-  "version": "0.3.8",
+  "version": "0.3.9",
   "license": "MIT",
   "homepage": "https://github.com/devflowinc/trieve/tree/main/clients/search-component",
   "scripts": {
diff --git a/clients/search-component/src/utils/hooks/chat-context.tsx b/clients/search-component/src/utils/hooks/chat-context.tsx
index c5fc3c503..9aaaf5bac 100644
--- a/clients/search-component/src/utils/hooks/chat-context.tsx
+++ b/clients/search-component/src/utils/hooks/chat-context.tsx
@@ -180,6 +180,7 @@ function ChatProvider({ children }: { children: React.ReactNode }) {
       const { reader, queryId } = await trieveSDK.ragOnChunkReaderWithQueryId(
         {
           chunk_ids: groupChunks.map((c) => c.id),
+          image_urls: imageUrl ? [imageUrl] : [],
           prev_messages: [
             ...messages.slice(0, -1).map((m) => mapMessageType(m)),
             {
@@ -215,11 +216,11 @@ function ChatProvider({ children }: { children: React.ReactNode }) {
           chatMessageAbortController.current.signal
         );
 
-      if (imageUrl) {
-        setImageUrl("");
-      }
       handleReader(reader, queryId);
     }
+    if (imageUrl) {
+      setImageUrl("");
+    }
   };
 
   const chatWithGroup = (group: ChunkGroup, betterGroupName?: string) => {
@@ -260,9 +261,8 @@ function ChatProvider({ children }: { children: React.ReactNode }) {
     setIsDoneReading(false);
 
     if (props.groupTrackingId) {
-      
       const fetchedGroup = await trieveSDK.getGroupByTrackingId({
-        trackingId: props.groupTrackingId
+        trackingId: props.groupTrackingId,
       });
       if (fetchedGroup) {
         group = {
@@ -290,7 +290,7 @@ function ChatProvider({ children }: { children: React.ReactNode }) {
         text: question || currentQuestion,
         additional: null,
         queryId: null,
-        imageUrl: imageUrl ? imageUrl : null
+        imageUrl: imageUrl ? imageUrl : null,
       },
     ]);
 
diff --git a/clients/ts-sdk/openapi.json b/clients/ts-sdk/openapi.json
index a1f678280..49b71fca2 100644
--- a/clients/ts-sdk/openapi.json
+++ b/clients/ts-sdk/openapi.json
@@ -11040,6 +11040,14 @@
             ],
             "nullable": true
           },
+          "image_urls": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Image URLs to be used in the chat. These will be used to generate the image tokens for the model. The default is None.",
+            "nullable": true
+          },
           "max_tokens": {
             "type": "integer",
             "format": "int32",
diff --git a/clients/ts-sdk/src/types.gen.ts b/clients/ts-sdk/src/types.gen.ts
index 1486d1b07..c291f5b83 100644
--- a/clients/ts-sdk/src/types.gen.ts
+++ b/clients/ts-sdk/src/types.gen.ts
@@ -1560,6 +1560,10 @@ export type GenerateOffChunksReqPayload = {
      */
     highlight_results?: (boolean) | null;
     image_config?: ((ImageConfig) | null);
+    /**
+     * Image URLs to be used in the chat. These will be used to generate the image tokens for the model. The default is None.
+     */
+    image_urls?: Array<(string)> | null;
     /**
      * The maximum number of tokens to generate in the chat completion. Default is None.
      */
diff --git a/server/src/handlers/chunk_handler.rs b/server/src/handlers/chunk_handler.rs
index 34d8d049b..ff70de80a 100644
--- a/server/src/handlers/chunk_handler.rs
+++ b/server/src/handlers/chunk_handler.rs
@@ -2322,6 +2322,8 @@ pub struct GenerateOffChunksReqPayload {
     pub chunk_ids: Vec<uuid::Uuid>,
     /// Prompt will be used to tell the model what to generate in the next message in the chat. The default is 'Respond to the previous instruction and include the doc numbers that you used in square brackets at the end of the sentences that you used the docs for:'. You can also specify an empty string to leave the final message alone such that your user's final message can be used as the prompt. See docs.trieve.ai or contact us for more information.
     pub prompt: Option<String>,
+    /// Image URLs to be used in the chat. These will be used to generate the image tokens for the model. The default is None.
+    pub image_urls: Option<Vec<String>>,
     /// Whether or not to stream the response. If this is set to true or not included, the response will be a stream. If this is set to false, the response will be a normal JSON response. Default is true.
     pub stream_response: Option<bool>,
     /// Set highlight_results to false for a slight latency improvement (1-10ms). If not specified, this defaults to true. This will add `<mark><b>` tags to the chunk_html of the chunks to highlight matching splits.
@@ -2493,7 +2495,26 @@ pub async fn generate_off_chunks(
             name: None,
         });
 
-        if let Some(image_config) = &data.image_config {
+        if let Some(image_urls) = data.image_urls.clone() {
+            if !image_urls.is_empty() {
+                messages.push(ChatMessage::User {
+                    name: None,
+                    content: ChatMessageContent::ImageUrl(
+                        image_urls
+                            .iter()
+                            .map(|url| ImageUrl {
+                                r#type: "image_url".to_string(),
+                                text: None,
+                                image_url: ImageUrlType {
+                                    url: url.to_string(),
+                                    detail: None,
+                                },
+                            })
+                            .collect(),
+                    ),
+                });
+            }
+        } else if let Some(image_config) = &data.image_config {
             if image_config.use_images.unwrap_or(false) {
                 if let Some(image_urls) = chunk_metadata.image_urls.clone() {
                     let urls = image_urls