Add upload svg image support
Fix upload image in Bing Provider
hlohaus committed Jan 23, 2024
1 parent 38dbe4b commit 07c944a
Showing 6 changed files with 51 additions and 26 deletions.
36 changes: 21 additions & 15 deletions g4f/Provider/bing/upload_image.py
@@ -82,13 +82,16 @@ def build_image_upload_payload(image_bin: str, tone: str) -> Tuple[str, str]:
         Tuple[str, str]: The data and boundary for the payload.
     """
     boundary = "----WebKitFormBoundary" + ''.join(random.choices(string.ascii_letters + string.digits, k=16))
-    data = f"--{boundary}\r\n" \
-        f"Content-Disposition: form-data; name=\"knowledgeRequest\"\r\n\r\n" \
-        f"{json.dumps(build_knowledge_request(tone), ensure_ascii=False)}\r\n" \
-        f"--{boundary}\r\n" \
-        f"Content-Disposition: form-data; name=\"imageBase64\"\r\n\r\n" \
-        f"{image_bin}\r\n" \
-        f"--{boundary}--\r\n"
+    data = f"""--{boundary}
+Content-Disposition: form-data; name="knowledgeRequest"
+
+{json.dumps(build_knowledge_request(tone), ensure_ascii=False)}
+--{boundary}
+Content-Disposition: form-data; name="imageBase64"
+
+{image_bin}
+--{boundary}--
+"""
     return data, boundary
 
 def build_knowledge_request(tone: str) -> dict:
@@ -102,14 +105,17 @@ def build_knowledge_request(tone: str) -> dict:
         dict: The knowledge request payload.
     """
     return {
-        'invokedSkills': ["ImageById"],
-        'subscriptionId': "Bing.Chat.Multimodal",
-        'invokedSkillsRequestData': {
-            'enableFaceBlur': True
-        },
-        'convoData': {
-            'convoid': "",
-            'convotone': tone
+        "imageInfo": {},
+        "knowledgeRequest": {
+            'invokedSkills': ["ImageById"],
+            'subscriptionId': "Bing.Chat.Multimodal",
+            'invokedSkillsRequestData': {
+                'enableFaceBlur': True
+            },
+            'convoData': {
+                'convoid': "",
+                'convotone': tone
+            }
         }
     }

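For context, a rough sketch of how the reworked payload helpers could be exercised on their own. The import path follows the file location above; the sample file name, the "Balanced" tone value and the Content-Type header are assumptions about the caller, not part of the commit.

    # Sketch only: build the multipart body with the new triple-quoted f-string
    # version of build_image_upload_payload and prepare the matching header.
    import base64
    from g4f.Provider.bing.upload_image import build_image_upload_payload

    with open("example.jpg", "rb") as f:          # hypothetical input image
        image_bin = base64.b64encode(f.read()).decode()

    data, boundary = build_image_upload_payload(image_bin, tone="Balanced")
    # The returned boundary has to be echoed in the request header when the
    # body is POSTed to the Bing upload endpoint.
    headers = {"Content-Type": f"multipart/form-data; boundary={boundary}"}

The rewrite also wraps the old request fields in a "knowledgeRequest" object next to an empty "imageInfo" entry, which is the shape the upload endpoint evidently expects.
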
4 changes: 2 additions & 2 deletions g4f/gui/client/html/index.html
@@ -115,11 +115,11 @@
     <textarea id="message-input" placeholder="Ask a question" cols="30" rows="10"
         style="white-space: pre-wrap;resize: none;"></textarea>
     <label for="image" title="Works only with Bing and OpenaiChat">
-        <input type="file" id="image" name="image" accept="image/png, image/gif, image/jpeg" required/>
+        <input type="file" id="image" name="image" accept="image/png, image/gif, image/jpeg, image/svg+xml" required/>
         <i class="fa-regular fa-image"></i>
     </label>
     <label for="file">
-        <input type="file" id="file" name="file" accept="text/plain, text/html, text/xml, application/json, text/javascript, .sh, .py, .php, .css, .yaml, .sql, .svg, .log, .csv, .twig, .md" required/>
+        <input type="file" id="file" name="file" accept="text/plain, text/html, text/xml, application/json, text/javascript, .sh, .py, .php, .css, .yaml, .sql, .log, .csv, .twig, .md" required/>
         <i class="fa-solid fa-paperclip"></i>
     </label>
     <div id="send-button">
8 changes: 7 additions & 1 deletion g4f/gui/client/js/chat.v1.js
@@ -660,7 +660,13 @@ observer.observe(message_input, { attributes: true });
     }
     document.getElementById("version_text").innerHTML = text
 })()
-
+imageInput.addEventListener('click', async (event) => {
+    imageInput.value = '';
+});
+fileInput.addEventListener('click', async (event) => {
+    fileInput.value = '';
+    delete fileInput.dataset.text;
+});
 fileInput.addEventListener('change', async (event) => {
     if (fileInput.files.length) {
         type = fileInput.files[0].type;
2 changes: 1 addition & 1 deletion g4f/gui/server/backend.py
@@ -137,7 +137,7 @@ def _prepare_conversation_kwargs(self):
         if 'image' in request.files:
             file = request.files['image']
             if file.filename != '' and is_allowed_extension(file.filename):
-                kwargs['image'] = to_image(file.stream)
+                kwargs['image'] = to_image(file.stream, file.filename.endswith('.svg'))
         if 'json' in request.form:
             json_data = json.loads(request.form['json'])
         else:
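
The route change above derives the new is_svg flag from the uploaded filename. A standalone sketch of that decision follows; the local is_allowed_extension is a stand-in written for illustration (the real helper lives in g4f.image and may differ), and the filename is hypothetical.

    # Sketch only: mirror the filename checks used by the route above.
    ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'webp', 'svg'}   # matches g4f/image.py below

    def is_allowed_extension(filename: str) -> bool:
        # Illustrative stand-in for the helper imported by the backend.
        return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

    filename = "diagram.svg"                       # hypothetical upload
    if filename != '' and is_allowed_extension(filename):
        is_svg = filename.endswith('.svg')         # becomes the second argument to to_image
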
16 changes: 14 additions & 2 deletions g4f/image.py
@@ -4,9 +4,9 @@
 from .typing import ImageType, Union
 from PIL import Image
 
-ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'webp'}
+ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'webp', 'svg'}
 
-def to_image(image: ImageType) -> Image.Image:
+def to_image(image: ImageType, is_svg: bool = False) -> Image.Image:
     """
     Converts the input image to a PIL Image object.
@@ -16,6 +16,16 @@ def to_image(image: ImageType) -> Image.Image:
     Returns:
         Image.Image: The converted PIL Image object.
     """
+    if is_svg:
+        try:
+            import cairosvg
+        except ImportError:
+            raise RuntimeError('Install "cairosvg" package for open svg images')
+        if not isinstance(image, bytes):
+            image = image.read()
+        buffer = BytesIO()
+        cairosvg.svg2png(image, write_to=buffer)
+        image = Image.open(buffer)
     if isinstance(image, str):
         is_data_uri_an_image(image)
         image = extract_data_uri(image)
@@ -153,6 +163,8 @@ def to_base64(image: Image.Image, compression_rate: float) -> str:
         str: The base64-encoded image.
     """
     output_buffer = BytesIO()
+    if image.mode != "RGB":
+        image = image.convert('RGB')
     image.save(output_buffer, format="JPEG", quality=int(compression_rate * 100))
     return base64.b64encode(output_buffer.getvalue()).decode()

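Taken together, the two g4f/image.py changes give SVG uploads a full path to a JPEG data string. A rough end-to-end sketch, assuming the optional cairosvg package is installed; the file name and the 0.8 compression rate are arbitrary choices:

    # Sketch only: rasterize an SVG and re-encode it for upload.
    from g4f.image import to_image, to_base64

    with open("icon.svg", "rb") as f:              # hypothetical SVG file
        image = to_image(f.read(), True)           # True -> is_svg, rendered via cairosvg
    # svg2png typically produces an RGBA image; the new mode check in to_base64
    # converts it to RGB so the JPEG save no longer fails on such inputs.
    encoded = to_base64(image, 0.8)
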
11 changes: 6 additions & 5 deletions g4f/models.py
@@ -5,6 +5,7 @@
     Chatgpt4Online,
     ChatgptDemoAi,
     GeminiProChat,
+    PerplexityAi,
     ChatgptNext,
     HuggingChat,
     ChatgptDemo,
@@ -78,7 +79,7 @@ def __all__() -> list[str]:
 gpt_35_turbo = Model(
     name = 'gpt-3.5-turbo',
     base_provider = 'openai',
-    best_provider=RetryProvider([
+    best_provider = RetryProvider([
         GptGo, You,
         GptForLove, ChatBase,
         Chatgpt4Online,
@@ -114,20 +115,20 @@ def __all__() -> list[str]:
 llama2_70b = Model(
     name = "meta-llama/Llama-2-70b-chat-hf",
     base_provider = "huggingface",
-    best_provider = RetryProvider([Llama2, DeepInfra, HuggingChat])
+    best_provider = RetryProvider([Llama2, DeepInfra, HuggingChat, PerplexityAi])
 )
 
 # Mistal
 mixtral_8x7b = Model(
     name = "mistralai/Mixtral-8x7B-Instruct-v0.1",
     base_provider = "huggingface",
-    best_provider = RetryProvider([DeepInfra, HuggingChat])
+    best_provider = RetryProvider([DeepInfra, HuggingChat, PerplexityAi])
 )
 
 mistral_7b = Model(
     name = "mistralai/Mistral-7B-Instruct-v0.1",
     base_provider = "huggingface",
-    best_provider = RetryProvider([DeepInfra, HuggingChat])
+    best_provider = RetryProvider([DeepInfra, HuggingChat, PerplexityAi])
 )
 
 # Dolphin
@@ -311,7 +312,7 @@ def __all__() -> list[str]:
 pi = Model(
     name = 'pi',
     base_provider = 'inflection',
-    best_provider=Pi
+    best_provider = Pi
 )
 
 class ModelUtils:
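
The provider lists above feed RetryProvider, which tries each backend in turn until one succeeds; this commit adds PerplexityAi as a further fallback for the Llama 2 and Mistral models. A minimal usage sketch with an arbitrary prompt, assuming ChatCompletion.create accepts the Model object directly:

    # Sketch only: llama2_70b is the Model defined above; its RetryProvider now
    # falls through Llama2, DeepInfra, HuggingChat and PerplexityAi.
    import g4f
    from g4f.models import llama2_70b

    response = g4f.ChatCompletion.create(
        model=llama2_70b,
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response)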
