Skip to content

Commit 5b87c8b

Browse files
committed
Use filetype to detect the MIME type of image bytes, fix a vision-related issue, and remove some unused functions
1 parent d2d46e1 commit 5b87c8b

File tree

3 files changed

+13
-63
lines changed

3 files changed

+13
-63
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ dependencies = [
1717
"pyaudio",
1818
"pillow",
1919
"psutil",
20+
"filetype",
2021

2122
"openai",
2223
"anthropic",

pyqt_openai/util/common.py

Lines changed: 11 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import sys
1818
import tempfile
1919
import time
20+
import filetype
2021
import traceback
2122
import wave
2223
import zipfile
@@ -581,63 +582,6 @@ def get_chat_model(is_g4f=False):
581582
all_models.extend(obj.get("model_list", []))
582583
return all_models
583584

584-
def get_gemini_argument(model, system, messages, cur_text, stream, images):
585-
try:
586-
args = {
587-
"system": system,
588-
"model": model,
589-
"messages": messages,
590-
"stream": stream,
591-
}
592-
if len(images) > 0:
593-
args["images"] = [PIL.Image.open(BytesIO(image)) for image in images]
594-
args["messages"].append({"role": "user", "content": cur_text})
595-
return args
596-
except Exception as e:
597-
print(e)
598-
raise e
599-
600-
601-
def get_claude_argument(model, system, messages, cur_text, stream, images):
602-
try:
603-
args = {
604-
"model": model,
605-
"system": system,
606-
"messages": messages,
607-
"max_tokens": DEFAULT_TOKEN_CHUNK_SIZE,
608-
"stream": stream,
609-
}
610-
# TODO REFACTORING (FOR COMMON FUNCTION FOR VISION)
611-
# Vision
612-
if len(images) > 0:
613-
multiple_images_content = []
614-
for image in images:
615-
multiple_images_content.append(
616-
{
617-
"type": "image",
618-
"source": {
619-
"type": "base64",
620-
"media_type": "image/png",
621-
"data": get_image_url_from_local(image),
622-
},
623-
}
624-
)
625-
626-
multiple_images_content = multiple_images_content[:] + [
627-
{"type": "text", "text": cur_text}
628-
]
629-
630-
args["messages"].append(
631-
{"role": "user", "content": multiple_images_content}
632-
)
633-
else:
634-
args["messages"].append({"role": "user", "content": cur_text})
635-
return args
636-
except Exception as e:
637-
print(e)
638-
raise e
639-
640-
641585
def set_api_key(env_var_name, api_key):
642586
api_key = api_key.strip() if api_key else ""
643587
if env_var_name == "OPENAI_API_KEY":
@@ -655,7 +599,14 @@ def set_api_key(env_var_name, api_key):
655599
# Set environment variables dynamically
656600
os.environ[env_var_name] = api_key
657601

658-
def get_image_url_from_local(image, is_openai=False):
602+
def get_mime_type_from_bytes(byte_data):
603+
kind = filetype.guess(byte_data)
604+
if kind is None:
605+
raise ValueError("Could not determine MIME type from bytes")
606+
print(kind.mime)
607+
return kind.mime
608+
609+
def get_image_url_from_local(image):
659610
"""
660611
Image is bytes, this function converts it to base64 and returns the image url
661612
"""
@@ -665,10 +616,7 @@ def encode_image(image):
665616
return base64.b64encode(image).decode("utf-8")
666617

667618
base64_image = encode_image(image)
668-
if is_openai:
669-
return f"data:image/jpeg;base64,{base64_image}"
670-
else:
671-
return base64_image
619+
return f"data:{get_mime_type_from_bytes(image)};base64,{base64_image}"
672620

673621

674622
def get_message_obj(role, content):
@@ -839,7 +787,7 @@ def get_api_argument(
839787
{
840788
"type": "image_url",
841789
"image_url": {
842-
"url": get_image_url_from_local(image, is_openai=True),
790+
"url": get_image_url_from_local(image),
843791
},
844792
}
845793
)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ requests
55
pyaudio
66
pillow
77
psutil
8+
filetype
89

910
openai
1011
anthropic

0 commit comments

Comments
 (0)