feat: supported gpt4o models (#107)
adubovik authored May 31, 2024
1 parent a80f455 commit 8bb3b8b
Showing 13 changed files with 314 additions and 213 deletions.
4 changes: 2 additions & 2 deletions .ort.yml
```diff
@@ -19,9 +19,9 @@ resolutions:
   - message: ".*PyPI::numpy:1\\.26\\.0.*"
     reason: "CANT_FIX_EXCEPTION"
     comment: "BSD 3-Clause license: https://github.com/numpy/numpy/blob/v1.26.0/LICENSES_bundled.txt"
-  - message: ".*PyPI::tiktoken:0\\.5\\.1.*"
+  - message: ".*PyPI::tiktoken:0\\.7\\.0.*"
     reason: "CANT_FIX_EXCEPTION"
-    comment: "MIT License: https://github.com/openai/tiktoken/blob/0.5.1/LICENSE"
+    comment: "MIT License: https://github.com/openai/tiktoken/blob/0.7.0/LICENSE"
   - message: ".*PyPI::httpcore:0\\.18\\.0.*"
     reason: "CANT_FIX_EXCEPTION"
     comment: "BSD 3-Clause New or Revised License: https://github.com/encode/httpcore/blob/0.18.0/LICENSE.md"
```
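The `message` fields in these resolution entries are regular expressions matched against ORT issue messages; the doubled backslashes are YAML escaping, so the parsed pattern pins the exact `0.7.0` version string. A quick illustrative check of the updated pattern (the sample messages are made up, not real ORT output):

```python
import re

# The updated resolution pattern from .ort.yml, with YAML's double
# backslashes collapsed to single ones once the file is parsed.
pattern = re.compile(r".*PyPI::tiktoken:0\.7\.0.*")

# Matches any message mentioning the new pinned version...
assert pattern.match("License finding for PyPI::tiktoken:0.7.0 package")
# ...but no longer matches the old 0.5.1 version.
assert not pattern.match("License finding for PyPI::tiktoken:0.5.1 package")
```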
2 changes: 2 additions & 0 deletions README.md
````diff
@@ -39,6 +39,7 @@ make serve
 ```
 
 ### Make on Windows
+
 As of now, Windows distributions do not include the make tool. To run make commands, the tool can be installed using
 the following command (since [Windows 10](https://learn.microsoft.com/en-us/windows/package-manager/winget/)):
 ```sh
@@ -69,6 +70,7 @@ Copy `.env.example` to `.env` and customize it for your environment:
 |CORE_API_VERSION||Supported value `0.6` to work with the old version of the file api|
 |MISTRAL_DEPLOYMENTS|``|Comma-separated list of deployments that support Mistral Large Azure API. Example: `mistral-large-azure,mistral-large`|
 |DATABRICKS_DEPLOYMENTS|``|Comma-separated list of Databricks chat completion deployments. Example: `databricks-dbrx-instruct,databricks-mixtral-8x7b-instruct,databricks-llama-2-70b-chat`|
+|GPT4O_DEPLOYMENTS|``|Comma-separated list of GPT-4o chat completion deployments. Example: `gpt-4o-2024-05-13`|
 
 ### Docker
 
````
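The new variable follows the same comma-separated convention as the existing deployment lists. A hypothetical `.env` fragment wiring up the new GPT-4o routing (deployment names here are illustrative, not defaults):

```sh
# Illustrative values only; use the deployment names configured upstream.
GPT4O_DEPLOYMENTS=gpt-4o-2024-05-13
MISTRAL_DEPLOYMENTS=mistral-large-azure,mistral-large
DATABRICKS_DEPLOYMENTS=databricks-dbrx-instruct
```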
22 changes: 20 additions & 2 deletions aidial_adapter_openai/app.py
```diff
@@ -16,8 +16,9 @@
 from aidial_adapter_openai.databricks import (
     chat_completion as databricks_chat_completion,
 )
-from aidial_adapter_openai.gpt4_vision.chat_completion import (
-    chat_completion as gpt4_vision_chat_completion,
+from aidial_adapter_openai.gpt4_multi_modal.chat_completion import (
+    gpt4_vision_chat_completion,
+    gpt4o_chat_completion,
 )
 from aidial_adapter_openai.mistral import (
     chat_completion as mistral_chat_completion,
@@ -55,6 +56,7 @@
 databricks_deployments = parse_deployment_list(
     os.getenv("DATABRICKS_DEPLOYMENTS") or ""
 )
+gpt4o_deployments = parse_deployment_list(os.getenv("GPT4O_DEPLOYMENTS") or "")
 api_versions_mapping: Dict[str, str] = json.loads(
     os.getenv("API_VERSIONS_MAPPING", "{}")
 )
@@ -142,6 +144,22 @@ async def chat_completion(deployment_id: str, request: Request):
     openai_model_name = model_aliases.get(deployment_id, deployment_id)
     tokenizer = Tokenizer(model=openai_model_name)
 
+    if deployment_id in gpt4o_deployments:
+        storage = create_file_storage("images", request.headers)
+        return await handle_exceptions(
+            gpt4o_chat_completion(
+                data,
+                deployment_id,
+                upstream_endpoint,
+                api_key,
+                is_stream,
+                storage,
+                api_type,
+                api_version,
+                tokenizer,
+            )
+        )
+
     discarded_messages = None
     if "max_prompt_tokens" in data:
         max_prompt_tokens = data["max_prompt_tokens"]
```
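The new routing branch hinges on `parse_deployment_list`, whose body is outside this diff. A minimal sketch consistent with how it is called here (an assumption, not the repository's actual implementation):

```python
def parse_deployment_list(deployments: str) -> list[str]:
    """Split a comma-separated deployment string into clean names.

    Surrounding whitespace is stripped and empty entries are dropped,
    so both "" and "a, b" behave sensibly.
    """
    return [name.strip() for name in deployments.split(",") if name.strip()]
```

With `GPT4O_DEPLOYMENTS` unset, `os.getenv(...) or ""` feeds an empty string in, yielding an empty list, so the `deployment_id in gpt4o_deployments` check never fires and requests fall through to the existing code path.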
The remaining 10 changed files are not shown here.
