From 8bb3b8bfd5a52da61c1e4a81dbeeec04d624bb5b Mon Sep 17 00:00:00 2001
From: Anton Dubovik
Date: Fri, 31 May 2024 16:57:01 +0200
Subject: [PATCH] feat: supported gpt4o models (#107)

---
 .ort.yml                                  |   4 +-
 README.md                                 |   2 +
 aidial_adapter_openai/app.py              |  22 +-
 .../chat_completion.py                    | 232 +++++-------
 .../gpt4_multi_modal/download.py          | 157 ++++++++++++
 .../gpt4_vision.py}                       |  17 +-
 .../image_tokenizer.py                    |   5 +-
 .../messages.py                           |   0
 aidial_adapter_openai/utils/streaming.py  |   2 +-
 aidial_adapter_openai/utils/tokens.py     |  11 +-
 poetry.lock                               |  69 +++---
 pyproject.toml                            |   2 +-
 tests/test_image_tokenization.py          |   4 +-
 13 files changed, 314 insertions(+), 213 deletions(-)
 rename aidial_adapter_openai/{gpt4_vision => gpt4_multi_modal}/chat_completion.py (54%)
 create mode 100644 aidial_adapter_openai/gpt4_multi_modal/download.py
 rename aidial_adapter_openai/{gpt4_vision/gpt4_conversion.py => gpt4_multi_modal/gpt4_vision.py} (77%)
 rename aidial_adapter_openai/{gpt4_vision => gpt4_multi_modal}/image_tokenizer.py (96%)
 rename aidial_adapter_openai/{gpt4_vision => gpt4_multi_modal}/messages.py (100%)

diff --git a/.ort.yml b/.ort.yml
index 5bd5efa..62ed830 100644
--- a/.ort.yml
+++ b/.ort.yml
@@ -19,9 +19,9 @@ resolutions:
   - message: ".*PyPI::numpy:1\\.26\\.0.*"
     reason: "CANT_FIX_EXCEPTION"
     comment: "BSD 3-Clause license: https://github.com/numpy/numpy/blob/v1.26.0/LICENSES_bundled.txt"
-  - message: ".*PyPI::tiktoken:0\\.5\\.1.*"
+  - message: ".*PyPI::tiktoken:0\\.7\\.0.*"
     reason: "CANT_FIX_EXCEPTION"
-    comment: "MIT License: https://github.com/openai/tiktoken/blob/0.5.1/LICENSE"
+    comment: "MIT License: https://github.com/openai/tiktoken/blob/0.7.0/LICENSE"
   - message: ".*PyPI::httpcore:0\\.18\\.0.*"
     reason: "CANT_FIX_EXCEPTION"
     comment: "BSD 3-Clause New or Revised License: https://github.com/encode/httpcore/blob/0.18.0/LICENSE.md"
diff --git a/README.md b/README.md
index 41fc2d4..a589f85 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ make serve
 ```
 
 ### Make on Windows
+
 As of now, Windows distributions do not include the make tool. To run make commands, the tool can be installed using the following command (since [Windows 10](https://learn.microsoft.com/en-us/windows/package-manager/winget/)):
 
 ```sh
@@ -69,6 +70,7 @@ Copy `.env.example` to `.env` and customize it for your environment:
 |CORE_API_VERSION||Supported value `0.6` to work with the old version of the file api|
 |MISTRAL_DEPLOYMENTS|``|Comma-separated list of deployments that support Mistral Large Azure API. Example: `mistral-large-azure,mistral-large`|
 |DATABRICKS_DEPLOYMENTS|``|Comma-separated list of Databricks chat completion deployments. Example: `databricks-dbrx-instruct,databricks-mixtral-8x7b-instruct,databricks-llama-2-70b-chat`|
+|GPT4O_DEPLOYMENTS|``|Comma-separated list of GPT-4o chat completion deployments. Example: `gpt-4o-2024-05-13`|
 
 ### Docker
 
diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index 3b0d6a8..bc1bed2 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -16,8 +16,9 @@ from aidial_adapter_openai.databricks import (
     chat_completion as databricks_chat_completion,
 )
-from aidial_adapter_openai.gpt4_vision.chat_completion import (
-    chat_completion as gpt4_vision_chat_completion,
+from aidial_adapter_openai.gpt4_multi_modal.chat_completion import (
+    gpt4_vision_chat_completion,
+    gpt4o_chat_completion,
 )
 from aidial_adapter_openai.mistral import (
     chat_completion as mistral_chat_completion,
 )
@@ -55,6 +56,7 @@ databricks_deployments = parse_deployment_list(
     os.getenv("DATABRICKS_DEPLOYMENTS") or ""
 )
+gpt4o_deployments = parse_deployment_list(os.getenv("GPT4O_DEPLOYMENTS") or "")
 api_versions_mapping: Dict[str, str] = json.loads(
     os.getenv("API_VERSIONS_MAPPING", "{}")
 )
@@ -142,6 +144,22 @@ async def chat_completion(deployment_id: str, request: Request):
     openai_model_name = model_aliases.get(deployment_id, deployment_id)
     tokenizer = Tokenizer(model=openai_model_name)
 
+    if deployment_id in gpt4o_deployments:
+        storage = create_file_storage("images", request.headers)
+        return await handle_exceptions(
+            gpt4o_chat_completion(
+                data,
+                deployment_id,
+                upstream_endpoint,
+                api_key,
+                is_stream,
+                storage,
+                api_type,
+                api_version,
+                tokenizer,
+            )
+        )
+
     discarded_messages = None
     if "max_prompt_tokens" in data:
         max_prompt_tokens = data["max_prompt_tokens"]
diff --git a/aidial_adapter_openai/gpt4_vision/chat_completion.py b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
similarity index 54%
rename from aidial_adapter_openai/gpt4_vision/chat_completion.py
rename to aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
index fdf2beb..a74e2fa 100644
--- a/aidial_adapter_openai/gpt4_vision/chat_completion.py
+++ b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
@@ -1,12 +1,11 @@
-import mimetypes
 import os
 from typing import (
     Any,
     AsyncIterator,
+    Callable,
     Dict,
     List,
     Optional,
-    Tuple,
     TypeVar,
     cast,
 )
@@ -14,42 +13,32 @@
 import aiohttp
 from fastapi.responses import JSONResponse, Response, StreamingResponse
 
-from aidial_adapter_openai.gpt4_vision.gpt4_conversion import (
-    convert_gpt4v_to_gpt4_chunk,
+from aidial_adapter_openai.gpt4_multi_modal.download import (
+    SUPPORTED_FILE_EXTS,
+    transform_messages,
 )
-from aidial_adapter_openai.gpt4_vision.image_tokenizer import tokenize_image
-from aidial_adapter_openai.gpt4_vision.messages import (
-    create_image_message,
-    create_text_message,
+from aidial_adapter_openai.gpt4_multi_modal.gpt4_vision import (
+    convert_gpt4v_to_gpt4_chunk,
 )
 from aidial_adapter_openai.utils.auth import get_auth_header
 from aidial_adapter_openai.utils.exceptions import HTTPException
-from aidial_adapter_openai.utils.image_data_url import ImageDataURL
 from aidial_adapter_openai.utils.log_config import logger
 from aidial_adapter_openai.utils.sse_stream import (
     parse_openai_sse_stream,
     to_openai_sse_stream,
 )
-from aidial_adapter_openai.utils.storage import (
-    FileStorage,
-    download_file_as_base64,
-)
+from aidial_adapter_openai.utils.storage import FileStorage
 from aidial_adapter_openai.utils.streaming import (
     create_error_response,
     generate_stream,
     map_stream,
     prepend_to_stream,
 )
-from aidial_adapter_openai.utils.text import format_ordinal
 from aidial_adapter_openai.utils.tokens import Tokenizer
 
 # The built-in default max_tokens is 16 tokens,
 # which is too small for most image-to-text use cases.
-DEFAULT_MAX_TOKENS = int(os.getenv("GPT4_VISION_MAX_TOKENS", "1024"))
-
-# Officially supported image types by GPT-4 Vision
-SUPPORTED_IMAGE_TYPES = ["image/jpeg", "image/png", "image/webp", "image/gif"]
-SUPPORTED_FILE_EXTS = ["jpg", "jpeg", "png", "webp", "gif"]
+GPT4V_DEFAULT_MAX_TOKENS = int(os.getenv("GPT4_VISION_MAX_TOKENS", "1024"))
 
 USAGE = f"""
 ### Usage
@@ -120,143 +109,55 @@ async def predict_non_stream(
     return await response.json()
 
-
-def guess_attachment_type(attachment: dict) -> Optional[str]:
-    type = attachment.get("type")
-    if type is None:
-        return None
-
-    if "octet-stream" in type:
-        # It's an arbitrary binary file. Trying to guess the type from the URL.
-        url = attachment.get("url")
-        if url is not None:
-            url_type = mimetypes.guess_type(url)[0]
-            if url_type is not None:
-                return url_type
-        return None
-
-    return type
-
-
-async def download_image(
-    file_storage: Optional[FileStorage], attachment: dict
-) -> ImageDataURL | str:
-    try:
-        type = guess_attachment_type(attachment)
-        if type is None:
-            return "Can't derive media type of the attachment"
-        elif type not in SUPPORTED_IMAGE_TYPES:
-            return f"The attachment isn't one of the supported types: {type}"
-
-        if "data" in attachment:
-            return ImageDataURL(type=type, data=attachment["data"])
-
-        if "url" in attachment:
-            attachment_link: str = attachment["url"]
-
-            image_url = ImageDataURL.from_data_url(attachment_link)
-            if image_url is not None:
-                if image_url.type in SUPPORTED_IMAGE_TYPES:
-                    return image_url
-                else:
-                    return (
-                        "The image attachment isn't one of the supported types"
-                    )
-
-            if file_storage is not None:
-                url = file_storage.attachment_link_to_url(attachment_link)
-                data = await file_storage.download_file_as_base64(url)
-            else:
-                data = await download_file_as_base64(attachment_link)
-
-            return ImageDataURL(type=type, data=data)
-
-        return "Invalid attachment"
-
-    except Exception as e:
-        logger.debug(f"Failed to download image: {e}")
-        return "Failed to download image"
-
-
-async def transform_message(
-    file_storage: Optional[FileStorage], message: dict
-) -> Tuple[dict, int] | List[Tuple[int, str]]:
-    content = message.get("content", "")
-    custom_content = message.get("custom_content", {})
-    attachments = custom_content.get("attachments", [])
-
-    message = {k: v for k, v in message.items() if k != "custom_content"}
-
-    if len(attachments) == 0:
-        return message, 0
-
-    logger.debug(f"original attachments: {attachments}")
-
-    download_results: List[ImageDataURL | str] = [
-        await download_image(file_storage, attachment)
-        for attachment in attachments
-    ]
-
-    logger.debug(f"download results: {download_results}")
-
-    errors: List[Tuple[int, str]] = [
-        (idx, result)
-        for idx, result in enumerate(download_results)
-        if isinstance(result, str)
-    ]
-
-    if len(errors) > 0:
-        logger.debug(f"download errors: {errors}")
-        return errors
-
-    image_urls: List[ImageDataURL] = cast(List[ImageDataURL], download_results)
-
-    image_tokens: List[int] = []
-    image_messages: List[dict] = []
-
-    for image_url in image_urls:
-        tokens, detail = tokenize_image(image_url, "auto")
-        image_tokens.append(tokens)
-        image_messages.append(create_image_message(image_url, detail))
-
-    total_image_tokens = sum(image_tokens)
-
-    logger.debug(f"image tokens: {image_tokens}")
-
-    sub_messages: List[dict] = [create_text_message(content)] + image_messages
-
-    return {**message, "content": sub_messages}, total_image_tokens
-
-
-async def transform_messages(
-    file_storage: Optional[FileStorage], messages: List[dict]
-) -> Tuple[List[dict], int] | str:
-    new_messages: List[dict] = []
-    image_tokens = 0
-
-    errors: Dict[int, List[Tuple[int, str]]] = {}
-
-    n = len(messages)
-    for idx, message in enumerate(messages):
-        result = await transform_message(file_storage, message)
-        if isinstance(result, list):
-            errors[n - idx] = result
-        else:
-            new_message, tokens = result
-            new_messages.append(new_message)
-            image_tokens += tokens
-
-    if errors:
-        msg = "Some of the image attachments failed to download:"
-        for i, error in errors.items():
-            msg += f"\n- {format_ordinal(i)} message from end:"
-            for j, err in error:
-                msg += f"\n  - {format_ordinal(j + 1)} attachment: {err}"
-        return msg
-
-    return new_messages, image_tokens
-
-
-T = TypeVar("T")
+async def gpt4o_chat_completion(
+    request: Any,
+    deployment: str,
+    upstream_endpoint: str,
+    api_key: str,
+    is_stream: bool,
+    file_storage: Optional[FileStorage],
+    api_type: str,
+    api_version: str,
+    tokenizer: Tokenizer,
+) -> Response:
+    return await chat_completion(
+        request,
+        deployment,
+        upstream_endpoint,
+        api_key,
+        is_stream,
+        file_storage,
+        api_type,
+        api_version,
+        tokenizer,
+        lambda x: x,
+        None,
+    )
+
+
+async def gpt4_vision_chat_completion(
+    request: Any,
+    deployment: str,
+    upstream_endpoint: str,
+    api_key: str,
+    is_stream: bool,
+    file_storage: Optional[FileStorage],
+    api_type: str,
+    api_version: str,
+) -> Response:
+    return await chat_completion(
+        request,
+        deployment,
+        upstream_endpoint,
+        api_key,
+        is_stream,
+        file_storage,
+        api_type,
+        api_version,
+        Tokenizer("gpt-4"),
+        convert_gpt4v_to_gpt4_chunk,
+        GPT4V_DEFAULT_MAX_TOKENS,
+    )
 
 
 async def chat_completion(
@@ -268,7 +169,11 @@
     file_storage: Optional[FileStorage],
     api_type: str,
     api_version: str,
+    tokenizer: Tokenizer,
+    response_transformer: Callable[[dict], dict | None],
+    default_max_tokens: int | None,
 ) -> Response:
+
     if request.get("n", 1) > 1:
         raise HTTPException(
             status_code=422,
@@ -289,7 +194,7 @@
     result = await transform_messages(file_storage, messages)
 
     if isinstance(result, str):
-        logger.debug(f"Failed to prepare request for GPT4V: {result}")
+        logger.debug(f"Failed to prepare request: {result}")
 
         if file_storage is not None:
             # Report user-level error if the request came from the chat
@@ -305,15 +210,12 @@
     new_messages, prompt_image_tokens = result
 
-    tokenizer = Tokenizer(model="gpt-4")
     prompt_text_tokens = tokenizer.calculate_prompt_tokens(messages)
     estimated_prompt_tokens = prompt_text_tokens + prompt_image_tokens
 
-    max_tokens = request.get("max_tokens", DEFAULT_MAX_TOKENS)
-
     request = {
         **request,
-        "max_tokens": max_tokens,
+        "max_tokens": request.get("max_tokens") or default_max_tokens,
         "messages": new_messages,
     }
 
@@ -324,6 +226,8 @@
     if isinstance(response, Response):
         return response
 
+    T = TypeVar("T")
+
     def debug_print(chunk: T) -> T:
         logger.debug(f"chunk: {chunk}")
         return chunk
@@ -334,7 +238,7 @@ def debug_print(chunk: T) -> T:
                 debug_print,
                 generate_stream(
                     stream=map_stream(
-                        convert_gpt4v_to_gpt4_chunk,
+                        response_transformer,
                         parse_openai_sse_stream(response),
                     ),
                     prompt_tokens=estimated_prompt_tokens,
@@ -351,7 +255,7 @@
         if isinstance(response, Response):
             return response
 
-        response = convert_gpt4v_to_gpt4_chunk(response)
+        response = response_transformer(response)
         if response is None:
             raise HTTPException(
                 status_code=500,
                 type="invalid_response_error",
             )
 
-        content = response["choices"][0]["message"].get("content", "")
+        content = response["choices"][0]["message"].get("content") or ""
 
         usage = response["usage"]
         actual_prompt_tokens = usage["prompt_tokens"]
diff --git a/aidial_adapter_openai/gpt4_multi_modal/download.py b/aidial_adapter_openai/gpt4_multi_modal/download.py
new file mode 100644
index 0000000..6756549
--- /dev/null
+++ b/aidial_adapter_openai/gpt4_multi_modal/download.py
@@ -0,0 +1,157 @@
+import mimetypes
+from typing import Dict, List, Optional, Tuple, cast
+
+from aidial_adapter_openai.gpt4_multi_modal.image_tokenizer import (
+    tokenize_image,
+)
+from aidial_adapter_openai.gpt4_multi_modal.messages import (
+    create_image_message,
+    create_text_message,
+)
+from aidial_adapter_openai.utils.image_data_url import ImageDataURL
+from aidial_adapter_openai.utils.log_config import logger
+from aidial_adapter_openai.utils.storage import (
+    FileStorage,
+    download_file_as_base64,
+)
+from aidial_adapter_openai.utils.text import format_ordinal
+
+# Officially supported image types by GPT-4 Vision, GPT-4o
+SUPPORTED_IMAGE_TYPES = ["image/jpeg", "image/png", "image/webp", "image/gif"]
+SUPPORTED_FILE_EXTS = ["jpg", "jpeg", "png", "webp", "gif"]
+
+
+def guess_attachment_type(attachment: dict) -> Optional[str]:
+    type = attachment.get("type")
+    if type is None:
+        return None
+
+    if "octet-stream" in type:
+        # It's an arbitrary binary file. Trying to guess the type from the URL.
+        url = attachment.get("url")
+        if url is not None:
+            url_type = mimetypes.guess_type(url)[0]
+            if url_type is not None:
+                return url_type
+        return None
+
+    return type
+
+
+async def download_image(
+    file_storage: Optional[FileStorage], attachment: dict
+) -> ImageDataURL | str:
+    try:
+        type = guess_attachment_type(attachment)
+        if type is None:
+            return "Can't derive media type of the attachment"
+        elif type not in SUPPORTED_IMAGE_TYPES:
+            return f"The attachment isn't one of the supported types: {type}"
+
+        if "data" in attachment:
+            return ImageDataURL(type=type, data=attachment["data"])
+
+        if "url" in attachment:
+            attachment_link: str = attachment["url"]
+
+            image_url = ImageDataURL.from_data_url(attachment_link)
+            if image_url is not None:
+                if image_url.type in SUPPORTED_IMAGE_TYPES:
+                    return image_url
+                else:
+                    return (
+                        "The image attachment isn't one of the supported types"
+                    )
+
+            if file_storage is not None:
+                url = file_storage.attachment_link_to_url(attachment_link)
+                data = await file_storage.download_file_as_base64(url)
+            else:
+                data = await download_file_as_base64(attachment_link)
+
+            return ImageDataURL(type=type, data=data)
+
+        return "Invalid attachment"
+
+    except Exception as e:
+        logger.debug(f"Failed to download image: {e}")
+        return "Failed to download image"
+
+
+async def transform_message(
+    file_storage: Optional[FileStorage], message: dict
+) -> Tuple[dict, int] | List[Tuple[int, str]]:
+    content = message.get("content", "")
+    custom_content = message.get("custom_content", {})
+    attachments = custom_content.get("attachments", [])
+
+    message = {k: v for k, v in message.items() if k != "custom_content"}
+
+    if len(attachments) == 0:
+        return message, 0
+
+    logger.debug(f"original attachments: {attachments}")
+
+    download_results: List[ImageDataURL | str] = [
+        await download_image(file_storage, attachment)
+        for attachment in attachments
+    ]
+
+    logger.debug(f"download results: {download_results}")
+
+    errors: List[Tuple[int, str]] = [
+        (idx, result)
+        for idx, result in enumerate(download_results)
+        if isinstance(result, str)
+    ]
+
+    if len(errors) > 0:
+        logger.debug(f"download errors: {errors}")
+        return errors
+
+    image_urls: List[ImageDataURL] = cast(List[ImageDataURL], download_results)
+
+    image_tokens: List[int] = []
+    image_messages: List[dict] = []
+
+    for image_url in image_urls:
+        tokens, detail = tokenize_image(image_url, "auto")
+        image_tokens.append(tokens)
+        image_messages.append(create_image_message(image_url, detail))
+
+    total_image_tokens = sum(image_tokens)
+
+    logger.debug(f"image tokens: {image_tokens}")
+
+    sub_messages: List[dict] = [create_text_message(content)] + image_messages
+
+    return {**message, "content": sub_messages}, total_image_tokens
+
+
+async def transform_messages(
+    file_storage: Optional[FileStorage], messages: List[dict]
+) -> Tuple[List[dict], int] | str:
+    new_messages: List[dict] = []
+    image_tokens = 0
+
+    errors: Dict[int, List[Tuple[int, str]]] = {}
+
+    n = len(messages)
+    for idx, message in enumerate(messages):
+        result = await transform_message(file_storage, message)
+        if isinstance(result, list):
+            errors[n - idx] = result
+        else:
+            new_message, tokens = result
+            new_messages.append(new_message)
+            image_tokens += tokens
+
+    if errors:
+        msg = "Some of the image attachments failed to download:"
+        for i, error in errors.items():
+            msg += f"\n- {format_ordinal(i)} message from end:"
+            for j, err in error:
+                msg += f"\n  - {format_ordinal(j + 1)} attachment: {err}"
+        return msg
+
+    return new_messages, image_tokens
diff --git a/aidial_adapter_openai/gpt4_vision/gpt4_conversion.py b/aidial_adapter_openai/gpt4_multi_modal/gpt4_vision.py
similarity index 77%
rename from aidial_adapter_openai/gpt4_vision/gpt4_conversion.py
rename to aidial_adapter_openai/gpt4_multi_modal/gpt4_vision.py
index 15e483f..eef5673 100644
--- a/aidial_adapter_openai/gpt4_vision/gpt4_conversion.py
+++ b/aidial_adapter_openai/gpt4_multi_modal/gpt4_vision.py
@@ -3,7 +3,7 @@
 from aidial_adapter_openai.utils.log_config import logger
 
 
-def convert_finish_reason(finish_type: Optional[str]) -> Optional[str]:
+def convert_to_finish_reason(finish_type: Optional[str]) -> Optional[str]:
     match finish_type:
         case None:
             return None
@@ -42,19 +42,20 @@ def convert_gpt4v_to_gpt4_choice(choice: dict) -> dict:
     """GPT4 Vision choice is slightly different from the vanilla GPT4 choice
     in how it reports finish reason."""
 
-    gpt4v_finish_type: Optional[str] = (choice.get("finish_details") or {}).get(
-        "type"
-    )
-    gpt4_finish_reason: Optional[str] = convert_finish_reason(gpt4v_finish_type)
-
     ret = choice.copy()
 
     if "finish_details" in ret:
+        gpt4v_finish_type: Optional[str] = ret["finish_details"].get("type")
+        gpt4_finish_reason: Optional[str] = convert_to_finish_reason(
+            gpt4v_finish_type
+        )
+
+        if gpt4_finish_reason is not None:
+            ret["finish_reason"] = gpt4_finish_reason
+
         del ret["finish_details"]
 
     if "content_filter_results" in ret:
         del ret["content_filter_results"]
 
-    ret["finish_reason"] = gpt4_finish_reason
-
     return ret
diff --git a/aidial_adapter_openai/gpt4_vision/image_tokenizer.py b/aidial_adapter_openai/gpt4_multi_modal/image_tokenizer.py
similarity index 96%
rename from aidial_adapter_openai/gpt4_vision/image_tokenizer.py
rename to aidial_adapter_openai/gpt4_multi_modal/image_tokenizer.py
index ac06181..a894589 100644
--- a/aidial_adapter_openai/gpt4_vision/image_tokenizer.py
+++ b/aidial_adapter_openai/gpt4_multi_modal/image_tokenizer.py
@@ -10,7 +10,10 @@
 
 from PIL import Image
 
-from aidial_adapter_openai.gpt4_vision.messages import DetailLevel, ImageDetail
+from aidial_adapter_openai.gpt4_multi_modal.messages import (
+    DetailLevel,
+    ImageDetail,
+)
 from aidial_adapter_openai.utils.image_data_url import ImageDataURL
 
diff --git a/aidial_adapter_openai/gpt4_vision/messages.py b/aidial_adapter_openai/gpt4_multi_modal/messages.py
similarity index 100%
rename from aidial_adapter_openai/gpt4_vision/messages.py
rename to aidial_adapter_openai/gpt4_multi_modal/messages.py
diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py
index 40e40a6..4f22857 100644
--- a/aidial_adapter_openai/utils/streaming.py
+++ b/aidial_adapter_openai/utils/streaming.py
@@ -106,7 +106,7 @@ async def generate_stream(
                 "total_tokens": prompt_tokens + completion_tokens,
             }
             last_chunk["choices"][0]["delta"]["content"] = ""
-            last_chunk["choices"][0]["delta"]["finish_reason"] = "length"
+            last_chunk["choices"][0]["finish_reason"] = "length"
 
             yield last_chunk
         else:
diff --git a/aidial_adapter_openai/utils/tokens.py b/aidial_adapter_openai/utils/tokens.py
index 6d9681e..bbc0224 100644
--- a/aidial_adapter_openai/utils/tokens.py
+++ b/aidial_adapter_openai/utils/tokens.py
@@ -15,7 +15,16 @@ class Tokenizer:
     def __init__(self, model: str) -> None:
         self.model = model
-        self.encoding = encoding_for_model(model)
+        try:
+            self.encoding = encoding_for_model(model)
+        except KeyError:
+            raise HTTPException(
+                message=f"Could not find tokenizer for the model {model!r} in tiktoken. "
+                "Consider mapping the model to an existing tokenizer via MODEL_ALIASES env var, "
+                "or declare it as a model which doesn't require tokenization through tiktoken.",
+                status_code=500,
+                type="internal_server_error",
+            )
 
     def calculate_tokens(self, string: str) -> int:
         return len(self.encoding.encode(string))
diff --git a/poetry.lock b/poetry.lock
index 2ac1e6a..1a7eb04 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2115,40 +2115,47 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7
 
 [[package]]
 name = "tiktoken"
-version = "0.5.1"
+version = "0.7.0"
 description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "tiktoken-0.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2b0bae3fd56de1c0a5874fb6577667a3c75bf231a6cef599338820210c16e40a"},
-    {file = "tiktoken-0.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e529578d017045e2f0ed12d2e00e7e99f780f477234da4aae799ec4afca89f37"},
-    {file = "tiktoken-0.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edd2ffbb789712d83fee19ab009949f998a35c51ad9f9beb39109357416344ff"},
-    {file = "tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4c73d47bdc1a3f1f66ffa019af0386c48effdc6e8797e5e76875f6388ff72e9"},
-    {file = "tiktoken-0.5.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:46b8554b9f351561b1989157c6bb54462056f3d44e43aa4e671367c5d62535fc"},
-    {file = "tiktoken-0.5.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:92ed3bbf71a175a6a4e5fbfcdb2c422bdd72d9b20407e00f435cf22a68b4ea9b"},
-    {file = "tiktoken-0.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:714efb2f4a082635d9f5afe0bf7e62989b72b65ac52f004eb7ac939f506c03a4"},
-    {file = "tiktoken-0.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a10488d1d1a5f9c9d2b2052fdb4cf807bba545818cb1ef724a7f5d44d9f7c3d4"},
-    {file = "tiktoken-0.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8079ac065572fe0e7c696dbd63e1fdc12ce4cdca9933935d038689d4732451df"},
-    {file = "tiktoken-0.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ef730db4097f5b13df8d960f7fdda2744fe21d203ea2bb80c120bb58661b155"},
-    {file = "tiktoken-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:426e7def5f3f23645dada816be119fa61e587dfb4755de250e136b47a045c365"},
-    {file = "tiktoken-0.5.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:323cec0031358bc09aa965c2c5c1f9f59baf76e5b17e62dcc06d1bb9bc3a3c7c"},
-    {file = "tiktoken-0.5.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5abd9436f02e2c8eda5cce2ff8015ce91f33e782a7423de2a1859f772928f714"},
-    {file = "tiktoken-0.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:1fe99953b63aabc0c9536fbc91c3c9000d78e4755edc28cc2e10825372046a2d"},
-    {file = "tiktoken-0.5.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:dcdc630461927718b317e6f8be7707bd0fc768cee1fdc78ddaa1e93f4dc6b2b1"},
-    {file = "tiktoken-0.5.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1f2b3b253e22322b7f53a111e1f6d7ecfa199b4f08f3efdeb0480f4033b5cdc6"},
-    {file = "tiktoken-0.5.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:43ce0199f315776dec3ea7bf86f35df86d24b6fcde1babd3e53c38f17352442f"},
-    {file = "tiktoken-0.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a84657c083d458593c0235926b5c993eec0b586a2508d6a2020556e5347c2f0d"},
-    {file = "tiktoken-0.5.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c008375c0f3d97c36e81725308699116cd5804fdac0f9b7afc732056329d2790"},
-    {file = "tiktoken-0.5.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:779c4dea5edd1d3178734d144d32231e0b814976bec1ec09636d1003ffe4725f"},
-    {file = "tiktoken-0.5.1-cp38-cp38-win_amd64.whl", hash = "sha256:b5dcfcf9bfb798e86fbce76d40a1d5d9e3f92131aecfa3d1e5c9ea1a20f1ef1a"},
-    {file = "tiktoken-0.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b180a22db0bbcc447f691ffc3cf7a580e9e0587d87379e35e58b826ebf5bc7b"},
-    {file = "tiktoken-0.5.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b756a65d98b7cf760617a6b68762a23ab8b6ef79922be5afdb00f5e8a9f4e76"},
-    {file = "tiktoken-0.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba9873c253ca1f670e662192a0afcb72b41e0ba3e730f16c665099e12f4dac2d"},
-    {file = "tiktoken-0.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74c90d2be0b4c1a2b3f7dde95cd976757817d4df080d6af0ee8d461568c2e2ad"},
-    {file = "tiktoken-0.5.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:709a5220891f2b56caad8327fab86281787704931ed484d9548f65598dea9ce4"},
-    {file = "tiktoken-0.5.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d5a187ff9c786fae6aadd49f47f019ff19e99071dc5b0fe91bfecc94d37c686"},
-    {file = "tiktoken-0.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:e21840043dbe2e280e99ad41951c00eff8ee3b63daf57cd4c1508a3fd8583ea2"},
-    {file = "tiktoken-0.5.1.tar.gz", hash = "sha256:27e773564232004f4f810fd1f85236673ec3a56ed7f1206fc9ed8670ebedb97a"},
+    {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"},
+    {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"},
+    {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79383a6e2c654c6040e5f8506f3750db9ddd71b550c724e673203b4f6b4b4590"},
+    {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d4511c52caacf3c4981d1ae2df85908bd31853f33d30b345c8b6830763f769c"},
+    {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13c94efacdd3de9aff824a788353aa5749c0faee1fbe3816df365ea450b82311"},
+    {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8e58c7eb29d2ab35a7a8929cbeea60216a4ccdf42efa8974d8e176d50c9a3df5"},
+    {file = "tiktoken-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:21a20c3bd1dd3e55b91c1331bf25f4af522c525e771691adbc9a69336fa7f702"},
+    {file = "tiktoken-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10c7674f81e6e350fcbed7c09a65bca9356eaab27fb2dac65a1e440f2bcfe30f"},
+    {file = "tiktoken-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:084cec29713bc9d4189a937f8a35dbdfa785bd1235a34c1124fe2323821ee93f"},
+    {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811229fde1652fedcca7c6dfe76724d0908775b353556d8a71ed74d866f73f7b"},
+    {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b6e7dc2e7ad1b3757e8a24597415bafcfb454cebf9a33a01f2e6ba2e663992"},
+    {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1063c5748be36344c7e18c7913c53e2cca116764c2080177e57d62c7ad4576d1"},
+    {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20295d21419bfcca092644f7e2f2138ff947a6eb8cfc732c09cc7d76988d4a89"},
+    {file = "tiktoken-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:959d993749b083acc57a317cbc643fb85c014d055b2119b739487288f4e5d1cb"},
+    {file = "tiktoken-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:71c55d066388c55a9c00f61d2c456a6086673ab7dec22dd739c23f77195b1908"},
+    {file = "tiktoken-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:09ed925bccaa8043e34c519fbb2f99110bd07c6fd67714793c21ac298e449410"},
+    {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03c6c40ff1db0f48a7b4d2dafeae73a5607aacb472fa11f125e7baf9dce73704"},
+    {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20b5c6af30e621b4aca094ee61777a44118f52d886dbe4f02b70dfe05c15350"},
+    {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d427614c3e074004efa2f2411e16c826f9df427d3c70a54725cae860f09e4bf4"},
+    {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c46d7af7b8c6987fac9b9f61041b452afe92eb087d29c9ce54951280f899a97"},
+    {file = "tiktoken-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0bc603c30b9e371e7c4c7935aba02af5994a909fc3c0fe66e7004070858d3f8f"},
+    {file = "tiktoken-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2398fecd38c921bcd68418675a6d155fad5f5e14c2e92fcf5fe566fa5485a858"},
+    {file = "tiktoken-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f5f6afb52fb8a7ea1c811e435e4188f2bef81b5e0f7a8635cc79b0eef0193d6"},
+    {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:861f9ee616766d736be4147abac500732b505bf7013cfaf019b85892637f235e"},
+    {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54031f95c6939f6b78122c0aa03a93273a96365103793a22e1793ee86da31685"},
+    {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fffdcb319b614cf14f04d02a52e26b1d1ae14a570f90e9b55461a72672f7b13d"},
+    {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c72baaeaefa03ff9ba9688624143c858d1f6b755bb85d456d59e529e17234769"},
+    {file = "tiktoken-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:131b8aeb043a8f112aad9f46011dced25d62629091e51d9dc1adbf4a1cc6aa98"},
+    {file = "tiktoken-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cabc6dc77460df44ec5b879e68692c63551ae4fae7460dd4ff17181df75f1db7"},
+    {file = "tiktoken-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8d57f29171255f74c0aeacd0651e29aa47dff6f070cb9f35ebc14c82278f3b25"},
+    {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ee92776fdbb3efa02a83f968c19d4997a55c8e9ce7be821ceee04a1d1ee149c"},
+    {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e215292e99cb41fbc96988ef62ea63bb0ce1e15f2c147a61acc319f8b4cbe5bf"},
+    {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a81bac94769cab437dd3ab0b8a4bc4e0f9cf6835bcaa88de71f39af1791727a"},
+    {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6d73ea93e91d5ca771256dfc9d1d29f5a554b83821a1dc0891987636e0ae226"},
+    {file = "tiktoken-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:2bcb28ddf79ffa424f171dfeef9a4daff61a94c631ca6813f43967cb263b83b9"},
+    {file = "tiktoken-0.7.0.tar.gz", hash = "sha256:1077266e949c24e0291f6c350433c6f0971365ece2b173a23bc3b9f9defef6b6"},
 ]
 
 [package.dependencies]
@@ -2433,4 +2440,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.13"
-content-hash = "983ac1732769e4abe29749bfa202593f50421cfc4c0f73edb12b0f1c6850d58f"
+content-hash = "5a674ea6662452a73044e1eade33b60d4dc8da28dd079d443f389667d3cfe39f"
diff --git a/pyproject.toml b/pyproject.toml
index 7422860..7c6cfbf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ env_files = [".env"]
 python = ">=3.11,<3.13"
 fastapi = "0.109.2"
 openai = "0.28.1"
-tiktoken = "0.5.1"
+tiktoken = "0.7.0"
 uvicorn = "0.23"
 wrapt = "^1.15.0"
 pydantic = "^1.10.12"
diff --git a/tests/test_image_tokenization.py b/tests/test_image_tokenization.py
index 7dc7826..34541e2 100644
--- a/tests/test_image_tokenization.py
+++ b/tests/test_image_tokenization.py
@@ -2,10 +2,10 @@
 
 import pytest
 
-from aidial_adapter_openai.gpt4_vision.image_tokenizer import (
+from aidial_adapter_openai.gpt4_multi_modal.image_tokenizer import (
     tokenize_image_by_size,
 )
-from aidial_adapter_openai.gpt4_vision.messages import ImageDetail
+from aidial_adapter_openai.gpt4_multi_modal.messages import ImageDetail
 
 test_cases: List[Tuple[int, int, ImageDetail, int]] = [
     (1, 1, "auto", 85),