Skip to content

Commit

Permalink
fix: make image tokenizer respect 'detail' field (#167)
Browse files Browse the repository at this point in the history
  • Loading branch information
adubovik authored Nov 5, 2024
1 parent ca28d1d commit 83b9dd2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
19 changes: 13 additions & 6 deletions aidial_adapter_openai/gpt4_multi_modal/transformation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import List, Set, cast
from typing import List, Optional, Set, cast

from aidial_sdk.exceptions import HTTPException as DialException
from aidial_sdk.exceptions import InvalidRequestError
Expand All @@ -13,7 +13,7 @@
parse_attachment,
)
from aidial_adapter_openai.dial_api.storage import FileStorage
from aidial_adapter_openai.utils.image import ImageMetadata
from aidial_adapter_openai.utils.image import ImageDetail, ImageMetadata
from aidial_adapter_openai.utils.log_config import logger
from aidial_adapter_openai.utils.multi_modal_message import (
MultiModalMessage,
Expand Down Expand Up @@ -42,12 +42,15 @@ class Config:
errors: Set[TransformationError] = Field(default_factory=set)

def collect_resource(
    self,
    meta: List[ImageMetadata],
    result: Resource | TransformationError,
    detail: Optional[ImageDetail] = None,
):
    """Record the outcome of one resource download.

    A ``TransformationError`` is added to ``self.errors``; any other result
    is converted with ``ImageMetadata.from_resource`` and appended to
    ``meta``.

    Args:
        meta: List that receives the metadata of each successfully obtained
            image. It is mutated in place.
        result: Either the downloaded resource or the error produced while
            trying to obtain it.
        detail: Requested image detail, forwarded unchanged to
            ``ImageMetadata.from_resource``. Defaults to ``None`` so that
            callers written against the earlier two-argument form keep
            working.
    """
    if isinstance(result, TransformationError):
        self.errors.add(result)
    else:
        meta.append(ImageMetadata.from_resource(result, detail))

async def try_download_resource(
self, dial_resource: DialResource
Expand Down Expand Up @@ -84,7 +87,7 @@ async def download_attachment_images(
supported_types=SUPPORTED_IMAGE_TYPES,
)
result = await self.try_download_resource(dial_resource)
self.collect_resource(ret, result)
self.collect_resource(ret, result, None)

return ret

Expand All @@ -98,13 +101,17 @@ async def download_content_images(

for content_part in content:
if image_url := content_part.get("image_url", {}).get("url"):
image_detail = content_part.get("detail")
if image_detail not in [None, "auto", "low", "high"]:
raise ValidationError("Unexpected image detail")

dial_resource = URLResource(
url=image_url,
entity_name="image",
supported_types=SUPPORTED_IMAGE_TYPES,
)
result = await self.try_download_resource(dial_resource)
self.collect_resource(ret, result)
self.collect_resource(ret, result, image_detail)

return ret

Expand Down
8 changes: 5 additions & 3 deletions aidial_adapter_openai/utils/image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from io import BytesIO
from typing import Literal, assert_never
from typing import Literal, Optional, assert_never

from PIL import Image
from pydantic import BaseModel
Expand Down Expand Up @@ -36,13 +36,15 @@ class ImageMetadata(BaseModel):
detail: DetailLevel

@classmethod
def from_resource(
    cls, image: Resource, detail: Optional[ImageDetail] = None
) -> "ImageMetadata":
    """Build the metadata for an image resource.

    Opens ``image.data`` with Pillow to read the pixel dimensions, then
    resolves the detail level through ``resolve_detail_level``.

    Args:
        image: Resource whose ``data`` holds the encoded image bytes.
        detail: Requested image detail. ``None`` (or any other falsy value)
            falls back to ``"auto"``. Defaults to ``None`` so that callers
            written against the earlier single-argument form keep working.

    Returns:
        A new instance carrying the resource, its width and height, and
        the resolved detail level.
    """
    # The image is only opened to read its size; the context manager
    # closes it before the model instance is built.
    with Image.open(BytesIO(image.data)) as img:
        width, height = img.size

    return cls(
        image=image,
        width=width,
        height=height,
        detail=resolve_detail_level(width, height, detail or "auto"),
    )

0 comments on commit 83b9dd2

Please sign in to comment.