From 63c39afaf1da5c4996d5f9d338f2c26131c0ff4e Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Tue, 26 Nov 2024 13:36:53 +0200
Subject: [PATCH 01/10] Replace usage of env variables with application
 config in app state

---
 aidial_adapter_openai/app.py         | 165 ++++++++++++------
 aidial_adapter_openai/app_config.py  |  39 +++++
 aidial_adapter_openai/completions.py |   7 +-
 aidial_adapter_openai/constant.py    |   0
 .../utils/image_tokenizer.py         |  24 +--
 5 files changed, 163 insertions(+), 72 deletions(-)
 create mode 100644 aidial_adapter_openai/app_config.py
 delete mode 100644 aidial_adapter_openai/constant.py

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index 11ce937..e6cceb8 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -1,4 +1,5 @@
 from contextlib import asynccontextmanager
+from typing import Annotated
 
 import pydantic
 from aidial_sdk._errors import pydantic_validation_exception_handler
@@ -6,7 +7,7 @@
 from aidial_sdk.exceptions import InvalidRequestError
 from aidial_sdk.telemetry.init import init_telemetry
 from aidial_sdk.telemetry.types import TelemetryConfig
-from fastapi import FastAPI, Request
+from fastapi import Depends, FastAPI, Request
 from fastapi.responses import Response
 from openai import (
     APIConnectionError,
@@ -16,6 +17,7 @@
     OpenAIError,
 )
 
+from aidial_adapter_openai.app_config import ApplicationConfig
 from aidial_adapter_openai.completions import chat_completion as completion
 from aidial_adapter_openai.dalle3 import (
     chat_completion as dalle3_chat_completion,
@@ -30,17 +32,6 @@
 from aidial_adapter_openai.embeddings.openai import (
     embeddings as openai_embeddings,
 )
-from aidial_adapter_openai.env import (
-    API_VERSIONS_MAPPING,
-    AZURE_AI_VISION_DEPLOYMENTS,
-    DALLE3_AZURE_API_VERSION,
-    DALLE3_DEPLOYMENTS,
-    DATABRICKS_DEPLOYMENTS,
-    GPT4_VISION_DEPLOYMENTS,
-    MISTRAL_DEPLOYMENTS,
-    MODEL_ALIASES,
-    NON_STREAMING_DEPLOYMENTS,
-)
 from aidial_adapter_openai.gpt import gpt_chat_completion
 from aidial_adapter_openai.gpt4_multi_modal.chat_completion import (
     gpt4_vision_chat_completion,
@@ -51,7 +42,10 @@
 )
 from aidial_adapter_openai.utils.auth import get_credentials
 from aidial_adapter_openai.utils.http_client import get_http_client
-from aidial_adapter_openai.utils.image_tokenizer import get_image_tokenizer
+from aidial_adapter_openai.utils.image_tokenizer import (
+    ImageTokenizer,
+    get_image_tokenizer,
+)
 from aidial_adapter_openai.utils.log_config import configure_loggers, logger
 from aidial_adapter_openai.utils.parsers import completions_parser, parse_body
 from aidial_adapter_openai.utils.streaming import create_server_response
@@ -68,16 +62,35 @@ async def lifespan(app: FastAPI):
     await get_http_client().aclose()
 
 
-app = FastAPI(lifespan=lifespan)
+def create_app(
+    app_config: ApplicationConfig | None = None,
+    to_init_telemetry: bool = True,
+    to_configure_loggers: bool = True,
+) -> FastAPI:
+    app = FastAPI(lifespan=lifespan)
+
+    if app_config is None:
+        app_config = ApplicationConfig.from_env()
+
+    app.state.app_config = app_config
+
+    if to_init_telemetry:
+        init_telemetry(app, TelemetryConfig())
+
+    if to_configure_loggers:
+        configure_loggers()
 
-init_telemetry(app, TelemetryConfig())
-configure_loggers()
+    return app
 
 
-def get_api_version(request: Request):
+def get_app_config(request: Request) -> ApplicationConfig:
+    return request.app.state.app_config
+
+
+def get_api_version(request: Request) -> str:
     api_version = request.query_params.get("api-version", "")
-    api_version = API_VERSIONS_MAPPING.get(api_version, api_version)
+    app_config = get_app_config(request)
+    api_version = app_config.API_VERSIONS_MAPPING.get(api_version, api_version)
 
     if api_version == "":
         raise InvalidRequestError("api-version is a required query parameter")
@@ -85,26 +98,52 @@ def get_api_version(request: Request):
     return api_version
 
 
+def _get_image_tokenizer(
+    deployment_id: str, app_config: ApplicationConfig
+) -> ImageTokenizer:
+    image_tokenizer = get_image_tokenizer(deployment_id, app_config)
+    if not image_tokenizer:
+        raise RuntimeError(
+            f"No image tokenizer found for deployment {deployment_id}"
+        )
+    return image_tokenizer
+
+
+app = create_app()
+
+
 @app.post("/openai/deployments/{deployment_id:path}/chat/completions")
-async def chat_completion(deployment_id: str, request: Request):
+async def chat_completion(
+    deployment_id: str,
+    request: Request,
+    app_config: Annotated[ApplicationConfig, Depends(get_app_config)],
+):
 
     data = await parse_body(request)
 
     is_stream = bool(data.get("stream"))
 
-    emulate_streaming = deployment_id in NON_STREAMING_DEPLOYMENTS and is_stream
+    emulate_streaming = (
+        deployment_id in app_config.NON_STREAMING_DEPLOYMENTS and is_stream
+    )
 
     if emulate_streaming:
         data["stream"] = False
 
     return create_server_response(
         emulate_streaming,
-        await call_chat_completion(deployment_id, data, is_stream, request),
+        await call_chat_completion(
+            deployment_id, data, is_stream, request, app_config
+        ),
     )
 
 
 async def call_chat_completion(
-    deployment_id: str, data: dict, is_stream: bool, request: Request
+    deployment_id: str,
+    data: dict,
+    is_stream: bool,
+    request: Request,
+    app_config: ApplicationConfig,
 ):
 
     # Azure OpenAI deployments ignore "model" request field,
@@ -129,9 +168,9 @@ async def call_chat_completion(
             creds,
             api_version,
             deployment_id,
+            app_config,
         )
-
-    if deployment_id in DALLE3_DEPLOYMENTS:
+    if deployment_id in app_config.DALLE3_DEPLOYMENTS:
         storage = create_file_storage("images", request.headers)
         return await dalle3_chat_completion(
             data,
@@ -139,46 +178,52 @@ async def call_chat_completion(
             creds,
             is_stream,
             storage,
-            DALLE3_AZURE_API_VERSION,
+            app_config.DALLE3_AZURE_API_VERSION,
         )
 
-    if deployment_id in MISTRAL_DEPLOYMENTS:
+    if deployment_id in app_config.MISTRAL_DEPLOYMENTS:
         return await mistral_chat_completion(data, upstream_endpoint, creds)
 
-    if deployment_id in DATABRICKS_DEPLOYMENTS:
+    if deployment_id in app_config.DATABRICKS_DEPLOYMENTS:
         return await databricks_chat_completion(data, upstream_endpoint, creds)
 
-    text_tokenizer_model = MODEL_ALIASES.get(deployment_id, deployment_id)
+    text_tokenizer_model = app_config.MODEL_ALIASES.get(
+        deployment_id, deployment_id
+    )
 
-    if image_tokenizer := get_image_tokenizer(deployment_id):
-        storage = create_file_storage("images", request.headers)
+    if deployment_id in app_config.GPT4_VISION_DEPLOYMENTS:
+        tokenizer = MultiModalTokenizer(
+            "gpt-4", _get_image_tokenizer(deployment_id, app_config)
+        )
+        return await gpt4_vision_chat_completion(
+            data,
+            deployment_id,
+            upstream_endpoint,
+            creds,
+            is_stream,
+            create_file_storage("images", request.headers),
+            api_version,
+            tokenizer,
+        )
 
-        if deployment_id in GPT4_VISION_DEPLOYMENTS:
-            tokenizer = MultiModalTokenizer("gpt-4", image_tokenizer)
-            return await gpt4_vision_chat_completion(
-                data,
-                deployment_id,
-                upstream_endpoint,
-                creds,
-                is_stream,
-                storage,
-                api_version,
-                tokenizer,
-            )
-        else:
-            tokenizer = MultiModalTokenizer(
-                text_tokenizer_model, image_tokenizer
-            )
-            return await gpt4o_chat_completion(
-                data,
-                deployment_id,
-                upstream_endpoint,
-                creds,
-                is_stream,
-                storage,
-                api_version,
-                tokenizer,
-            )
+    if deployment_id in (
+        *app_config.GPT4O_DEPLOYMENTS,
+        *app_config.GPT4O_MINI_DEPLOYMENTS,
+    ):
+        tokenizer = MultiModalTokenizer(
+            text_tokenizer_model,
+            _get_image_tokenizer(deployment_id, app_config),
+        )
+        return await gpt4o_chat_completion(
+            data,
+            deployment_id,
+            upstream_endpoint,
+            creds,
+            is_stream,
+            create_file_storage("images", request.headers),
+            api_version,
+            tokenizer,
+        )
 
     tokenizer = PlainTextTokenizer(model=text_tokenizer_model)
     return await gpt_chat_completion(
@@ -192,7 +237,11 @@ async def call_chat_completion(
 
 
 @app.post("/openai/deployments/{deployment_id:path}/embeddings")
-async def embedding(deployment_id: str, request: Request):
+async def embedding(
+    deployment_id: str,
+    request: Request,
+    app_config: Annotated[ApplicationConfig, Depends(get_app_config)],
+):
     data = await parse_body(request)
 
     # See note for /chat/completions endpoint
@@ -202,7 +251,7 @@ async def embedding(
     creds = await get_credentials(request)
     api_version = get_api_version(request)
     upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"]
 
-    if deployment_id in AZURE_AI_VISION_DEPLOYMENTS:
+    if deployment_id in app_config.AZURE_AI_VISION_DEPLOYMENTS:
         storage = create_file_storage("images", request.headers)
         return await azure_ai_vision_embeddings(
             creds, deployment_id, upstream_endpoint, storage, data
diff --git a/aidial_adapter_openai/app_config.py b/aidial_adapter_openai/app_config.py
new file mode 100644
index 0000000..45bdd94
--- /dev/null
+++ b/aidial_adapter_openai/app_config.py
@@ -0,0 +1,39 @@
+from typing import Dict, List
+
+from pydantic import BaseModel, Field
+
+import aidial_adapter_openai.env as env
+
+
+class ApplicationConfig(BaseModel):
+    MODEL_ALIASES: Dict[str, str] = Field(default_factory=dict)
+    DALLE3_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    GPT4_VISION_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    MISTRAL_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    DATABRICKS_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    GPT4O_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    GPT4O_MINI_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    AZURE_AI_VISION_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    API_VERSIONS_MAPPING: Dict[str, str] = Field(default_factory=dict)
+    COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES: Dict[str, str] = Field(
+        default_factory=dict
+    )
+    DALLE3_AZURE_API_VERSION: str = Field(default="2024-02-01")
+    NON_STREAMING_DEPLOYMENTS: List[str] = Field(default_factory=list)
+
+    @classmethod
+    def from_env(cls) -> "ApplicationConfig":
+        return cls(
+            MODEL_ALIASES=env.MODEL_ALIASES,
+            DALLE3_DEPLOYMENTS=env.DALLE3_DEPLOYMENTS,
+            GPT4_VISION_DEPLOYMENTS=env.GPT4_VISION_DEPLOYMENTS,
+            MISTRAL_DEPLOYMENTS=env.MISTRAL_DEPLOYMENTS,
+            DATABRICKS_DEPLOYMENTS=env.DATABRICKS_DEPLOYMENTS,
+            GPT4O_DEPLOYMENTS=env.GPT4O_DEPLOYMENTS,
+            GPT4O_MINI_DEPLOYMENTS=env.GPT4O_MINI_DEPLOYMENTS,
+            AZURE_AI_VISION_DEPLOYMENTS=env.AZURE_AI_VISION_DEPLOYMENTS,
+            API_VERSIONS_MAPPING=env.API_VERSIONS_MAPPING,
+            COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES=env.COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES,
+            DALLE3_AZURE_API_VERSION=env.DALLE3_AZURE_API_VERSION,
+            NON_STREAMING_DEPLOYMENTS=env.NON_STREAMING_DEPLOYMENTS,
+        )
diff --git a/aidial_adapter_openai/completions.py b/aidial_adapter_openai/completions.py
index 90834b5..4f11027 100644
--- a/aidial_adapter_openai/completions.py
+++ b/aidial_adapter_openai/completions.py
@@ -4,7 +4,7 @@
 from openai import AsyncStream
 from openai.types import Completion
 
-from aidial_adapter_openai.env import COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES
+from aidial_adapter_openai.app_config import ApplicationConfig
 from aidial_adapter_openai.utils.auth import OpenAICreds
 from aidial_adapter_openai.utils.parsers import (
     AzureOpenAIEndpoint,
@@ -46,6 +46,7 @@ async def chat_completion(
     creds: OpenAICreds,
     api_version: str,
     deployment_id: str,
+    app_config: ApplicationConfig,
 ):
 
     if data.get("n") or 1 > 1:
@@ -60,7 +61,9 @@ async def chat_completion(
     prompt = messages[-1].get("content") or ""
 
     if (
-        template := COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES.get(deployment_id)
+        template := app_config.COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES.get(
+            deployment_id
+        )
     ) is not None:
         prompt = template.format(prompt=prompt)
diff --git a/aidial_adapter_openai/constant.py b/aidial_adapter_openai/constant.py
deleted file mode 100644
index e69de29..0000000
diff --git a/aidial_adapter_openai/utils/image_tokenizer.py b/aidial_adapter_openai/utils/image_tokenizer.py
index 0fe5bf9..b6d328d 100644
--- a/aidial_adapter_openai/utils/image_tokenizer.py
+++ b/aidial_adapter_openai/utils/image_tokenizer.py
@@ -8,11 +8,7 @@
 
 from pydantic import BaseModel
 
-from aidial_adapter_openai.env import (
-    GPT4_VISION_DEPLOYMENTS,
-    GPT4O_DEPLOYMENTS,
-    GPT4O_MINI_DEPLOYMENTS,
-)
+from aidial_adapter_openai.app_config import ApplicationConfig
 from aidial_adapter_openai.utils.image import ImageDetail, resolve_detail_level
 
 
@@ -58,14 +54,18 @@ def _compute_high_detail_tokens(self, width: int, height: int) -> int:
     low_detail_tokens=2833, tokens_per_tile=5667
 )
 
-_TOKENIZERS: List[Tuple[ImageTokenizer, List[str]]] = [
-    (GPT4O_IMAGE_TOKENIZER, GPT4O_DEPLOYMENTS),
-    (GPT4O_MINI_IMAGE_TOKENIZER, GPT4O_MINI_DEPLOYMENTS),
-    (GPT4_VISION_IMAGE_TOKENIZER, GPT4_VISION_DEPLOYMENTS),
-]
-
 
-def get_image_tokenizer(deployment_id: str) -> ImageTokenizer | None:
+def get_image_tokenizer(
+    deployment_id: str, app_config: ApplicationConfig
+) -> ImageTokenizer | None:
+    _TOKENIZERS: List[Tuple[ImageTokenizer, List[str]]] = [
+        (GPT4O_IMAGE_TOKENIZER, app_config.GPT4O_DEPLOYMENTS),
+        (GPT4O_MINI_IMAGE_TOKENIZER, app_config.GPT4O_MINI_DEPLOYMENTS),
+        (
+            GPT4_VISION_IMAGE_TOKENIZER,
+            app_config.GPT4_VISION_DEPLOYMENTS,
+        ),
+    ]
     for tokenizer, ids in _TOKENIZERS:
         if deployment_id in ids:
             return tokenizer

From 6fd770ed6328d9a47dc29eebc1ce81221712a66d Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Wed, 27 Nov 2024 10:30:07 +0200
Subject: [PATCH 02/10] Big refactoring, address comments in PR

---
 aidial_adapter_openai/app.py                | 275 +++----------
 aidial_adapter_openai/app_config.py         |  95 ++++--
 aidial_adapter_openai/env.py                |  47 ---
 aidial_adapter_openai/gpt.py                |   2 +
 .../gpt4_multi_modal/chat_completion.py     |   6 +
 aidial_adapter_openai/routers/__init__.py   |   0
 .../routers/chat_completion.py              | 159 ++++++++
 aidial_adapter_openai/routers/embeddings.py |  41 +++
 .../utils/image_tokenizer.py                |  26 +-
 aidial_adapter_openai/utils/request.py      |  27 ++
 aidial_adapter_openai/utils/streaming.py    |   6 +-
 tests/conftest.py                           |  26 +-
 12 files changed, 366 insertions(+), 344 deletions(-)
 delete mode 100644 aidial_adapter_openai/env.py
 create mode 100644 aidial_adapter_openai/routers/__init__.py
 create mode 100644 aidial_adapter_openai/routers/chat_completion.py
 create mode 100644 aidial_adapter_openai/routers/embeddings.py
 create mode 100644 aidial_adapter_openai/utils/request.py

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index e6cceb8..cd348c8 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -1,13 +1,11 @@
 from contextlib import asynccontextmanager
-from typing import Annotated
 
 import pydantic
 from aidial_sdk._errors import pydantic_validation_exception_handler
 from aidial_sdk.exceptions import HTTPException as DialException
-from aidial_sdk.exceptions import InvalidRequestError
 from aidial_sdk.telemetry.init import init_telemetry
 from aidial_sdk.telemetry.types import TelemetryConfig
-from fastapi import Depends, FastAPI, Request
+from fastapi import FastAPI, Request
 from fastapi.responses import Response
 from openai import (
     APIConnectionError,
@@ -18,41 +16,10 @@
 )
 
 from aidial_adapter_openai.app_config import ApplicationConfig
-from aidial_adapter_openai.completions import chat_completion as completion
-from aidial_adapter_openai.dalle3 import (
-    chat_completion as dalle3_chat_completion,
-)
-from aidial_adapter_openai.databricks import (
-    chat_completion as databricks_chat_completion,
-)
-from aidial_adapter_openai.dial_api.storage import create_file_storage
-from aidial_adapter_openai.embeddings.azure_ai_vision import (
-    embeddings as azure_ai_vision_embeddings,
-)
-from aidial_adapter_openai.embeddings.openai import (
-    embeddings as openai_embeddings,
-)
-from aidial_adapter_openai.gpt import gpt_chat_completion
-from aidial_adapter_openai.gpt4_multi_modal.chat_completion import (
-    gpt4_vision_chat_completion,
-    gpt4o_chat_completion,
-)
-from aidial_adapter_openai.mistral import (
-    chat_completion as mistral_chat_completion,
-)
-from aidial_adapter_openai.utils.auth import get_credentials
+from aidial_adapter_openai.routers.chat_completion import chat_completion
+from aidial_adapter_openai.routers.embeddings import embedding
 from aidial_adapter_openai.utils.http_client import get_http_client
-from aidial_adapter_openai.utils.image_tokenizer import (
-    ImageTokenizer,
-    get_image_tokenizer,
-)
 from aidial_adapter_openai.utils.log_config import configure_loggers, logger
-from aidial_adapter_openai.utils.parsers import completions_parser, parse_body
-from aidial_adapter_openai.utils.streaming import create_server_response
-from aidial_adapter_openai.utils.tokenizer import (
-    MultiModalTokenizer,
-    PlainTextTokenizer,
-)
 
 
 @asynccontextmanager
@@ -62,205 +29,6 @@ async def lifespan(app: FastAPI):
     await get_http_client().aclose()
 
 
-def create_app(
-    app_config: ApplicationConfig | None = None,
-    to_init_telemetry: bool = True,
-    to_configure_loggers: bool = True,
-) -> FastAPI:
-    app = FastAPI(lifespan=lifespan)
-
-    if app_config is None:
-        app_config = ApplicationConfig.from_env()
-
-    app.state.app_config = app_config
-
-    if to_init_telemetry:
-        init_telemetry(app, TelemetryConfig())
-
-    if to_configure_loggers:
-        configure_loggers()
-
-    return app
-
-
-def get_app_config(request: Request) -> ApplicationConfig:
-    return request.app.state.app_config
-
-
-def get_api_version(request: Request) -> str:
-    api_version = request.query_params.get("api-version", "")
-    app_config = get_app_config(request)
-    api_version = app_config.API_VERSIONS_MAPPING.get(api_version, api_version)
-
-    if api_version == "":
-        raise InvalidRequestError("api-version is a required query parameter")
-
-    return api_version
-
-
-def _get_image_tokenizer(
-    deployment_id: str, app_config: ApplicationConfig
-) -> ImageTokenizer:
-    image_tokenizer = get_image_tokenizer(deployment_id, app_config)
-    if not image_tokenizer:
-        raise RuntimeError(
-            f"No image tokenizer found for deployment {deployment_id}"
-        )
-    return image_tokenizer
-
-
-app = create_app()
-
-
-@app.post("/openai/deployments/{deployment_id:path}/chat/completions")
-async def chat_completion(
-    deployment_id: str,
-    request: Request,
-    app_config: Annotated[ApplicationConfig, Depends(get_app_config)],
-):
-
-    data = await parse_body(request)
-
-    is_stream = bool(data.get("stream"))
-
-    emulate_streaming = (
-        deployment_id in app_config.NON_STREAMING_DEPLOYMENTS and is_stream
-    )
-
-    if emulate_streaming:
-        data["stream"] = False
-
-    return create_server_response(
-        emulate_streaming,
-        await call_chat_completion(
-            deployment_id, data, is_stream, request, app_config
-        ),
-    )
-
-
-async def call_chat_completion(
-    deployment_id: str,
-    data: dict,
-    is_stream: bool,
-    request: Request,
-    app_config: ApplicationConfig,
-):
-
-    # Azure OpenAI deployments ignore "model" request field,
-    # since the deployment id is already encoded in the endpoint path.
-    # This is not the case for non-Azure OpenAI deployments, so
-    # they require the "model" field to be set.
-    # However, openai==1.33.0 requires the "model" field for **both**
-    # Azure and non-Azure deployments.
-    # Therefore, we provide the "model" field for all deployments here.
-    # The same goes for /embeddings endpoint.
-    data["model"] = deployment_id
-
-    creds = await get_credentials(request)
-    api_version = get_api_version(request)
-
-    upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"]
-
-    if completions_endpoint := completions_parser.parse(upstream_endpoint):
-        return await completion(
-            data,
-            completions_endpoint,
-            creds,
-            api_version,
-            deployment_id,
-            app_config,
-        )
-    if deployment_id in app_config.DALLE3_DEPLOYMENTS:
-        storage = create_file_storage("images", request.headers)
-        return await dalle3_chat_completion(
-            data,
-            upstream_endpoint,
-            creds,
-            is_stream,
-            storage,
-            app_config.DALLE3_AZURE_API_VERSION,
-        )
-
-    if deployment_id in app_config.MISTRAL_DEPLOYMENTS:
-        return await mistral_chat_completion(data, upstream_endpoint, creds)
-
-    if deployment_id in app_config.DATABRICKS_DEPLOYMENTS:
-        return await databricks_chat_completion(data, upstream_endpoint, creds)
-
-    text_tokenizer_model = app_config.MODEL_ALIASES.get(
-        deployment_id, deployment_id
-    )
-
-    if deployment_id in app_config.GPT4_VISION_DEPLOYMENTS:
-        tokenizer = MultiModalTokenizer(
-            "gpt-4", _get_image_tokenizer(deployment_id, app_config)
-        )
-        return await gpt4_vision_chat_completion(
-            data,
-            deployment_id,
-            upstream_endpoint,
-            creds,
-            is_stream,
-            create_file_storage("images", request.headers),
-            api_version,
-            tokenizer,
-        )
-
-    if deployment_id in (
-        *app_config.GPT4O_DEPLOYMENTS,
-        *app_config.GPT4O_MINI_DEPLOYMENTS,
-    ):
-        tokenizer = MultiModalTokenizer(
-            text_tokenizer_model,
-            _get_image_tokenizer(deployment_id, app_config),
-        )
-        return await gpt4o_chat_completion(
-            data,
-            deployment_id,
-            upstream_endpoint,
-            creds,
-            is_stream,
-            create_file_storage("images", request.headers),
-            api_version,
-            tokenizer,
-        )
-
-    tokenizer = PlainTextTokenizer(model=text_tokenizer_model)
-    return await gpt_chat_completion(
-        data,
-        deployment_id,
-        upstream_endpoint,
-        creds,
-        api_version,
-        tokenizer,
-    )
-
-
-@app.post("/openai/deployments/{deployment_id:path}/embeddings")
-async def embedding(
-    deployment_id: str,
-    request: Request,
-    app_config: Annotated[ApplicationConfig, Depends(get_app_config)],
-):
-    data = await parse_body(request)
-
-    # See note for /chat/completions endpoint
-    data["model"] = deployment_id
-
-    creds = await get_credentials(request)
-    api_version = get_api_version(request)
-    upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"]
-
-    if deployment_id in app_config.AZURE_AI_VISION_DEPLOYMENTS:
-        storage = create_file_storage("images", request.headers)
-        return await azure_ai_vision_embeddings(
-            creds, deployment_id, upstream_endpoint, storage, data
-        )
-
-    return await openai_embeddings(creds, upstream_endpoint, api_version, data)
-
-
-@app.exception_handler(OpenAIError)
 def openai_exception_handler(request: Request, e: DialException):
@@ -303,16 +71,43 @@ def openai_exception_handler(request: Request, e: DialException):
     )
 
 
-@app.exception_handler(pydantic.ValidationError)
 def pydantic_exception_handler(request: Request, exc: pydantic.ValidationError):
     return pydantic_validation_exception_handler(request, exc)
 
 
-@app.exception_handler(DialException)
 def dial_exception_handler(request: Request, exc: DialException):
     return exc.to_fastapi_response()
 
 
-@app.get("/health")
-def health():
-    return {"status": "ok"}
+def create_app(
+    app_config: ApplicationConfig | None = None,
+    to_init_telemetry: bool = True,
+) -> FastAPI:
+    app = FastAPI(lifespan=lifespan)
+
+    if app_config is None:
+        app_config = ApplicationConfig.from_env()
+
+    app.state.app_config = app_config
+
+    if to_init_telemetry:
+        init_telemetry(app, TelemetryConfig())
+
+    configure_loggers()
+
+    @app.get("/health")
+    def health():
+        return {"status": "ok"}
+
+    app.post("/openai/deployments/{deployment_id:path}/embeddings")(embedding)
+    app.post("/openai/deployments/{deployment_id:path}/chat/completions")(
+        chat_completion
+    )
+    app.exception_handler(OpenAIError)(openai_exception_handler)
+    app.exception_handler(pydantic.ValidationError)(pydantic_exception_handler)
+    app.exception_handler(DialException)(dial_exception_handler)
+
+    return app
+
+
+app = create_app()
diff --git a/aidial_adapter_openai/app_config.py b/aidial_adapter_openai/app_config.py
index 45bdd94..d024e4c 100644
--- a/aidial_adapter_openai/app_config.py
+++ b/aidial_adapter_openai/app_config.py
@@ -1,39 +1,78 @@
+import json
+import os
 from typing import Dict, List
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
-import aidial_adapter_openai.env as env
+from aidial_adapter_openai.utils.env import get_env_bool
+from aidial_adapter_openai.utils.log_config import logger
+from aidial_adapter_openai.utils.parsers import parse_deployment_list
+
+
+def _get_eliminate_empty_choices() -> bool:
+    old_name = "FIX_STREAMING_ISSUES_IN_NEW_API_VERSIONS"
+    new_name = "ELIMINATE_EMPTY_CHOICES"
+
+    if old_name in os.environ:
+        logger.warning(
+            f"{old_name} environment variable is deprecated. Use {new_name} instead."
+        )
+        return get_env_bool(old_name, False)
+
+    return get_env_bool(new_name, False)
 
 
 class ApplicationConfig(BaseModel):
-    MODEL_ALIASES: Dict[str, str] = Field(default_factory=dict)
-    DALLE3_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    GPT4_VISION_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    MISTRAL_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    DATABRICKS_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    GPT4O_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    GPT4O_MINI_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    AZURE_AI_VISION_DEPLOYMENTS: List[str] = Field(default_factory=list)
-    API_VERSIONS_MAPPING: Dict[str, str] = Field(default_factory=dict)
-    COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES: Dict[str, str] = Field(
-        default_factory=dict
-    )
-    DALLE3_AZURE_API_VERSION: str = Field(default="2024-02-01")
-    NON_STREAMING_DEPLOYMENTS: List[str] = Field(default_factory=list)
+    MODEL_ALIASES: Dict[str, str] = {}
+    DALLE3_DEPLOYMENTS: List[str] = []
+    GPT4_VISION_DEPLOYMENTS: List[str] = []
+    MISTRAL_DEPLOYMENTS: List[str] = []
+    DATABRICKS_DEPLOYMENTS: List[str] = []
+    GPT4O_DEPLOYMENTS: List[str] = []
+    GPT4O_MINI_DEPLOYMENTS: List[str] = []
+    AZURE_AI_VISION_DEPLOYMENTS: List[str] = []
+    API_VERSIONS_MAPPING: Dict[str, str] = {}
+    COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES: Dict[str, str] = {}
+    DALLE3_AZURE_API_VERSION: str = "2024-02-01"
+    NON_STREAMING_DEPLOYMENTS: List[str] = []
+    ELIMINATE_EMPTY_CHOICES: bool = False
 
     @classmethod
     def from_env(cls) -> "ApplicationConfig":
         return cls(
-            MODEL_ALIASES=env.MODEL_ALIASES,
-            DALLE3_DEPLOYMENTS=env.DALLE3_DEPLOYMENTS,
-            GPT4_VISION_DEPLOYMENTS=env.GPT4_VISION_DEPLOYMENTS,
-            MISTRAL_DEPLOYMENTS=env.MISTRAL_DEPLOYMENTS,
-            DATABRICKS_DEPLOYMENTS=env.DATABRICKS_DEPLOYMENTS,
-            GPT4O_DEPLOYMENTS=env.GPT4O_DEPLOYMENTS,
-            GPT4O_MINI_DEPLOYMENTS=env.GPT4O_MINI_DEPLOYMENTS,
-            AZURE_AI_VISION_DEPLOYMENTS=env.AZURE_AI_VISION_DEPLOYMENTS,
-            API_VERSIONS_MAPPING=env.API_VERSIONS_MAPPING,
-            COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES=env.COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES,
-            DALLE3_AZURE_API_VERSION=env.DALLE3_AZURE_API_VERSION,
-            NON_STREAMING_DEPLOYMENTS=env.NON_STREAMING_DEPLOYMENTS,
+            MODEL_ALIASES=json.loads(os.getenv("MODEL_ALIASES", "{}")),
+            DALLE3_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("DALLE3_DEPLOYMENTS")
+            ),
+            GPT4_VISION_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("GPT4_VISION_DEPLOYMENTS")
+            ),
+            MISTRAL_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("MISTRAL_DEPLOYMENTS")
+            ),
+            DATABRICKS_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("DATABRICKS_DEPLOYMENTS")
+            ),
+            GPT4O_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("GPT4O_DEPLOYMENTS")
+            ),
+            GPT4O_MINI_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("GPT4O_MINI_DEPLOYMENTS")
+            ),
+            AZURE_AI_VISION_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("AZURE_AI_VISION_DEPLOYMENTS")
+            ),
+            API_VERSIONS_MAPPING=json.loads(
+                os.getenv("API_VERSIONS_MAPPING", "{}")
+            ),
+            COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES=json.loads(
+                os.getenv("COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES") or "{}"
+            ),
+            DALLE3_AZURE_API_VERSION=os.getenv(
+                "DALLE3_AZURE_API_VERSION", "2024-02-01"
+            ),
+            NON_STREAMING_DEPLOYMENTS=parse_deployment_list(
+                os.getenv("NON_STREAMING_DEPLOYMENTS")
+            ),
+            ELIMINATE_EMPTY_CHOICES=_get_eliminate_empty_choices(),
         )
diff --git a/aidial_adapter_openai/env.py b/aidial_adapter_openai/env.py
deleted file mode 100644
index e55ddea..0000000
--- a/aidial_adapter_openai/env.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import json
-import os
-from typing import Dict
-
-from aidial_adapter_openai.utils.env import get_env_bool
-from aidial_adapter_openai.utils.log_config import logger
-from aidial_adapter_openai.utils.parsers import parse_deployment_list
-
-MODEL_ALIASES: Dict[str, str] = json.loads(os.getenv("MODEL_ALIASES", "{}"))
-DALLE3_DEPLOYMENTS = parse_deployment_list(os.getenv("DALLE3_DEPLOYMENTS"))
-GPT4_VISION_DEPLOYMENTS = parse_deployment_list(
-    os.getenv("GPT4_VISION_DEPLOYMENTS")
-)
-MISTRAL_DEPLOYMENTS = parse_deployment_list(os.getenv("MISTRAL_DEPLOYMENTS"))
-DATABRICKS_DEPLOYMENTS = parse_deployment_list(
-    os.getenv("DATABRICKS_DEPLOYMENTS")
-)
-GPT4O_DEPLOYMENTS = parse_deployment_list(os.getenv("GPT4O_DEPLOYMENTS"))
-GPT4O_MINI_DEPLOYMENTS = parse_deployment_list(
-    os.getenv("GPT4O_MINI_DEPLOYMENTS")
-)
-API_VERSIONS_MAPPING: Dict[str, str] = json.loads(
-    os.getenv("API_VERSIONS_MAPPING", "{}")
-)
-COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES: Dict[str, str] = json.loads(
-    os.getenv("COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES") or "{}"
-)
-DALLE3_AZURE_API_VERSION = os.getenv("DALLE3_AZURE_API_VERSION", "2024-02-01")
-NON_STREAMING_DEPLOYMENTS = parse_deployment_list(
-    os.getenv("NON_STREAMING_DEPLOYMENTS")
-)
-AZURE_AI_VISION_DEPLOYMENTS = parse_deployment_list(
-    os.getenv("AZURE_AI_VISION_DEPLOYMENTS")
-)
-
-
-def get_eliminate_empty_choices() -> bool:
-    old_name = "FIX_STREAMING_ISSUES_IN_NEW_API_VERSIONS"
-    new_name = "ELIMINATE_EMPTY_CHOICES"
-
-    if old_name in os.environ:
-        logger.warning(
-            f"{old_name} environment variable is deprecated. Use {new_name} instead."
-        )
-        return get_env_bool(old_name, False)
-
-    return get_env_bool(new_name, False)
diff --git a/aidial_adapter_openai/gpt.py b/aidial_adapter_openai/gpt.py
index 5d6610d..d4c6cde 100644
--- a/aidial_adapter_openai/gpt.py
+++ b/aidial_adapter_openai/gpt.py
@@ -44,6 +44,7 @@ async def gpt_chat_completion(
     creds: OpenAICreds,
     api_version: str,
     tokenizer: PlainTextTokenizer,
+    eliminate_empty_choices: bool,
 ):
     discarded_messages = None
     estimated_prompt_tokens = None
@@ -83,6 +84,7 @@ async def gpt_chat_completion(
             deployment=deployment_id,
             discarded_messages=discarded_messages,
             stream=map_stream(chunk_to_dict, response),
+            eliminate_empty_choices=eliminate_empty_choices,
         )
     else:
         rest = response.to_dict()
diff --git a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
index 26d82c7..216137d 100644
--- a/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
+++ b/aidial_adapter_openai/gpt4_multi_modal/chat_completion.py
@@ -143,6 +143,7 @@ async def gpt4o_chat_completion(
     file_storage: Optional[FileStorage],
     api_version: str,
     tokenizer: MultiModalTokenizer,
+    eliminate_empty_choices: bool,
 ):
     return await chat_completion(
         request,
@@ -155,6 +156,7 @@ async def gpt4o_chat_completion(
         tokenizer,
         lambda x: x,
         None,
+        eliminate_empty_choices,
     )
 
 
@@ -167,6 +169,7 @@ async def gpt4_vision_chat_completion(
     file_storage: Optional[FileStorage],
     api_version: str,
     tokenizer: MultiModalTokenizer,
+    eliminate_empty_choices: bool,
 ):
     return await chat_completion(
         request,
@@ -179,6 +182,7 @@ async def gpt4_vision_chat_completion(
         tokenizer,
         convert_gpt4v_to_gpt4_chunk,
         GPT4V_DEFAULT_MAX_TOKENS,
+        eliminate_empty_choices,
     )
 
 
@@ -193,6 +197,7 @@ async def chat_completion(
     tokenizer: MultiModalTokenizer,
     response_transformer: Callable[[dict], dict | None],
     default_max_tokens: Optional[int],
+    eliminate_empty_choices: bool,
 ):
     if request.get("n", 1) > 1:
         raise RequestValidationError("The deployment doesn't support n > 1")
@@ -265,6 +270,7 @@ def debug_print(chunk: T) -> T:
                     response_transformer,
                     parse_openai_sse_stream(response),
                 ),
+                eliminate_empty_choices=eliminate_empty_choices,
             ),
         )
     else:
diff --git a/aidial_adapter_openai/routers/__init__.py b/aidial_adapter_openai/routers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aidial_adapter_openai/routers/chat_completion.py b/aidial_adapter_openai/routers/chat_completion.py
new file mode 100644
index 0000000..d6f86ac
--- /dev/null
+++ b/aidial_adapter_openai/routers/chat_completion.py
@@ -0,0 +1,159 @@
+from typing import Annotated
+
+from fastapi import Depends, Request
+
+from aidial_adapter_openai.app_config import ApplicationConfig
+from aidial_adapter_openai.completions import chat_completion as completion
+from aidial_adapter_openai.dalle3 import (
+    chat_completion as dalle3_chat_completion,
+)
+from aidial_adapter_openai.databricks import (
+    chat_completion as databricks_chat_completion,
+)
+from aidial_adapter_openai.dial_api.storage import create_file_storage
+from aidial_adapter_openai.gpt import gpt_chat_completion
+from aidial_adapter_openai.gpt4_multi_modal.chat_completion import (
+    gpt4_vision_chat_completion,
+    gpt4o_chat_completion,
+)
+from aidial_adapter_openai.mistral import (
+    chat_completion as mistral_chat_completion,
+)
+from aidial_adapter_openai.utils.auth import get_credentials
+from aidial_adapter_openai.utils.image_tokenizer import get_image_tokenizer
+from aidial_adapter_openai.utils.parsers import completions_parser, parse_body
+from aidial_adapter_openai.utils.request import (
+    get_api_version,
+    get_request_app_config,
+)
+from aidial_adapter_openai.utils.streaming import create_server_response
+from aidial_adapter_openai.utils.tokenizer import (
+    MultiModalTokenizer,
+    PlainTextTokenizer,
+)
+
+
+async def call_chat_completion(
+    deployment_id: str,
+    data: dict,
+    is_stream: bool,
+    request: Request,
+    app_config: ApplicationConfig,
+):
+
+    # Azure OpenAI deployments ignore "model" request field,
+    # since the deployment id is already encoded in the endpoint path.
+    # This is not the case for non-Azure OpenAI deployments, so
+    # they require the "model" field to be set.
+    # However, openai==1.33.0 requires the "model" field for **both**
+    # Azure and non-Azure deployments.
+    # Therefore, we provide the "model" field for all deployments here.
+    # The same goes for /embeddings endpoint.
+    data["model"] = deployment_id
+
+    creds = await get_credentials(request)
+    api_version = get_api_version(request)
+
+    upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"]
+
+    if completions_endpoint := completions_parser.parse(upstream_endpoint):
+        return await completion(
+            data,
+            completions_endpoint,
+            creds,
+            api_version,
+            deployment_id,
+            app_config,
+        )
+    if deployment_id in app_config.DALLE3_DEPLOYMENTS:
+        storage = create_file_storage("images", request.headers)
+        return await dalle3_chat_completion(
+            data,
+            upstream_endpoint,
+            creds,
+            is_stream,
+            storage,
+            app_config.DALLE3_AZURE_API_VERSION,
+        )
+
+    if deployment_id in app_config.MISTRAL_DEPLOYMENTS:
+        return await mistral_chat_completion(data, upstream_endpoint, creds)
+
+    if deployment_id in app_config.DATABRICKS_DEPLOYMENTS:
+        return await databricks_chat_completion(data, upstream_endpoint, creds)
+
+    text_tokenizer_model = app_config.MODEL_ALIASES.get(
+        deployment_id, deployment_id
+    )
+
+    if deployment_id in app_config.GPT4_VISION_DEPLOYMENTS:
+        tokenizer = MultiModalTokenizer(
+            "gpt-4", get_image_tokenizer(deployment_id, app_config)
+        )
+        return await gpt4_vision_chat_completion(
+            data,
+            deployment_id,
+            upstream_endpoint,
+            creds,
+            is_stream,
+            create_file_storage("images", request.headers),
+            api_version,
+            tokenizer,
+            app_config.ELIMINATE_EMPTY_CHOICES,
+        )
+
+    if deployment_id in (
+        *app_config.GPT4O_DEPLOYMENTS,
+        *app_config.GPT4O_MINI_DEPLOYMENTS,
+    ):
+        tokenizer = MultiModalTokenizer(
+            text_tokenizer_model,
+            get_image_tokenizer(deployment_id, app_config),
+        )
+        return await gpt4o_chat_completion(
+            data,
+            deployment_id,
+            upstream_endpoint,
+            creds,
+            is_stream,
+            create_file_storage("images", request.headers),
+            api_version,
+            tokenizer,
+            app_config.ELIMINATE_EMPTY_CHOICES,
+        )
+
+    tokenizer = PlainTextTokenizer(model=text_tokenizer_model)
+    return await gpt_chat_completion(
+        data,
+        deployment_id,
+        upstream_endpoint,
+        creds,
+        api_version,
+        tokenizer,
+        app_config.ELIMINATE_EMPTY_CHOICES,
+    )
+
+
+async def chat_completion(
+    deployment_id: str,
+    request: Request,
+    app_config: Annotated[ApplicationConfig, Depends(get_request_app_config)],
+):
+
+    data = await parse_body(request)
+
+    is_stream = bool(data.get("stream"))
+
+    emulate_streaming = (
+        deployment_id in app_config.NON_STREAMING_DEPLOYMENTS and is_stream
+    )
+
+    if emulate_streaming:
+        data["stream"] = False
+
+    return create_server_response(
+        emulate_streaming,
+        await call_chat_completion(
+            deployment_id, data, is_stream, request, app_config
+        ),
+    )
diff --git a/aidial_adapter_openai/routers/embeddings.py b/aidial_adapter_openai/routers/embeddings.py
new file mode 100644
index 0000000..126d3ff
--- /dev/null
+++ b/aidial_adapter_openai/routers/embeddings.py
@@ -0,0 +1,41 @@
+from typing import Annotated
+
+from fastapi import Depends, Request
+
+from aidial_adapter_openai.app_config import ApplicationConfig
+from aidial_adapter_openai.dial_api.storage import create_file_storage
+from aidial_adapter_openai.embeddings.azure_ai_vision import (
+    embeddings as azure_ai_vision_embeddings,
+)
+from aidial_adapter_openai.embeddings.openai import (
+    embeddings as openai_embeddings,
+)
+from aidial_adapter_openai.utils.auth import get_credentials
+from aidial_adapter_openai.utils.parsers import parse_body
+from aidial_adapter_openai.utils.request import (
+    get_api_version,
+    get_request_app_config,
+)
+
+
+async def embedding(
+    deployment_id: str,
+    request: Request,
+    app_config: Annotated[ApplicationConfig, Depends(get_request_app_config)],
+):
+    data = await parse_body(request)
+
+    # See note for /chat/completions endpoint
+    data["model"] = deployment_id
+
+    creds = await get_credentials(request)
+    api_version = get_api_version(request)
+    upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"]
+
+    if deployment_id in app_config.AZURE_AI_VISION_DEPLOYMENTS:
+        storage = create_file_storage("images", request.headers)
+        return await azure_ai_vision_embeddings(
+            creds, deployment_id, upstream_endpoint, storage, data
+        )
+
+    return await openai_embeddings(creds, upstream_endpoint, api_version, data)
diff --git a/aidial_adapter_openai/utils/image_tokenizer.py b/aidial_adapter_openai/utils/image_tokenizer.py
index b6d328d..01eb467 100644
--- a/aidial_adapter_openai/utils/image_tokenizer.py
+++ b/aidial_adapter_openai/utils/image_tokenizer.py
@@ -4,7 +4,7 @@
 """
 
 import math
-from typing import List, Tuple, assert_never
+from typing import assert_never
 
 from pydantic import BaseModel
 
@@ -57,19 +57,17 @@ def _compute_high_detail_tokens(self, width: int, height: int) -> int:
 
 def get_image_tokenizer(
     deployment_id: str, app_config: ApplicationConfig
-) -> ImageTokenizer | None:
-    _TOKENIZERS: List[Tuple[ImageTokenizer, List[str]]] = [
-        (GPT4O_IMAGE_TOKENIZER, app_config.GPT4O_DEPLOYMENTS),
-        (GPT4O_MINI_IMAGE_TOKENIZER, app_config.GPT4O_MINI_DEPLOYMENTS),
-        (
-            GPT4_VISION_IMAGE_TOKENIZER,
-            app_config.GPT4_VISION_DEPLOYMENTS,
-        ),
-    ]
-    for tokenizer, ids in _TOKENIZERS:
-        if deployment_id in ids:
-            return tokenizer
-    return None
+) -> ImageTokenizer:
+    if deployment_id in app_config.GPT4O_DEPLOYMENTS:
+        return GPT4O_IMAGE_TOKENIZER
+    elif deployment_id in app_config.GPT4O_MINI_DEPLOYMENTS:
+        return GPT4O_MINI_IMAGE_TOKENIZER
+    elif deployment_id in app_config.GPT4_VISION_DEPLOYMENTS:
+        return GPT4_VISION_IMAGE_TOKENIZER
+    else:
+        raise RuntimeError(
+            f"No image tokenizer found for deployment {deployment_id}"
+        )
 
 
 def _fit_longest(width: int, height: int, size: int) -> tuple[int, int]:
diff --git a/aidial_adapter_openai/utils/request.py b/aidial_adapter_openai/utils/request.py
new file mode 100644
index 0000000..c8d6457
--- /dev/null
+++ b/aidial_adapter_openai/utils/request.py
@@ -0,0 +1,27 @@
+from aidial_sdk.exceptions import InvalidRequestError
+from fastapi import FastAPI, Request
+
+from aidial_adapter_openai.app_config import ApplicationConfig
+
+
+def set_app_config(app: FastAPI, app_config: ApplicationConfig):
+    app.state.app_config = app_config
+
+
+def get_app_config(app: FastAPI) -> ApplicationConfig:
+    return app.state.app_config
+
+
+def get_request_app_config(request: Request) -> ApplicationConfig:
+    return get_app_config(request.app)
+
+
+def get_api_version(request: Request) -> str:
+    api_version = request.query_params.get("api-version", "")
+    app_config = get_request_app_config(request)
+    api_version = app_config.API_VERSIONS_MAPPING.get(api_version, api_version)
+
+    if api_version == "":
+        raise InvalidRequestError("api-version is a required query parameter")
+
+    return api_version
diff --git a/aidial_adapter_openai/utils/streaming.py b/aidial_adapter_openai/utils/streaming.py
index d677ef7..724ae00 100644
--- a/aidial_adapter_openai/utils/streaming.py
+++ b/aidial_adapter_openai/utils/streaming.py
@@ -10,7 +10,6 @@
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from pydantic import BaseModel
 
-from aidial_adapter_openai.env import get_eliminate_empty_choices
 from aidial_adapter_openai.utils.chat_completion_response import (
     ChatCompletionResponse,
     ChatCompletionStreamingChunk,
@@ -18,8 +17,6 @@
 from aidial_adapter_openai.utils.log_config import logger
 from aidial_adapter_openai.utils.sse_stream import to_openai_sse_stream
 
-ELIMINATE_EMPTY_CHOICES = get_eliminate_empty_choices()
-
 
 def generate_id() -> str:
     return "chatcmpl-" + str(uuid4())
@@ -62,6 +59,7 @@ async def generate_stream(
     deployment: str,
     discarded_messages: Optional[list[int]],
     stream: AsyncIterator[dict],
+    eliminate_empty_choices: bool,
 ) -> AsyncIterator[dict]:
 
     empty_chunk = build_chunk(
@@ -116,7 +114,7 @@ def set_discarded_messages(chunk: dict | None, indices: list[int]) -> dict:
         # when content filtering is enabled for a corresponding deployment.
         # The safety rating of the request is reported in this first chunk.
         # Here we withhold such a chunk and merge it later with a follow-up chunk.
-        if len(choices) == 0 and ELIMINATE_EMPTY_CHOICES:
+        if len(choices) == 0 and eliminate_empty_choices:
             buffer_chunk = chunk
         else:
             if last_chunk is not None:
diff --git a/tests/conftest.py b/tests/conftest.py
index 811cab1..a8efc95 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,25 +1,29 @@
-from unittest.mock import patch
-
 import httpx
 import pytest
 import pytest_asyncio
 from httpx import ASGITransport
 
-from aidial_adapter_openai.app import app
+from aidial_adapter_openai.app import create_app
+from aidial_adapter_openai.utils.request import get_app_config
 
 
-@pytest.fixture
-def eliminate_empty_choices():
-    with patch(
-        "aidial_adapter_openai.utils.streaming.ELIMINATE_EMPTY_CHOICES", True
-    ):
-        yield
+@pytest_asyncio.fixture
+def _app_instance():
+    return create_app()
 
 
 @pytest_asyncio.fixture
-async def test_app():
+async def test_app(_app_instance):
     async with httpx.AsyncClient(
-        transport=ASGITransport(app=app),  # type: ignore
+        transport=ASGITransport(app=_app_instance),
         base_url="http://test-app.com",
     ) as client:
         yield client
+
+
+@pytest.fixture
+def eliminate_empty_choices(_app_instance):
+    app_config = get_app_config(_app_instance)
+    app_config.ELIMINATE_EMPTY_CHOICES = True
+    yield
+    app_config.ELIMINATE_EMPTY_CHOICES = False

From 10aba8ce575b64735760c799f6824c84681f92c5 Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Wed, 27 Nov 2024 10:32:22 +0200
Subject: [PATCH 03/10] Refactor

---
 aidial_adapter_openai/app.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index cd348c8..1a264ec 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -20,6 +20,7 @@
 from aidial_adapter_openai.routers.embeddings import embedding
 from aidial_adapter_openai.utils.http_client import get_http_client
 from aidial_adapter_openai.utils.log_config import configure_loggers, logger
+from aidial_adapter_openai.utils.request import set_app_config
 
 
 @asynccontextmanager
@@ -84,11 +85,7 @@ def create_app(
     to_init_telemetry: bool = True,
 ) -> FastAPI:
     app = FastAPI(lifespan=lifespan)
-
-    if app_config is None:
-        app_config = ApplicationConfig.from_env()
-
-    app.state.app_config = app_config
+    set_app_config(app, app_config or ApplicationConfig.from_env())
 
     if to_init_telemetry:
         init_telemetry(app, TelemetryConfig())

From cd1b3df240686a50487169d97836695412846c95 Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Wed, 27 Nov 2024 10:39:00 +0200
Subject: [PATCH 04/10] One more refactor - move exception handlers to
 distinct file

---
 aidial_adapter_openai/app.py                | 67 +++-------------
 aidial_adapter_openai/exception_handlers.py | 56 +++++++++++++++
 2 files changed, 63 insertions(+), 60 deletions(-)
 create mode 100644 aidial_adapter_openai/exception_handlers.py

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index 1a264ec..d7f0202 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -1,21 +1,18 @@
 from contextlib import asynccontextmanager
 
 import pydantic
-from aidial_sdk._errors import pydantic_validation_exception_handler
 from aidial_sdk.exceptions import HTTPException as DialException
 from aidial_sdk.telemetry.init import init_telemetry
 from aidial_sdk.telemetry.types import TelemetryConfig
-from fastapi import FastAPI, Request
-from fastapi.responses import Response
-from openai import (
-    APIConnectionError,
-    APIError,
-    APIStatusError,
-    APITimeoutError,
-    OpenAIError,
-)
+from fastapi import FastAPI
+from openai import OpenAIError
 
 from aidial_adapter_openai.app_config import ApplicationConfig
+from aidial_adapter_openai.exception_handlers import (
+    dial_exception_handler,
+    openai_exception_handler,
+    pydantic_exception_handler,
+)
 from aidial_adapter_openai.routers.chat_completion import chat_completion
 from aidial_adapter_openai.routers.embeddings import embedding
 from aidial_adapter_openai.utils.http_client import get_http_client
@@ -30,56 +27,6 @@ async def lifespan(app: FastAPI):
     await get_http_client().aclose()
 
 
-def openai_exception_handler(request: Request, e: DialException):
-    if isinstance(e, APIStatusError):
-        r = e.response
-        headers = r.headers
-
-        # Avoid encoding the error message when the original response was encoded.
-        if "Content-Encoding" in headers:
-            del headers["Content-Encoding"]
-
-        return Response(
-            content=r.content,
-            status_code=r.status_code,
-            headers=headers,
-        )
-
-    if isinstance(e, APITimeoutError):
-        raise DialException(
-            status_code=504,
-            type="timeout",
-            message="Request timed out",
-            display_message="Request timed out. Please try again later.",
-        )
-
-    if isinstance(e, APIConnectionError):
-        raise DialException(
-            status_code=502,
-            type="connection",
-            message="Error communicating with OpenAI",
-            display_message="OpenAI server is not responsive. Please try again later.",
-        )
-
-    if isinstance(e, APIError):
-        raise DialException(
-            status_code=getattr(e, "status_code", None) or 500,
-            message=e.message,
-            type=e.type,
-            code=e.code,
-            param=e.param,
-            display_message=None,
-        )
-
-
-def pydantic_exception_handler(request: Request, exc: pydantic.ValidationError):
-    return pydantic_validation_exception_handler(request, exc)
-
-
-def dial_exception_handler(request: Request, exc: DialException):
-    return exc.to_fastapi_response()
-
-
 def create_app(
diff --git a/aidial_adapter_openai/exception_handlers.py b/aidial_adapter_openai/exception_handlers.py
new file mode 100644
index 0000000..c98c122
--- /dev/null
+++ b/aidial_adapter_openai/exception_handlers.py
@@ -0,0 +1,56 @@
+import pydantic
+from aidial_sdk._errors import pydantic_validation_exception_handler
+from aidial_sdk.exceptions import HTTPException as DialException
+from fastapi import Request
+from fastapi.responses import Response
+from openai import APIConnectionError, APIError, APIStatusError, APITimeoutError
+
+
+def openai_exception_handler(request: Request, e: DialException):
+    if isinstance(e, APIStatusError):
+        r = e.response
+        headers = r.headers
+
+        # Avoid encoding the error message when the original response was encoded.
+        if "Content-Encoding" in headers:
+            del headers["Content-Encoding"]
+
+        return Response(
+            content=r.content,
+            status_code=r.status_code,
+            headers=headers,
+        )
+
+    if isinstance(e, APITimeoutError):
+        raise DialException(
+            status_code=504,
+            type="timeout",
+            message="Request timed out",
+            display_message="Request timed out. Please try again later.",
+        )
+
+    if isinstance(e, APIConnectionError):
+        raise DialException(
+            status_code=502,
+            type="connection",
+            message="Error communicating with OpenAI",
+            display_message="OpenAI server is not responsive. Please try again later.",
+        )
+
+    if isinstance(e, APIError):
+        raise DialException(
+            status_code=getattr(e, "status_code", None) or 500,
+            message=e.message,
+            type=e.type,
+            code=e.code,
+            param=e.param,
+            display_message=None,
+        )
+
+
+def pydantic_exception_handler(request: Request, exc: pydantic.ValidationError):
+    return pydantic_validation_exception_handler(request, exc)
+
+
+def dial_exception_handler(request: Request, exc: DialException):
+    return exc.to_fastapi_response()

From e16cef9040b0fab37066ee7a13eb7e1daa2ec05f Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Wed, 27 Nov 2024 10:42:33 +0200
Subject: [PATCH 05/10] Make app.py even cleaner

---
 aidial_adapter_openai/app.py              | 14 ++++++--------
 aidial_adapter_openai/routers/__init__.py |  3 +++
 aidial_adapter_openai/routers/health.py   |  2 ++
 3 files changed, 11 insertions(+), 8 deletions(-)
 create mode 100644 aidial_adapter_openai/routers/health.py

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index d7f0202..2b4334f 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -7,14 +7,13 @@
 from fastapi import FastAPI
 from openai import OpenAIError
 
+import aidial_adapter_openai.routers as routers
 from aidial_adapter_openai.app_config import ApplicationConfig
 from aidial_adapter_openai.exception_handlers import (
     dial_exception_handler,
     openai_exception_handler,
     pydantic_exception_handler,
 )
-from aidial_adapter_openai.routers.chat_completion import chat_completion
-from aidial_adapter_openai.routers.embeddings import embedding
 from aidial_adapter_openai.utils.http_client import get_http_client
 from aidial_adapter_openai.utils.log_config import configure_loggers, logger
 from aidial_adapter_openai.utils.request import set_app_config
@@ -39,13 +38,12 @@ def create_app(
 
     configure_loggers()
 
-    @app.get("/health")
-    def health():
-        return {"status": "ok"}
-
-    app.post("/openai/deployments/{deployment_id:path}/embeddings")(embedding)
+    app.get("/health")(routers.health)
+    app.post("/openai/deployments/{deployment_id:path}/embeddings")(
+        routers.embedding
+    )
     app.post("/openai/deployments/{deployment_id:path}/chat/completions")(
-        chat_completion
+        routers.chat_completion
     )
     app.exception_handler(OpenAIError)(openai_exception_handler)
     app.exception_handler(pydantic.ValidationError)(pydantic_exception_handler)
diff --git a/aidial_adapter_openai/routers/__init__.py b/aidial_adapter_openai/routers/__init__.py
index e69de29..420e7fa 100644
--- a/aidial_adapter_openai/routers/__init__.py
+++ b/aidial_adapter_openai/routers/__init__.py
@@ -0,0 +1,3 @@
+from .chat_completion import chat_completion
+from .embeddings import embedding
+from .health import health
diff --git a/aidial_adapter_openai/routers/health.py b/aidial_adapter_openai/routers/health.py
new file mode 100644
index 0000000..834f2a3
--- /dev/null
+++ b/aidial_adapter_openai/routers/health.py
@@ -0,0 +1,2 @@
+def health():
+    return {"status": "ok"}

From 0b08f3665daab009295298fbcb66a467fceb74d9 Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Wed, 27 Nov 2024 10:47:32 +0200
Subject: [PATCH 06/10] Turn off telemetry for test app instance

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index a8efc95..876cdce 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,7 +9,7 @@
 
 @pytest_asyncio.fixture
 def _app_instance():
-    return create_app()
+    return create_app(to_init_telemetry=False)
 
 
 @pytest_asyncio.fixture

From 66faf7265ba616ddad3afde66dbd0d82cfdfd8a9 Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Thu, 28 Nov 2024 17:47:57 +0200
Subject: [PATCH 07/10] More refactoring due to PR comments

---
 aidial_adapter_openai/app.py             |  16 +-
 aidial_adapter_openai/app_config.py      | 119 ++++++++-----
 aidial_adapter_openai/constant.py        |  11 ++
 .../{routers => endpoints}/__init__.py   |   0
 .../endpoints/chat_completion.py         | 160 ++++++++++++++++++
 .../{routers => endpoints}/embeddings.py |  12 +-
 .../{routers => endpoints}/health.py     |   0
 .../routers/chat_completion.py           | 159 -----------------
 .../utils/image_tokenizer.py             |  31 ++--
 aidial_adapter_openai/utils/json.py      |   2 +
 tests/conftest.py                        |   4 +-
 11 files changed, 276 insertions(+), 238 deletions(-)
 create mode 100644 aidial_adapter_openai/constant.py
 rename aidial_adapter_openai/{routers => endpoints}/__init__.py (100%)
 create mode 100644 aidial_adapter_openai/endpoints/chat_completion.py
 rename aidial_adapter_openai/{routers => endpoints}/embeddings.py (79%)
 rename aidial_adapter_openai/{routers => endpoints}/health.py (100%)
 delete mode 100644 aidial_adapter_openai/routers/chat_completion.py
 create mode 100644 aidial_adapter_openai/utils/json.py

diff --git a/aidial_adapter_openai/app.py b/aidial_adapter_openai/app.py
index 2b4334f..5d07629 100644
--- a/aidial_adapter_openai/app.py
+++ b/aidial_adapter_openai/app.py
@@ -2,12 +2,12 @@
 
 import pydantic
 from aidial_sdk.exceptions import HTTPException as DialException
-from aidial_sdk.telemetry.init import init_telemetry
+from aidial_sdk.telemetry.init import init_telemetry as sdk_init_telemetry
 from aidial_sdk.telemetry.types import TelemetryConfig
 from fastapi import FastAPI
 from openai import OpenAIError
 
-import aidial_adapter_openai.routers as routers
+import aidial_adapter_openai.endpoints as endpoints
 from aidial_adapter_openai.app_config import ApplicationConfig
 from aidial_adapter_openai.exception_handlers import (
     dial_exception_handler,
     openai_exception_handler,
     pydantic_exception_handler,
 )
 from aidial_adapter_openai.utils.http_client import get_http_client
 from aidial_adapter_openai.utils.log_config import configure_loggers, logger
 from aidial_adapter_openai.utils.request import set_app_config
@@ -28,22 +28,22 @@ async def lifespan(app: FastAPI):
 
 
 def create_app(
     app_config: ApplicationConfig | None = None,
-    to_init_telemetry: bool = True,
+    init_telemetry: bool = True,
 ) -> FastAPI:
     app = FastAPI(lifespan=lifespan)
     set_app_config(app, app_config or ApplicationConfig.from_env())
 
-    if to_init_telemetry:
-        init_telemetry(app, TelemetryConfig())
+    if init_telemetry:
+        sdk_init_telemetry(app, TelemetryConfig())
 
     configure_loggers()
 
-    app.get("/health")(routers.health)
+    app.get("/health")(endpoints.health)
     app.post("/openai/deployments/{deployment_id:path}/embeddings")(
-        routers.embedding
+        endpoints.embedding
     )
     app.post("/openai/deployments/{deployment_id:path}/chat/completions")(
-        routers.chat_completion
+        endpoints.chat_completion
     )
     app.exception_handler(OpenAIError)(openai_exception_handler)
     app.exception_handler(pydantic.ValidationError)(pydantic_exception_handler)
diff --git a/aidial_adapter_openai/app_config.py b/aidial_adapter_openai/app_config.py
index d024e4c..a59161a 100644
--- a/aidial_adapter_openai/app_config.py
+++ b/aidial_adapter_openai/app_config.py
@@ -4,24 +4,13 @@
 
 from pydantic import BaseModel
 
+from aidial_adapter_openai.constant import ChatCompletionDeploymentType
 from aidial_adapter_openai.utils.env import get_env_bool
+from aidial_adapter_openai.utils.json import remove_nones
 from aidial_adapter_openai.utils.log_config import logger
 from aidial_adapter_openai.utils.parsers import parse_deployment_list
 
 
-def _get_eliminate_empty_choices() -> bool:
-    old_name = "FIX_STREAMING_ISSUES_IN_NEW_API_VERSIONS"
-    new_name = "ELIMINATE_EMPTY_CHOICES"
-
-    if old_name in os.environ:
-        logger.warning(
-            f"{old_name} environment variable is deprecated. Use {new_name} instead."
-        )
-        return get_env_bool(old_name, False)
-
-    return get_env_bool(new_name, False)
-
-
 class ApplicationConfig(BaseModel):
     MODEL_ALIASES: Dict[str, str] = {}
     DALLE3_DEPLOYMENTS: List[str] = []
@@ -37,42 +26,78 @@ class ApplicationConfig(BaseModel):
     NON_STREAMING_DEPLOYMENTS: List[str] = []
     ELIMINATE_EMPTY_CHOICES: bool = False
 
+    def get_chat_completion_deployment_type(
+        self, deployment_id: str
+    ) -> ChatCompletionDeploymentType:
+        if deployment_id in self.DALLE3_DEPLOYMENTS:
+            return ChatCompletionDeploymentType.DALLE3
+        elif deployment_id in self.GPT4_VISION_DEPLOYMENTS:
+            return ChatCompletionDeploymentType.GPT4_VISION
+        elif deployment_id in self.MISTRAL_DEPLOYMENTS:
+            return ChatCompletionDeploymentType.MISTRAL
+        elif deployment_id in self.DATABRICKS_DEPLOYMENTS:
+            return ChatCompletionDeploymentType.DATABRICKS
+        elif deployment_id in self.GPT4O_DEPLOYMENTS:
+            return ChatCompletionDeploymentType.GPT4O
+        elif deployment_id in self.GPT4O_MINI_DEPLOYMENTS:
+            return ChatCompletionDeploymentType.GPT4O_MINI
+        else:
+            return ChatCompletionDeploymentType.GPT_TEXT_ONLY
+
     @classmethod
     def from_env(cls) -> "ApplicationConfig":
+        def _parse_env_deployments(deployments_key: str) -> List[str] | None:
+            return parse_deployment_list(os.getenv(deployments_key)) or None
+
+        def _parse_env_dict(key: str) -> Dict[str, str] | None:
+            value = os.getenv(key)
+            return json.loads(value) if value else None
+
+        def _parse_eliminate_empty_choices() -> bool | None:
+            old_name = "FIX_STREAMING_ISSUES_IN_NEW_API_VERSIONS"
+            new_name = "ELIMINATE_EMPTY_CHOICES"
+
+            if old_name in os.environ:
+                logger.warning(
+                    f"{old_name} environment variable is deprecated. Use {new_name} instead."
+ ) + return get_env_bool(old_name) + elif new_name in os.environ: + return get_env_bool(new_name) + + return None + + deployment_fields = { + deployment_key: _parse_env_deployments(deployment_key) + for deployment_key in ( + "DALLE3_DEPLOYMENTS", + "GPT4_VISION_DEPLOYMENTS", + "MISTRAL_DEPLOYMENTS", + "DATABRICKS_DEPLOYMENTS", + "GPT4O_DEPLOYMENTS", + "GPT4O_MINI_DEPLOYMENTS", + "AZURE_AI_VISION_DEPLOYMENTS", + "NON_STREAMING_DEPLOYMENTS", + ) + } + dict_fields = { + key: _parse_env_dict(key) + for key in ( + "MODEL_ALIASES", + "API_VERSIONS_MAPPING", + "COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES", + ) + } + return cls( - MODEL_ALIASES=json.loads(os.getenv("MODEL_ALIASES", "{}")), - DALLE3_DEPLOYMENTS=parse_deployment_list( - os.getenv("DALLE3_DEPLOYMENTS") - ), - GPT4_VISION_DEPLOYMENTS=parse_deployment_list( - os.getenv("GPT4_VISION_DEPLOYMENTS") - ), - MISTRAL_DEPLOYMENTS=parse_deployment_list( - os.getenv("MISTRAL_DEPLOYMENTS") - ), - DATABRICKS_DEPLOYMENTS=parse_deployment_list( - os.getenv("DATABRICKS_DEPLOYMENTS") - ), - GPT4O_DEPLOYMENTS=parse_deployment_list( - os.getenv("GPT4O_DEPLOYMENTS") - ), - GPT4O_MINI_DEPLOYMENTS=parse_deployment_list( - os.getenv("GPT4O_MINI_DEPLOYMENTS") - ), - AZURE_AI_VISION_DEPLOYMENTS=parse_deployment_list( - os.getenv("AZURE_AI_VISION_DEPLOYMENTS") - ), - API_VERSIONS_MAPPING=json.loads( - os.getenv("API_VERSIONS_MAPPING", "{}") - ), - COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES=json.loads( - os.getenv("COMPLETION_DEPLOYMENTS_PROMPT_TEMPLATES") or "{}" - ), - DALLE3_AZURE_API_VERSION=os.getenv( - "DALLE3_AZURE_API_VERSION", "2024-02-01" - ), - NON_STREAMING_DEPLOYMENTS=parse_deployment_list( - os.getenv("NON_STREAMING_DEPLOYMENTS") + **remove_nones( + { + **deployment_fields, + **dict_fields, + "DALLE3_AZURE_API_VERSION": os.getenv( + "DALLE3_AZURE_API_VERSION" + ), + "ELIMINATE_EMPTY_CHOICES": _parse_eliminate_empty_choices(), + } ), - ELIMINATE_EMPTY_CHOICES=_get_eliminate_empty_choices(), ) diff --git a/aidial_adapter_openai/constant.py b/aidial_adapter_openai/constant.py new file mode 100644 index 0000000..c24090f --- /dev/null +++ b/aidial_adapter_openai/constant.py @@ -0,0 +1,11 @@ +from enum import StrEnum, auto + + +class ChatCompletionDeploymentType(StrEnum): + DALLE3 = auto() + MISTRAL = auto() + DATABRICKS = auto() + GPT4_VISION = auto() + GPT4O = auto() + GPT4O_MINI = auto() + GPT_TEXT_ONLY = auto() diff --git a/aidial_adapter_openai/routers/__init__.py b/aidial_adapter_openai/endpoints/__init__.py similarity index 100% rename from aidial_adapter_openai/routers/__init__.py rename to aidial_adapter_openai/endpoints/__init__.py diff --git a/aidial_adapter_openai/endpoints/chat_completion.py b/aidial_adapter_openai/endpoints/chat_completion.py new file mode 100644 index 0000000..e7f03a6 --- /dev/null +++ b/aidial_adapter_openai/endpoints/chat_completion.py @@ -0,0 +1,160 @@ +from typing import assert_never + +from fastapi import Request + +from aidial_adapter_openai.app_config import ApplicationConfig +from aidial_adapter_openai.completions import chat_completion as completion +from aidial_adapter_openai.constant import ChatCompletionDeploymentType +from aidial_adapter_openai.dalle3 import ( + chat_completion as dalle3_chat_completion, +) +from aidial_adapter_openai.databricks import ( + chat_completion as databricks_chat_completion, +) +from aidial_adapter_openai.dial_api.storage import create_file_storage +from aidial_adapter_openai.gpt import gpt_chat_completion +from aidial_adapter_openai.gpt4_multi_modal.chat_completion import ( + 
gpt4_vision_chat_completion, + gpt4o_chat_completion, +) +from aidial_adapter_openai.mistral import ( + chat_completion as mistral_chat_completion, +) +from aidial_adapter_openai.utils.auth import get_credentials +from aidial_adapter_openai.utils.image_tokenizer import get_image_tokenizer +from aidial_adapter_openai.utils.parsers import completions_parser, parse_body +from aidial_adapter_openai.utils.request import ( + get_api_version, + get_request_app_config, +) +from aidial_adapter_openai.utils.streaming import create_server_response +from aidial_adapter_openai.utils.tokenizer import ( + MultiModalTokenizer, + PlainTextTokenizer, +) + + +async def call_chat_completion( + deployment_type: ChatCompletionDeploymentType, + deployment_id: str, + data: dict, + is_stream: bool, + request: Request, + app_config: ApplicationConfig, +): + + # Azure OpenAI deployments ignore "model" request field, + # since the deployment id is already encoded in the endpoint path. + # This is not the case for non-Azure OpenAI deployments, so + # they require the "model" field to be set. + # However, openai==1.33.0 requires the "model" field for **both** + # Azure and non-Azure deployments. + # Therefore, we provide the "model" field for all deployments here. + # The same goes for /embeddings endpoint. + data["model"] = deployment_id + + creds = await get_credentials(request) + api_version = get_api_version(request) + + upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"] + + if completions_endpoint := completions_parser.parse(upstream_endpoint): + return await completion( + data, + completions_endpoint, + creds, + api_version, + deployment_id, + app_config, + ) + match deployment_type: + case ChatCompletionDeploymentType.DALLE3: + storage = create_file_storage("images", request.headers) + return await dalle3_chat_completion( + data, + upstream_endpoint, + creds, + is_stream, + storage, + app_config.DALLE3_AZURE_API_VERSION, + ) + case ChatCompletionDeploymentType.MISTRAL: + return await mistral_chat_completion(data, upstream_endpoint, creds) + case ChatCompletionDeploymentType.DATABRICKS: + return await databricks_chat_completion( + data, upstream_endpoint, creds + ) + case ChatCompletionDeploymentType.GPT4_VISION: + tokenizer = MultiModalTokenizer( + "gpt-4", get_image_tokenizer(deployment_type) + ) + return await gpt4_vision_chat_completion( + data, + deployment_id, + upstream_endpoint, + creds, + is_stream, + create_file_storage("images", request.headers), + api_version, + tokenizer, + app_config.ELIMINATE_EMPTY_CHOICES, + ) + case ( + ChatCompletionDeploymentType.GPT4O + | ChatCompletionDeploymentType.GPT4O_MINI + ): + + tokenizer = MultiModalTokenizer( + app_config.MODEL_ALIASES.get(deployment_id, deployment_id), + get_image_tokenizer(deployment_type), + ) + return await gpt4o_chat_completion( + data, + deployment_id, + upstream_endpoint, + creds, + is_stream, + create_file_storage("images", request.headers), + api_version, + tokenizer, + app_config.ELIMINATE_EMPTY_CHOICES, + ) + case ChatCompletionDeploymentType.GPT_TEXT_ONLY: + tokenizer = PlainTextTokenizer( + model=app_config.MODEL_ALIASES.get(deployment_id, deployment_id) + ) + return await gpt_chat_completion( + data, + deployment_id, + upstream_endpoint, + creds, + api_version, + tokenizer, + app_config.ELIMINATE_EMPTY_CHOICES, + ) + case _: + assert_never(deployment_type) + + +async def chat_completion(deployment_id: str, request: Request): + app_config = get_request_app_config(request) + data = await parse_body(request) + + deployment_type = 
app_config.get_chat_completion_deployment_type( + deployment_id + ) + is_stream = bool(data.get("stream")) + + emulate_streaming = ( + deployment_id in app_config.NON_STREAMING_DEPLOYMENTS and is_stream + ) + + if emulate_streaming: + data["stream"] = False + + return create_server_response( + emulate_streaming, + await call_chat_completion( + deployment_type, deployment_id, data, is_stream, request, app_config + ), + ) diff --git a/aidial_adapter_openai/routers/embeddings.py b/aidial_adapter_openai/endpoints/embeddings.py similarity index 79% rename from aidial_adapter_openai/routers/embeddings.py rename to aidial_adapter_openai/endpoints/embeddings.py index 126d3ff..9960036 100644 --- a/aidial_adapter_openai/routers/embeddings.py +++ b/aidial_adapter_openai/endpoints/embeddings.py @@ -1,8 +1,5 @@ -from typing import Annotated +from fastapi import Request -from fastapi import Depends, Request - -from aidial_adapter_openai.app_config import ApplicationConfig from aidial_adapter_openai.dial_api.storage import create_file_storage from aidial_adapter_openai.embeddings.azure_ai_vision import ( embeddings as azure_ai_vision_embeddings, @@ -18,11 +15,8 @@ ) -async def embedding( - deployment_id: str, - request: Request, - app_config: Annotated[ApplicationConfig, Depends(get_request_app_config)], -): +async def embedding(deployment_id: str, request: Request): + app_config = get_request_app_config(request) data = await parse_body(request) # See note for /chat/completions endpoint diff --git a/aidial_adapter_openai/routers/health.py b/aidial_adapter_openai/endpoints/health.py similarity index 100% rename from aidial_adapter_openai/routers/health.py rename to aidial_adapter_openai/endpoints/health.py diff --git a/aidial_adapter_openai/routers/chat_completion.py b/aidial_adapter_openai/routers/chat_completion.py deleted file mode 100644 index d6f86ac..0000000 --- a/aidial_adapter_openai/routers/chat_completion.py +++ /dev/null @@ -1,159 +0,0 @@ -from typing import Annotated - -from fastapi import Depends, Request - -from aidial_adapter_openai.app_config import ApplicationConfig -from aidial_adapter_openai.completions import chat_completion as completion -from aidial_adapter_openai.dalle3 import ( - chat_completion as dalle3_chat_completion, -) -from aidial_adapter_openai.databricks import ( - chat_completion as databricks_chat_completion, -) -from aidial_adapter_openai.dial_api.storage import create_file_storage -from aidial_adapter_openai.gpt import gpt_chat_completion -from aidial_adapter_openai.gpt4_multi_modal.chat_completion import ( - gpt4_vision_chat_completion, - gpt4o_chat_completion, -) -from aidial_adapter_openai.mistral import ( - chat_completion as mistral_chat_completion, -) -from aidial_adapter_openai.utils.auth import get_credentials -from aidial_adapter_openai.utils.image_tokenizer import get_image_tokenizer -from aidial_adapter_openai.utils.parsers import completions_parser, parse_body -from aidial_adapter_openai.utils.request import ( - get_api_version, - get_request_app_config, -) -from aidial_adapter_openai.utils.streaming import create_server_response -from aidial_adapter_openai.utils.tokenizer import ( - MultiModalTokenizer, - PlainTextTokenizer, -) - - -async def call_chat_completion( - deployment_id: str, - data: dict, - is_stream: bool, - request: Request, - app_config: ApplicationConfig, -): - - # Azure OpenAI deployments ignore "model" request field, - # since the deployment id is already encoded in the endpoint path. 
- # This is not the case for non-Azure OpenAI deployments, so - # they require the "model" field to be set. - # However, openai==1.33.0 requires the "model" field for **both** - # Azure and non-Azure deployments. - # Therefore, we provide the "model" field for all deployments here. - # The same goes for /embeddings endpoint. - data["model"] = deployment_id - - creds = await get_credentials(request) - api_version = get_api_version(request) - - upstream_endpoint = request.headers["X-UPSTREAM-ENDPOINT"] - - if completions_endpoint := completions_parser.parse(upstream_endpoint): - return await completion( - data, - completions_endpoint, - creds, - api_version, - deployment_id, - app_config, - ) - if deployment_id in app_config.DALLE3_DEPLOYMENTS: - storage = create_file_storage("images", request.headers) - return await dalle3_chat_completion( - data, - upstream_endpoint, - creds, - is_stream, - storage, - app_config.DALLE3_AZURE_API_VERSION, - ) - - if deployment_id in app_config.MISTRAL_DEPLOYMENTS: - return await mistral_chat_completion(data, upstream_endpoint, creds) - - if deployment_id in app_config.DATABRICKS_DEPLOYMENTS: - return await databricks_chat_completion(data, upstream_endpoint, creds) - - text_tokenizer_model = app_config.MODEL_ALIASES.get( - deployment_id, deployment_id - ) - - if deployment_id in app_config.GPT4_VISION_DEPLOYMENTS: - tokenizer = MultiModalTokenizer( - "gpt-4", get_image_tokenizer(deployment_id, app_config) - ) - return await gpt4_vision_chat_completion( - data, - deployment_id, - upstream_endpoint, - creds, - is_stream, - create_file_storage("images", request.headers), - api_version, - tokenizer, - app_config.ELIMINATE_EMPTY_CHOICES, - ) - - if deployment_id in ( - *app_config.GPT4O_DEPLOYMENTS, - *app_config.GPT4O_MINI_DEPLOYMENTS, - ): - tokenizer = MultiModalTokenizer( - text_tokenizer_model, - get_image_tokenizer(deployment_id, app_config), - ) - return await gpt4o_chat_completion( - data, - deployment_id, - upstream_endpoint, - creds, - is_stream, - create_file_storage("images", request.headers), - api_version, - tokenizer, - app_config.ELIMINATE_EMPTY_CHOICES, - ) - - tokenizer = PlainTextTokenizer(model=text_tokenizer_model) - return await gpt_chat_completion( - data, - deployment_id, - upstream_endpoint, - creds, - api_version, - tokenizer, - app_config.ELIMINATE_EMPTY_CHOICES, - ) - - -async def chat_completion( - deployment_id: str, - request: Request, - app_config: Annotated[ApplicationConfig, Depends(get_request_app_config)], -): - - data = await parse_body(request) - - is_stream = bool(data.get("stream")) - - emulate_streaming = ( - deployment_id in app_config.NON_STREAMING_DEPLOYMENTS and is_stream - ) - - if emulate_streaming: - data["stream"] = False - - return create_server_response( - emulate_streaming, - await call_chat_completion( - deployment_id, data, is_stream, request, app_config - ), - ) diff --git a/aidial_adapter_openai/utils/image_tokenizer.py b/aidial_adapter_openai/utils/image_tokenizer.py index 01eb467..7f9eb79 100644 --- a/aidial_adapter_openai/utils/image_tokenizer.py +++ b/aidial_adapter_openai/utils/image_tokenizer.py @@ -4,11 +4,11 @@ """ import math -from typing import assert_never +from typing import Literal, assert_never from pydantic import BaseModel -from aidial_adapter_openai.app_config import ApplicationConfig +from aidial_adapter_openai.constant import ChatCompletionDeploymentType from aidial_adapter_openai.utils.image import ImageDetail, resolve_detail_level @@ -54,20 +54,25 @@ def _compute_high_detail_tokens(self, 
width: int, height: int) -> int: low_detail_tokens=2833, tokens_per_tile=5667 ) +MultiModalDeployments = Literal[ + ChatCompletionDeploymentType.GPT4O, + ChatCompletionDeploymentType.GPT4O_MINI, + ChatCompletionDeploymentType.GPT4_VISION, +] + def get_image_tokenizer( - deployment_id: str, app_config: ApplicationConfig + deployment_type: MultiModalDeployments, ) -> ImageTokenizer: - if deployment_id in app_config.GPT4O_DEPLOYMENTS: - return GPT4O_IMAGE_TOKENIZER - elif deployment_id in app_config.GPT4O_MINI_DEPLOYMENTS: - return GPT4O_MINI_IMAGE_TOKENIZER - elif deployment_id in app_config.GPT4_VISION_DEPLOYMENTS: - return GPT4_VISION_IMAGE_TOKENIZER - else: - raise RuntimeError( - f"No image tokenizer found for deployment {deployment_id}" - ) + match deployment_type: + case ChatCompletionDeploymentType.GPT4O: + return GPT4O_IMAGE_TOKENIZER + case ChatCompletionDeploymentType.GPT4O_MINI: + return GPT4O_MINI_IMAGE_TOKENIZER + case ChatCompletionDeploymentType.GPT4_VISION: + return GPT4_VISION_IMAGE_TOKENIZER + case _: + assert_never(deployment_type) def _fit_longest(width: int, height: int, size: int) -> tuple[int, int]: diff --git a/aidial_adapter_openai/utils/json.py b/aidial_adapter_openai/utils/json.py new file mode 100644 index 0000000..2a7b8a6 --- /dev/null +++ b/aidial_adapter_openai/utils/json.py @@ -0,0 +1,2 @@ +def remove_nones(d: dict) -> dict: + return {k: v for k, v in d.items() if v is not None} diff --git a/tests/conftest.py b/tests/conftest.py index 876cdce..0a51517 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,9 +7,9 @@ from aidial_adapter_openai.utils.request import get_app_config -@pytest_asyncio.fixture +@pytest.fixture def _app_instance(): - return create_app(to_init_telemetry=False) + return create_app(init_telemetry=False) @pytest_asyncio.fixture From 44320f29ce1c1d986e9273724209726c4e917d9f Mon Sep 17 00:00:00 2001 From: Roman Romanov Date: Thu, 28 Nov 2024 20:46:41 +0200 Subject: [PATCH 08/10] Fix: handle completions endpoint --- aidial_adapter_openai/endpoints/chat_completion.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aidial_adapter_openai/endpoints/chat_completion.py b/aidial_adapter_openai/endpoints/chat_completion.py index e7f03a6..4682c42 100644 --- a/aidial_adapter_openai/endpoints/chat_completion.py +++ b/aidial_adapter_openai/endpoints/chat_completion.py @@ -35,7 +35,6 @@ async def call_chat_completion( - deployment_type: ChatCompletionDeploymentType, deployment_id: str, data: dict, is_stream: bool, @@ -67,6 +66,10 @@ async def call_chat_completion( deployment_id, app_config, ) + + deployment_type = app_config.get_chat_completion_deployment_type( + deployment_id + ) match deployment_type: case ChatCompletionDeploymentType.DALLE3: storage = create_file_storage("images", request.headers) @@ -140,9 +143,6 @@ async def chat_completion(deployment_id: str, request: Request): app_config = get_request_app_config(request) data = await parse_body(request) - deployment_type = app_config.get_chat_completion_deployment_type( - deployment_id - ) is_stream = bool(data.get("stream")) emulate_streaming = ( @@ -155,6 +155,6 @@ async def chat_completion(deployment_id: str, request: Request): return create_server_response( emulate_streaming, await call_chat_completion( - deployment_type, deployment_id, data, is_stream, request, app_config + deployment_id, data, is_stream, request, app_config ), ) From cbeda2b9a3e90fba96a1ee79fbdc7bf46eb714d5 Mon Sep 17 00:00:00 2001 From: Roman Romanov Date: Fri, 29 Nov 2024 17:14:21 +0200 
Subject: [PATCH 09/10] Inline deployment list parsing into app config
---
 aidial_adapter_openai/app_config.py    | 6 ++++--
 aidial_adapter_openai/utils/parsers.py | 4 ----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/aidial_adapter_openai/app_config.py b/aidial_adapter_openai/app_config.py
index a59161a..3180172 100644
--- a/aidial_adapter_openai/app_config.py
+++ b/aidial_adapter_openai/app_config.py
@@ -8,7 +8,6 @@
 from aidial_adapter_openai.utils.env import get_env_bool
 from aidial_adapter_openai.utils.json import remove_nones
 from aidial_adapter_openai.utils.log_config import logger
-from aidial_adapter_openai.utils.parsers import parse_deployment_list
 
 
 class ApplicationConfig(BaseModel):
@@ -47,7 +46,10 @@ def get_chat_completion_deployment_type(
     @classmethod
     def from_env(cls) -> "ApplicationConfig":
         def _parse_env_deployments(deployments_key: str) -> List[str] | None:
-            return parse_deployment_list(os.getenv(deployments_key)) or None
+            deployments_value = os.getenv(deployments_key)
+            if deployments_value is None:
+                return None
+            return list(map(str.strip, (deployments_value).split(",")))
 
         def _parse_env_dict(key: str) -> Dict[str, str] | None:
             value = os.getenv(key)
diff --git a/aidial_adapter_openai/utils/parsers.py b/aidial_adapter_openai/utils/parsers.py
index 8975093..15b7908 100644
--- a/aidial_adapter_openai/utils/parsers.py
+++ b/aidial_adapter_openai/utils/parsers.py
@@ -110,7 +110,3 @@ async def parse_body(request: Request) -> Dict[str, Any]:
         raise InvalidRequestError(str(data) + " is not of type 'object'")
 
     return data
-
-
-def parse_deployment_list(deployments: str | None) -> List[str]:
-    return list(map(str.strip, (deployments or "").split(",")))

From 5686a8c249fb8bcaff2dd0e971c68df88f58e1e0 Mon Sep 17 00:00:00 2001
From: Roman Romanov
Date: Fri, 29 Nov 2024 17:23:15 +0200
Subject: [PATCH 10/10] Fix linter
---
 aidial_adapter_openai/utils/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aidial_adapter_openai/utils/parsers.py b/aidial_adapter_openai/utils/parsers.py
index 15b7908..3591ff9 100644
--- a/aidial_adapter_openai/utils/parsers.py
+++ b/aidial_adapter_openai/utils/parsers.py
@@ -1,7 +1,7 @@
 import re
 from abc import ABC, abstractmethod
 from json import JSONDecodeError
-from typing import Any, Dict, List, TypedDict
+from typing import Any, Dict, TypedDict
 
 from aidial_sdk.exceptions import InvalidRequestError
 from fastapi import Request
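
A note on the configuration pattern that PATCH 07 introduces and PATCH 09 refines: every _parse_env_* helper returns None when its variable is unset, and remove_nones drops those entries before the pydantic model is constructed, so the field defaults (empty lists and dicts, False, the fallback DALL-E API version) apply only when nothing was configured. Below is a minimal, self-contained sketch of the idiom; the simplified AdapterConfig model and its single field are illustrative stand-ins, not the adapter's real ApplicationConfig.

import os
from typing import List

from pydantic import BaseModel


def remove_nones(d: dict) -> dict:
    # Same helper as aidial_adapter_openai/utils/json.py.
    return {k: v for k, v in d.items() if v is not None}


class AdapterConfig(BaseModel):
    MISTRAL_DEPLOYMENTS: List[str] = []

    @classmethod
    def from_env(cls) -> "AdapterConfig":
        def _parse_deployments(key: str) -> List[str] | None:
            # None (variable unset) is distinct from an explicit value;
            # remove_nones below turns "unset" into "use the field default".
            value = os.getenv(key)
            if value is None:
                return None
            return [item.strip() for item in value.split(",")]

        return cls(
            **remove_nones(
                {"MISTRAL_DEPLOYMENTS": _parse_deployments("MISTRAL_DEPLOYMENTS")}
            )
        )


os.environ.pop("MISTRAL_DEPLOYMENTS", None)
assert AdapterConfig.from_env().MISTRAL_DEPLOYMENTS == []  # default applies

os.environ["MISTRAL_DEPLOYMENTS"] = "mistral-7b, mixtral-8x7b"
assert AdapterConfig.from_env().MISTRAL_DEPLOYMENTS == ["mistral-7b", "mixtral-8x7b"]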
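
The endpoint refactoring replaces chains of "deployment_id in app_config.X_DEPLOYMENTS" checks with a single classification step, get_chat_completion_deployment_type, followed by exhaustive match statements. The payoff is static coverage: with assert_never in the fallback arm, mypy or pyright reports an error whenever a new ChatCompletionDeploymentType member is added but left unhandled. A condensed sketch of the idiom follows (Python 3.11+ for StrEnum and typing.assert_never); the returned strings are placeholders, not the adapter's real tokenizers.

from enum import StrEnum, auto
from typing import assert_never


class DeploymentType(StrEnum):
    GPT4_VISION = auto()
    GPT4O = auto()
    GPT4O_MINI = auto()


def pick_image_tokenizer(deployment_type: DeploymentType) -> str:
    match deployment_type:
        case DeploymentType.GPT4O:
            return "gpt-4o image tokenizer"
        case DeploymentType.GPT4O_MINI:
            return "gpt-4o-mini image tokenizer"
        case DeploymentType.GPT4_VISION:
            return "gpt-4-vision image tokenizer"
        case _:
            # Unreachable at runtime; makes the match provably exhaustive,
            # so adding an enum member without a case fails type checking.
            assert_never(deployment_type)


print(pick_image_tokenizer(DeploymentType.GPT4O))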
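
The init_telemetry flag on create_app (renamed from to_init_telemetry in PATCH 07) exists so tests can build an app instance without initialising exporters. Below is a hedged sketch of how the fixture might be exercised end to end with httpx: the /health route is registered by the patches above, but the ASGITransport wiring and the test itself are assumptions about the test setup, not the repository's actual conftest.

import httpx
import pytest
import pytest_asyncio

from aidial_adapter_openai.app import create_app


@pytest.fixture
def _app_instance():
    # Telemetry stays off so the test run does not attempt to
    # configure exporters or emit spans.
    return create_app(init_telemetry=False)


@pytest_asyncio.fixture
async def test_http_client(_app_instance):
    async with httpx.AsyncClient(
        transport=httpx.ASGITransport(app=_app_instance),
        base_url="http://testserver",
    ) as client:
        yield client


@pytest.mark.asyncio
async def test_health(test_http_client):
    response = await test_http_client.get("/health")
    assert response.status_code == 200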
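
Finally, the deprecated-alias handling in from_env: the old FIX_STREAMING_ISSUES_IN_NEW_API_VERSIONS variable keeps working but logs a warning, the new ELIMINATE_EMPTY_CHOICES name is used otherwise, and None signals "not configured" so that remove_nones lets the field default (False) win. A standalone sketch with a simplified get_env_bool stand-in; the adapter's real helper lives in utils/env.py and may parse values differently.

import logging
import os

logger = logging.getLogger(__name__)


def get_env_bool(name: str) -> bool:
    # Simplified stand-in for the adapter's utils.env.get_env_bool.
    return os.environ[name].strip().lower() in ("1", "true", "yes")


def parse_eliminate_empty_choices() -> bool | None:
    old_name = "FIX_STREAMING_ISSUES_IN_NEW_API_VERSIONS"
    new_name = "ELIMINATE_EMPTY_CHOICES"
    if old_name in os.environ:
        # The deprecated spelling still wins, but callers are nudged
        # towards the new variable name.
        logger.warning(
            "%s environment variable is deprecated. Use %s instead.",
            old_name,
            new_name,
        )
        return get_env_bool(old_name)
    if new_name in os.environ:
        return get_env_bool(new_name)
    return None  # not configured: the pydantic default applies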