
Commit 86ef53a

cleanup

1 parent b6bbe4a

File tree

10 files changed (+78 lines, -50 lines)


.env.example

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
-DEFAULT_API_VERSION=2023-03-15-preview
+AZURE_API_VERSION=2023-03-15-preview
 MODEL_ALIASES={}
 LOG_LEVEL=INFO

Makefile

Lines changed: 0 additions & 3 deletions
@@ -16,9 +16,6 @@ build: install
 serve: install
 	poetry run uvicorn "aidial_adapter_openai.app:app" --reload --host "0.0.0.0" --port $(PORT) --workers=1 --env-file ./.env
 
-client: install
-	poetry run python -m client.client_adapter $(ARGS)
-
 clean:
 	poetry run clean
 	poetry env remove --all

README.md

Lines changed: 2 additions & 2 deletions
@@ -55,8 +55,8 @@ Copy `.env.example` to `.env` and customize it for your environment:
 |---|---|---|
 |LOG_LEVEL|INFO|Log level. Use DEBUG for dev purposes and INFO in prod|
 |WEB_CONCURRENCY|1|Number of workers for the server|
-|DEFAULT_API_VERSION|2023-03-15-preview|The default version API for requests to Azure Openai API for cases when the user request doesn't contain "api-version"|
-|MODEL_ALIASES|{"gpt-35-turbo":"gpt-3.5-turbo-0301"}|Mapping request's deployment_id to [model name of tiktoken](https://github.com/openai/tiktoken/blob/main/tiktoken/model.py) for correct calculate of tokens.|
+|AZURE_API_VERSION|2023-03-15-preview|The version API for requests to Azure OpenAI API|
+|MODEL_ALIASES|{}|Mapping request's deployment_id to [model name of tiktoken](https://github.com/openai/tiktoken/blob/main/tiktoken/model.py) for correct calculate of tokens. Example: `{"gpt-35-turbo":"gpt-3.5-turbo-0301"}`|
 
 ### Docker
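
For context, a minimal sketch (not part of this commit) of how MODEL_ALIASES could be used to resolve a request's deployment_id to a tiktoken encoding. The helper name `encoding_for_deployment` is illustrative, not the adapter's actual code:

```python
import json
import os

import tiktoken

# Parsed the same way app.py does it: MODEL_ALIASES holds a JSON object.
model_aliases: dict = json.loads(os.getenv("MODEL_ALIASES", "{}"))


def encoding_for_deployment(deployment_id: str) -> tiktoken.Encoding:
    # Fall back to the deployment_id itself when no alias is configured;
    # e.g. {"gpt-35-turbo": "gpt-3.5-turbo-0301"} maps an Azure deployment
    # name to a model name tiktoken recognizes.
    model = model_aliases.get(deployment_id, deployment_id)
    return tiktoken.encoding_for_model(model)
```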

aidial_adapter_openai/app.py

Lines changed: 3 additions & 10 deletions
@@ -20,7 +20,7 @@
 logging.config.dictConfig(LogConfig().dict())
 app = FastAPI()
 model_aliases = json.loads(os.getenv("MODEL_ALIASES", "{}"))
-default_api_version = os.getenv("DEFAULT_API_VERSION", "2023-03-15-preview")
+azure_api_version = os.getenv("AZURE_API_VERSION", "2023-03-15-preview")
 
 
 async def handle_exceptions(call):
@@ -38,7 +38,6 @@ async def handle_exceptions(call):
 
 @app.post("/openai/deployments/{deployment_id}/chat/completions")
 async def chat_completion(deployment_id: str, request: Request):
-    api_version = request.query_params.get("api-version", default_api_version)
     data = await parse_body(request)
 
     is_stream = data.get("stream", False)
@@ -54,7 +53,7 @@ async def chat_completion(deployment_id: str, request: Request):
         api_key=dial_api_key,
         api_base=api_base,
         api_type="azure",
-        api_version=api_version,
+        api_version=azure_api_version,
         request_timeout=(10, 600),  # connect timeout and total timeout
         **data
     )
@@ -76,7 +75,6 @@ async def chat_completion(deployment_id: str, request: Request):
 
 @app.post("/openai/deployments/{deployment_id}/embeddings")
 async def embedding(deployment_id: str, request: Request):
-    api_version = request.query_params.get("api-version", default_api_version)
     data = await parse_body(request)
 
     dial_api_key = request.headers["X-UPSTREAM-KEY"]
@@ -90,7 +88,7 @@ async def embedding(deployment_id: str, request: Request):
         api_key=dial_api_key,
         api_base=api_base,
         api_type="azure",
-        api_version=api_version,
+        api_version=azure_api_version,
         request_timeout=(10, 600),  # connect timeout and total timeout
         **data
     )
@@ -117,10 +115,5 @@ def health():
     return {"status": "ok"}
 
 
-@app.get("/blah")
-def blah():
-    return {"blah": "blah"}
-
-
 if __name__ == "__main__":
     uvicorn.run(app, port=5000)
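
With this change the adapter no longer reads `api-version` from the request's query string; every upstream Azure call uses the fixed AZURE_API_VERSION. A minimal client sketch under stated assumptions: the adapter runs locally on port 5000 (as in the `__main__` block above), `X-UPSTREAM-KEY` carries the Azure API key (shown in the diff), and the payload values are placeholders:

```python
import httpx

# No "api-version" query parameter: the adapter now ignores it anyway.
response = httpx.post(
    "http://localhost:5000/openai/deployments/gpt-4/chat/completions",
    headers={"X-UPSTREAM-KEY": "my-azure-api-key"},  # placeholder key
    json={
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    },
    timeout=600.0,
)
print(response.json())
```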

aidial_adapter_openai/openai_override.py

Lines changed: 11 additions & 2 deletions
@@ -1,3 +1,12 @@
+"""
+OpenAI SDK translates various HTTP errors received from OpenAI API
+into Python exceptions: error.RateLimitError, error.InvalidRequestError,
+error.AuthenticationError etc.
+
+We want to retranslate the original HTTP errors to the user.
+So the standard error handlers in the openai.api_requestor.APIRequestor class
+are rewritten to wrap the original HTTP errors into OpenAIException and raise it.
+"""
 import json
 from json import JSONDecodeError
 
@@ -16,12 +25,12 @@ def __init__(self, body, code, resp, headers):
         super().__init__(resp)
 
 
-# Overrided to proxy original errors
+# Overridden to proxy original errors
 def handle_error_response_wrapper(wrapped, self, args, kwargs):
     raise OpenAIException(*args)
 
 
-# Overrided to proxy original errors
+# Overridden to proxy original errors
 def interpret_response_line_wrapper(wrapped, self: APIRequestor, args, kwargs):
     rbody, rcode, rheaders = args
     stream = kwargs.get("stream", False)
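
The `(wrapped, self, args, kwargs)` signatures suggest these wrappers are attached with wrapt-style function patching. A hedged sketch of what the registration might look like; the wiring itself is outside this diff, and the patched method names are assumptions based on the openai 0.x SDK:

```python
import wrapt

# Patch the SDK's error handling so the original HTTP error reaches the
# user wrapped in OpenAIException instead of an SDK-specific exception.
# The two wrappers are the functions defined in this module above.
wrapt.wrap_function_wrapper(
    "openai.api_requestor",
    "APIRequestor.handle_error_response",
    handle_error_response_wrapper,
)
wrapt.wrap_function_wrapper(
    "openai.api_requestor",
    "APIRequestor._interpret_response_line",
    interpret_response_line_wrapper,
)
```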

aidial_adapter_openai/utils/tokens.py

Lines changed: 11 additions & 4 deletions
@@ -1,3 +1,6 @@
+"""
+Implemented based on the official recipe: https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+"""
 from typing import Any, List
 
 from tiktoken import Encoding
@@ -7,16 +10,20 @@ def calculate_prompt_tokens(
     messages: List[Any], model: str, encoding: Encoding
 ):
     prompt_tokens = 3
-    tokens_per_message = (
-        4 if model == "gpt-3.5-turbo-0301" else 3
-    )  # possible need change gpt-3.5-turbo to something anything
+
+    if model == "gpt-3.5-turbo-0301":
+        tokens_per_message = 4
+        tokens_per_name = -1
+    else:
+        tokens_per_message = 3
+        tokens_per_name = 1
 
     for message in messages:
         prompt_tokens += tokens_per_message
 
         for key, value in message.items():
             prompt_tokens += len(encoding.encode(value))
             if key == "name":
-                prompt_tokens += 1
+                prompt_tokens += tokens_per_name
 
     return prompt_tokens
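
A minimal usage sketch of the refactored function, following the cited cookbook recipe; the message content is illustrative:

```python
from tiktoken import encoding_for_model

from aidial_adapter_openai.utils.tokens import calculate_prompt_tokens

messages = [{"role": "user", "content": "Hello!"}]
encoding = encoding_for_model("gpt-3.5-turbo-0301")

# 3 tokens prime the assistant's reply, 4 tokens frame each message for
# this model, plus the encoded length of every field value.
print(calculate_prompt_tokens(messages, "gpt-3.5-turbo-0301", encoding))
```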

poetry.lock

Lines changed: 7 additions & 22 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 0 additions & 1 deletion
@@ -31,7 +31,6 @@ pydantic = "^1.10.12"
 [tool.poetry.group.test.dependencies]
 pytest = "7.4.0"
 python-dotenv = "1.0.0"
-pytest-dotenv = "^0.5.0"
 pytest-aioresponses = "^0.2.0"
 httpx = "^0.25.0"

tests/test_errors.py

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ async def test_error_during_streaming(aioresponses: aioresponses):
 
 
 @pytest.mark.asyncio
-async def test_incorrect_upsteram_url(aioresponses: aioresponses):
+async def test_incorrect_upstream_url(aioresponses: aioresponses):
     aioresponses.post(
         "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview",
         status=200,

tests/test_streaming.py

Lines changed: 42 additions & 4 deletions
@@ -10,7 +10,7 @@
 @pytest.mark.asyncio
 async def test_streaming(aioresponses: aioresponses):
     aioresponses.post(
-        "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-06-15",
+        "http://localhost:5001/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview",
         status=200,
         body="data: "
         + json.dumps(
@@ -22,8 +22,8 @@ async def test_streaming(aioresponses: aioresponses):
                 "choices": [
                     {
                         "index": 0,
-                        "finish_reason": "stop",
-                        "message": {
+                        "finish_reason": None,
+                        "delta": {
                             "role": "assistant",
                         },
                     }
@@ -33,6 +33,26 @@ async def test_streaming(aioresponses: aioresponses):
         )
         + "\n\n"
         + "data: "
+        + json.dumps(
+            {
+                "id": "chatcmpl-test",
+                "object": "chat.completion.chunk",
+                "created": 1695940483,
+                "model": "gpt-4",
+                "choices": [
+                    {
+                        "index": 0,
+                        "finish_reason": None,
+                        "delta": {
+                            "content": "Test content",
+                        },
+                    }
+                ],
+                "usage": None,
+            }
+        )
+        + "\n\n"
+        + "data: "
         + json.dumps(
             {
                 "id": "chatcmpl-test",
@@ -68,4 +88,22 @@ async def test_streaming(aioresponses: aioresponses):
             assert line == ""
             continue
 
-        print("!", index, line, "!")  # TODO: change to asserts
+        if index == 0:
+            assert (
+                line
+                == 'data: {"id":"chatcmpl-test","object":"chat.completion.chunk","created":1695940483,"model":"gpt-4","choices":[{"index":0,"finish_reason":null,"delta":{"role":"assistant"}}],"usage":null}'
+            )
+        elif index == 2:
+            assert (
+                line
+                == 'data: {"id":"chatcmpl-test","object":"chat.completion.chunk","created":1695940483,"model":"gpt-4","choices":[{"index":0,"finish_reason":null,"delta":{"content":"Test content"}}],"usage":null}'
+            )
+        elif index == 4:
+            assert (
+                line
+                == 'data: {"id":"chatcmpl-test","object":"chat.completion.chunk","created":1696245654,"model":"gpt-4","choices":[{"index":0,"finish_reason":"stop","delta":{}}],"usage":{"completion_tokens":2,"prompt_tokens":9,"total_tokens":11}}'
+            )
+        elif index == 6:
+            assert line == "data: [DONE]"
+        else:
+            assert False
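
The new assertions rely on SSE framing: each event is a `data: <json>` line followed by a blank line, so after splitting the body on newlines the even indexes hold payloads and the odd indexes are empty separators. A small standalone sketch of that framing, with an assumed two-event body:

```python
body = 'data: {"n": 1}\n\ndata: [DONE]\n\n'

# Drop the final empty element produced by the trailing newline.
for index, line in enumerate(body.split("\n")[:-1]):
    if index % 2 == 1:
        assert line == ""  # blank separator between events
    else:
        assert line.startswith("data: ")  # event payload
```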
