From 5f4f69431adb91b751453ff2e1983fa56d690bb0 Mon Sep 17 00:00:00 2001 From: Avram Tudor Date: Thu, 10 Oct 2024 10:22:51 +0000 Subject: [PATCH] fix: increase gpu utilization --- run.sh | 2 +- skynet/modules/ttt/openai_api/app.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/run.sh b/run.sh index 0bce377..7c8300c 100755 --- a/run.sh +++ b/run.sh @@ -9,5 +9,5 @@ else cd .. fi -export LLAMA_N_CTX=16000 +export LLAMA_N_CTX=44000 poetry run python -m uvicorn skynet.main:app --reload diff --git a/skynet/modules/ttt/openai_api/app.py b/skynet/modules/ttt/openai_api/app.py index 5238aa7..7de30a6 100644 --- a/skynet/modules/ttt/openai_api/app.py +++ b/skynet/modules/ttt/openai_api/app.py @@ -31,7 +31,7 @@ def initialize(): f'python -m {openai_api_server_path} \ --disable-log-requests \ --model {llama_path} \ - --gpu_memory_utilization 0.95 \ + --gpu_memory_utilization 0.99 \ --max-model-len {llama_n_ctx} \ --port {openai_api_server_port}'.split(), shell=False,