From 5f4f69431adb91b751453ff2e1983fa56d690bb0 Mon Sep 17 00:00:00 2001
From: Avram Tudor <tudor.avram@8x8.com>
Date: Thu, 10 Oct 2024 10:22:51 +0000
Subject: [PATCH] fix: increase gpu memory utilization and context length

---
 run.sh                               | 2 +-
 skynet/modules/ttt/openai_api/app.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/run.sh b/run.sh
index 0bce377..7c8300c 100755
--- a/run.sh
+++ b/run.sh
@@ -9,5 +9,5 @@ else
   cd ..
 fi
 
-export LLAMA_N_CTX=16000
+export LLAMA_N_CTX=44000
 poetry run python -m uvicorn skynet.main:app --reload
diff --git a/skynet/modules/ttt/openai_api/app.py b/skynet/modules/ttt/openai_api/app.py
index 5238aa7..7de30a6 100644
--- a/skynet/modules/ttt/openai_api/app.py
+++ b/skynet/modules/ttt/openai_api/app.py
@@ -31,7 +31,7 @@ def initialize():
             f'python -m {openai_api_server_path} \
                 --disable-log-requests \
                 --model {llama_path} \
-                --gpu_memory_utilization 0.95 \
+                --gpu_memory_utilization 0.99 \
                 --max-model-len {llama_n_ctx} \
                 --port {openai_api_server_port}'.split(),
             shell=False,