From d7cd34d2d3616a11d8bd9fc9b40f5beb5fc5c3ab Mon Sep 17 00:00:00 2001
From: Umberto Griffo <1609440+umbertogriffo@users.noreply.github.com>
Date: Tue, 27 Aug 2024 16:48:04 +0100
Subject: [PATCH] refactor: system template as a constant

---
 chatbot/bot/client/lama_cpp_client.py         | 21 ++++++++++---------
 chatbot/bot/client/prompt.py                  |  4 ++++
 chatbot/bot/model/model.py                    |  3 ---
 chatbot/bot/model/settings/llama_3.py         |  1 -
 chatbot/bot/model/settings/openchat.py        |  2 --
 chatbot/bot/model/settings/phi_3.py           |  1 -
 chatbot/bot/model/settings/stablelm_zephyr.py |  1 -
 chatbot/bot/model/settings/starling.py        |  1 -
 8 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/chatbot/bot/client/lama_cpp_client.py b/chatbot/bot/client/lama_cpp_client.py
index 324c549..0bf0e35 100644
--- a/chatbot/bot/client/lama_cpp_client.py
+++ b/chatbot/bot/client/lama_cpp_client.py
@@ -12,6 +12,7 @@
     REFINED_ANSWER_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
     REFINED_CTX_PROMPT_TEMPLATE,
     REFINED_QUESTION_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
+    SYSTEM_TEMPLATE,
     generate_conversation_awareness_prompt,
     generate_ctx_prompt,
     generate_qa_prompt,
@@ -22,7 +23,7 @@
 
 class LamaCppClient:
     """
-    Class for implementing language model clients.
+    Class for implementing language model client.
     """
 
     def __init__(self, model_folder: Path, model_settings: Model):
@@ -99,7 +100,7 @@ def generate_answer(self, prompt: str, max_new_tokens: int = 512) -> str:
         """
         output = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -123,7 +124,7 @@ async def async_generate_answer(self, prompt: str, max_new_tokens: int = 512) ->
         """
         output = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -168,7 +169,7 @@ def start_answer_iterator_streamer(
         """
         stream = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -192,7 +193,7 @@ async def async_start_answer_iterator_streamer(
         """
         stream = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -217,7 +218,7 @@ def generate_qa_prompt(self, question: str) -> str:
         """
         return generate_qa_prompt(
             template=QA_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
         )
 
@@ -234,7 +235,7 @@ def generate_ctx_prompt(self, question: str, context: str) -> str:
         """
         return generate_ctx_prompt(
             template=CTX_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             context=context,
         )
@@ -253,7 +254,7 @@ def generate_refined_ctx_prompt(self, question: str, context: str, existing_answ
         """
         return generate_refined_ctx_prompt(
             template=REFINED_CTX_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             context=context,
             existing_answer=existing_answer,
         )
@@ -262,7 +263,7 @@ def generate_refined_ctx_prompt(self, question: str, context: str, existing_answ
     def generate_refined_question_conversation_awareness_prompt(self, question: str, chat_history: str) -> str:
         return generate_conversation_awareness_prompt(
             template=REFINED_QUESTION_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             chat_history=chat_history,
         )
@@ -270,7 +271,7 @@ def generate_refined_question_conversation_awareness_prompt(self, question: str,
     def generate_refined_answer_conversation_awareness_prompt(self, question: str, chat_history: str) -> str:
         return generate_conversation_awareness_prompt(
             template=REFINED_ANSWER_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             chat_history=chat_history,
         )
diff --git a/chatbot/bot/client/prompt.py b/chatbot/bot/client/prompt.py
index adde08d..025f213 100644
--- a/chatbot/bot/client/prompt.py
+++ b/chatbot/bot/client/prompt.py
@@ -1,3 +1,7 @@
+# A string template for the system message.
+# This template is used to define the behavior and characteristics of the assistant.
+SYSTEM_TEMPLATE = """You are a helpful, respectful and honest assistant."""
+
 # A string template with placeholders for question.
 QA_PROMPT_TEMPLATE = """Answer the question below:
 {question}
diff --git a/chatbot/bot/model/model.py b/chatbot/bot/model/model.py
index 2be8843..d35fdae 100644
--- a/chatbot/bot/model/model.py
+++ b/chatbot/bot/model/model.py
@@ -5,8 +5,5 @@
 class Model(ABC):
     url: str
     file_name: str
-    clients: list[str]
     config: Dict[str, Any]
     config_answer: Optional[Dict[str, Any]]
-    type: Optional[str]
-    system_template: str
diff --git a/chatbot/bot/model/settings/llama_3.py b/chatbot/bot/model/settings/llama_3.py
index 2f390b5..c3040ec 100644
--- a/chatbot/bot/model/settings/llama_3.py
+++ b/chatbot/bot/model/settings/llama_3.py
@@ -10,4 +10,3 @@ class Llama3Settings(Model):
         "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "
diff --git a/chatbot/bot/model/settings/openchat.py b/chatbot/bot/model/settings/openchat.py
index 5f2ad22..082b071 100644
--- a/chatbot/bot/model/settings/openchat.py
+++ b/chatbot/bot/model/settings/openchat.py
@@ -10,7 +10,6 @@ class OpenChat35Settings(Model):
         "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "
 
 
 class OpenChat36Settings(Model):
@@ -23,4 +22,3 @@ class OpenChat36Settings(Model):
         "flash_attn": False,  # Use flash attention.
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = ""
diff --git a/chatbot/bot/model/settings/phi_3.py b/chatbot/bot/model/settings/phi_3.py
index 20bef46..6774a08 100644
--- a/chatbot/bot/model/settings/phi_3.py
+++ b/chatbot/bot/model/settings/phi_3.py
@@ -10,4 +10,3 @@ class PhiThreeSettings(Model):
         "n_gpu_layers": 33,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "
diff --git a/chatbot/bot/model/settings/stablelm_zephyr.py b/chatbot/bot/model/settings/stablelm_zephyr.py
index f711d97..9498ed1 100644
--- a/chatbot/bot/model/settings/stablelm_zephyr.py
+++ b/chatbot/bot/model/settings/stablelm_zephyr.py
@@ -10,4 +10,3 @@ class StableLMZephyrSettings(Model):
         "n_gpu_layers": 35,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = ""
diff --git a/chatbot/bot/model/settings/starling.py b/chatbot/bot/model/settings/starling.py
index 956a922..931aa51 100644
--- a/chatbot/bot/model/settings/starling.py
+++ b/chatbot/bot/model/settings/starling.py
@@ -10,4 +10,3 @@ class StarlingSettings(Model):
         "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "