From d7cd34d2d3616a11d8bd9fc9b40f5beb5fc5c3ab Mon Sep 17 00:00:00 2001
From: Umberto Griffo <1609440+umbertogriffo@users.noreply.github.com>
Date: Tue, 27 Aug 2024 16:48:04 +0100
Subject: [PATCH] refactor: system template as a constant

---
 chatbot/bot/client/lama_cpp_client.py         | 21 ++++++++++---------
 chatbot/bot/client/prompt.py                  |  4 ++++
 chatbot/bot/model/model.py                    |  3 ---
 chatbot/bot/model/settings/llama_3.py         |  1 -
 chatbot/bot/model/settings/openchat.py        |  2 --
 chatbot/bot/model/settings/phi_3.py           |  1 -
 chatbot/bot/model/settings/stablelm_zephyr.py |  1 -
 chatbot/bot/model/settings/starling.py        |  1 -
 8 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/chatbot/bot/client/lama_cpp_client.py b/chatbot/bot/client/lama_cpp_client.py
index 324c549..0bf0e35 100644
--- a/chatbot/bot/client/lama_cpp_client.py
+++ b/chatbot/bot/client/lama_cpp_client.py
@@ -12,6 +12,7 @@
     REFINED_ANSWER_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
     REFINED_CTX_PROMPT_TEMPLATE,
     REFINED_QUESTION_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
+    SYSTEM_TEMPLATE,
     generate_conversation_awareness_prompt,
     generate_ctx_prompt,
     generate_qa_prompt,
@@ -22,7 +23,7 @@
 
 class LamaCppClient:
     """
-    Class for implementing language model clients.
+    Class for implementing language model client.
     """
 
     def __init__(self, model_folder: Path, model_settings: Model):
@@ -99,7 +100,7 @@ def generate_answer(self, prompt: str, max_new_tokens: int = 512) -> str:
         """
         output = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -123,7 +124,7 @@ async def async_generate_answer(self, prompt: str, max_new_tokens: int = 512) ->
         """
         output = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -168,7 +169,7 @@ def start_answer_iterator_streamer(
         """
         stream = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -192,7 +193,7 @@ async def async_start_answer_iterator_streamer(
         """
         stream = self.llm.create_chat_completion(
             messages=[
-                {"role": "system", "content": self.model_settings.system_template},
+                {"role": "system", "content": SYSTEM_TEMPLATE},
                 {"role": "user", "content": f"{prompt}"},
             ],
             max_tokens=max_new_tokens,
@@ -217,7 +218,7 @@ def generate_qa_prompt(self, question: str) -> str:
         """
         return generate_qa_prompt(
             template=QA_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
         )
 
@@ -234,7 +235,7 @@ def generate_ctx_prompt(self, question: str, context: str) -> str:
         """
         return generate_ctx_prompt(
             template=CTX_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             context=context,
         )
@@ -253,7 +254,7 @@ def generate_refined_ctx_prompt(self, question: str, context: str, existing_answ
         """
         return generate_refined_ctx_prompt(
             template=REFINED_CTX_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             context=context,
             existing_answer=existing_answer,
         )
@@ -262,7 +263,7 @@ def generate_refined_ctx_prompt(self, question: str, context: str, existing_answ
     def generate_refined_question_conversation_awareness_prompt(self, question: str, chat_history: str) -> str:
         return generate_conversation_awareness_prompt(
             template=REFINED_QUESTION_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             chat_history=chat_history,
         )
@@ -270,7 +271,7 @@ def generate_refined_question_conversation_awareness_prompt(self, question: str,
     def generate_refined_answer_conversation_awareness_prompt(self, question: str, chat_history: str) -> str:
         return generate_conversation_awareness_prompt(
             template=REFINED_ANSWER_CONVERSATION_AWARENESS_PROMPT_TEMPLATE,
-            system=self.model_settings.system_template,
+            system=SYSTEM_TEMPLATE,
             question=question,
             chat_history=chat_history,
         )
diff --git a/chatbot/bot/client/prompt.py b/chatbot/bot/client/prompt.py
index adde08d..025f213 100644
--- a/chatbot/bot/client/prompt.py
+++ b/chatbot/bot/client/prompt.py
@@ -1,3 +1,7 @@
+# A string template for the system message.
+# This template is used to define the behavior and characteristics of the assistant.
+SYSTEM_TEMPLATE = """You are a helpful, respectful and honest assistant."""
+
 # A string template with placeholders for question.
 QA_PROMPT_TEMPLATE = """Answer the question below:
 {question}
diff --git a/chatbot/bot/model/model.py b/chatbot/bot/model/model.py
index 2be8843..d35fdae 100644
--- a/chatbot/bot/model/model.py
+++ b/chatbot/bot/model/model.py
@@ -5,8 +5,5 @@
 class Model(ABC):
     url: str
     file_name: str
-    clients: list[str]
     config: Dict[str, Any]
     config_answer: Optional[Dict[str, Any]]
-    type: Optional[str]
-    system_template: str
diff --git a/chatbot/bot/model/settings/llama_3.py b/chatbot/bot/model/settings/llama_3.py
index 2f390b5..c3040ec 100644
--- a/chatbot/bot/model/settings/llama_3.py
+++ b/chatbot/bot/model/settings/llama_3.py
@@ -10,4 +10,3 @@ class Llama3Settings(Model):
         "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "
diff --git a/chatbot/bot/model/settings/openchat.py b/chatbot/bot/model/settings/openchat.py
index 5f2ad22..082b071 100644
--- a/chatbot/bot/model/settings/openchat.py
+++ b/chatbot/bot/model/settings/openchat.py
@@ -10,7 +10,6 @@ class OpenChat35Settings(Model):
         "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "
 
 
 class OpenChat36Settings(Model):
@@ -23,4 +22,3 @@ class OpenChat36Settings(Model):
         "flash_attn": False,  # Use flash attention.
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = ""
diff --git a/chatbot/bot/model/settings/phi_3.py b/chatbot/bot/model/settings/phi_3.py
index 20bef46..6774a08 100644
--- a/chatbot/bot/model/settings/phi_3.py
+++ b/chatbot/bot/model/settings/phi_3.py
@@ -10,4 +10,3 @@ class PhiThreeSettings(Model):
         "n_gpu_layers": 33,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "
diff --git a/chatbot/bot/model/settings/stablelm_zephyr.py b/chatbot/bot/model/settings/stablelm_zephyr.py
index f711d97..9498ed1 100644
--- a/chatbot/bot/model/settings/stablelm_zephyr.py
+++ b/chatbot/bot/model/settings/stablelm_zephyr.py
@@ -10,4 +10,3 @@ class StableLMZephyrSettings(Model):
         "n_gpu_layers": 35,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = ""
diff --git a/chatbot/bot/model/settings/starling.py b/chatbot/bot/model/settings/starling.py
index 956a922..931aa51 100644
--- a/chatbot/bot/model/settings/starling.py
+++ b/chatbot/bot/model/settings/starling.py
@@ -10,4 +10,3 @@ class StarlingSettings(Model):
         "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
     }
     config_answer = {"temperature": 0.7, "stop": []}
-    system_template = "You are a helpful, respectful and honest assistant. "