feat: add support for Starling, Llama 3 and Phi-3 models
umbertogriffo committed May 25, 2024
1 parent 011fa5a commit 41d9bb4
Showing 14 changed files with 334 additions and 97 deletions.
200 changes: 115 additions & 85 deletions README.md

Large diffs are not rendered by default.

21 changes: 15 additions & 6 deletions chatbot/bot/model/model_settings.py
@@ -1,11 +1,14 @@
 from enum import Enum
 
-from bot.model.dolphin import DolphinSettings
-from bot.model.mistral import MistralSettings
-from bot.model.neural_beagle import NeuralBeagleSettings
-from bot.model.openchat import OpenChatSettings
-from bot.model.stablelm_zephyr import StableLMZephyrSettings
-from bot.model.zephyr import ZephyrSettings
+from bot.model.settings.dolphin import DolphinSettings
+from bot.model.settings.llama_3 import LlamaThreeSettings
+from bot.model.settings.mistral import MistralSettings
+from bot.model.settings.neural_beagle import NeuralBeagleSettings
+from bot.model.settings.openchat import OpenChatSettings
+from bot.model.settings.phi_3 import PhiThreeSettings
+from bot.model.settings.stablelm_zephyr import StableLMZephyrSettings
+from bot.model.settings.starling import StarlingSettings
+from bot.model.settings.zephyr import ZephyrSettings
 
 
 class ModelType(Enum):
@@ -14,7 +17,10 @@ class ModelType(Enum):
     DOLPHIN = "dolphin"
     STABLELM_ZEPHYR = "stablelm-zephyr"
     OPENCHAT = "openchat"
+    STARLING = "starling"
     NEURAL_BEAGLE = "neural-beagle"
+    PHI_3 = "phi-3"
+    LLAMA_3 = "llama-3"
 
 
 SUPPORTED_MODELS = {
@@ -23,7 +29,10 @@ class ModelType(Enum):
     ModelType.DOLPHIN.value: DolphinSettings,
     ModelType.STABLELM_ZEPHYR.value: StableLMZephyrSettings,
     ModelType.OPENCHAT.value: OpenChatSettings,
+    ModelType.STARLING.value: StarlingSettings,
     ModelType.NEURAL_BEAGLE.value: NeuralBeagleSettings,
+    ModelType.PHI_3.value: PhiThreeSettings,
+    ModelType.LLAMA_3.value: LlamaThreeSettings,
 }
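For orientation, here is a minimal sketch of how this registry is typically consumed; the `get_model_settings` helper is illustrative only and not part of this commit:

from bot.model.model_settings import SUPPORTED_MODELS, ModelType


def get_model_settings(model_name: str):
    # Hypothetical helper: resolve a settings class from the registry,
    # failing fast on unsupported model names.
    if model_name not in SUPPORTED_MODELS:
        raise ValueError(f"Unknown model {model_name!r}; supported: {sorted(SUPPORTED_MODELS)}")
    return SUPPORTED_MODELS[model_name]


settings = get_model_settings(ModelType.LLAMA_3.value)  # -> LlamaThreeSettings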
Empty file.
File renamed without changes.
68 changes: 68 additions & 0 deletions chatbot/bot/model/settings/llama_3.py
@@ -0,0 +1,68 @@
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class LlamaThreeSettings(Model):
    url = "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
    file_name = "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
    clients = [LlmClientType.LAMA_CPP]
    config = {
        "n_ctx": 4096,  # The max sequence length to use - note that longer sequence lengths require much more resources
        "n_threads": 8,  # The number of CPU threads to use, tailor to your system and the resulting performance
        "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
    }
    config_answer = {"temperature": 0.7, "stop": []}
    system_template = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are a helpful, respectful and "
        "honest assistant. <|eot_id|><|start_header_id|>user<|end_header_id|>"
    )
    qa_prompt_template = """{system}\n
Answer the question below:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    ctx_prompt_template = """{system}\n
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    refined_ctx_prompt_template = """{system}\n
{question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    refined_question_conversation_awareness_prompt_template = """{system}\n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    refined_answer_conversation_awareness_prompt_template = """
You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}\n
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
File renamed without changes.
File renamed without changes.
File renamed without changes.
65 changes: 65 additions & 0 deletions chatbot/bot/model/settings/phi_3.py
@@ -0,0 +1,65 @@
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class PhiThreeSettings(Model):
    url = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
    file_name = "Phi-3-mini-4k-instruct-q4.gguf"
    clients = [LlmClientType.LAMA_CPP]
    config = {
        "n_ctx": 4096,  # The max sequence length to use - note that longer sequence lengths require much more resources
        "n_threads": 8,  # The number of CPU threads to use, tailor to your system and the resulting performance
        "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
    }
    config_answer = {"temperature": 0.7, "stop": []}
    system_template = "You are a helpful, respectful and honest assistant. "
    qa_prompt_template = """{system}\n
<|user|>\n Answer the question below:
{question}<|end|>\n<|assistant|>
"""
    ctx_prompt_template = """{system}\n
<|user|>\n Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|end|>\n<|assistant|>
"""
    refined_ctx_prompt_template = """{system}\n
<|user|>\n {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|end|>\n<|assistant|>
"""
    refined_question_conversation_awareness_prompt_template = """{system}\n
<|user|>\n Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|end|>\n<|assistant|>
"""

    refined_answer_conversation_awareness_prompt_template = """
<|user|>\n You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}\n
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.
<|end|>\n<|assistant|>
"""
File renamed without changes.
65 changes: 65 additions & 0 deletions chatbot/bot/model/settings/starling.py
@@ -0,0 +1,65 @@
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class StarlingSettings(Model):
    url = "https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF/resolve/main/Starling-LM-7B-beta-Q4_K_M.gguf"
    file_name = "Starling-LM-7B-beta-Q4_K_M.gguf"
    clients = [LlmClientType.LAMA_CPP]
    config = {
        "n_ctx": 4096,  # The max sequence length to use - note that longer sequence lengths require much more resources
        "n_threads": 8,  # The number of CPU threads to use, tailor to your system and the resulting performance
        "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
    }
    config_answer = {"temperature": 0.7, "stop": []}
    system_template = "You are a helpful, respectful and honest assistant. "
    qa_prompt_template = """{system}\n
GPT4 Correct User: Answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
    ctx_prompt_template = """{system}\n
GPT4 Correct User: Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
    refined_ctx_prompt_template = """{system}\n
GPT4 Correct User: The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|end_of_turn|>GPT4 Correct Assistant:
"""
    refined_question_conversation_awareness_prompt_template = """{system}\n
GPT4 Correct User: Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|end_of_turn|>GPT4 Correct Assistant:
"""

    refined_answer_conversation_awareness_prompt_template = """
GPT4 Correct User: You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}\n
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.
<|end_of_turn|>GPT4 Correct Assistant:
"""
File renamed without changes.
10 changes: 5 additions & 5 deletions todo.md
@@ -1,6 +1,6 @@
 # Todo
-- [ ] `llama-cpp-python` version `0.2.29` has a serious issue https://github.com/abetlen/llama-cpp-python/issues/1089 - Introduce unit tests to update to newer `llama-cpp-python` versions confidently.
-- [ ] try https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF (also the beta version).
-- [ ] try https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
-- [ ] try Chat Templates https://medium.com/@ahmet_celebi/demystifying-chat-templates-of-llm-using-llama-cpp-and-ctransformers-f17871569cd6
-- [ ] make docker container
+- Test `openchat-3.6-8b-20240522`:
+  - https://huggingface.co/openchat/openchat-3.6-8b-20240522
+  - https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF
+- Try Chat Templates https://medium.com/@ahmet_celebi/demystifying-chat-templates-of-llm-using-llama-cpp-and-ctransformers-f17871569cd6
+- Make docker container
2 changes: 1 addition & 1 deletion version/llama_cpp
@@ -1 +1 @@
-0.2.28
+0.2.76
