feat: add support for Starling, Llama3 and Phi-3 models
1 parent 011fa5a · commit 41d9bb4 · Showing 14 changed files with 334 additions and 97 deletions.
Empty file.
File renamed without changes.
New file: Llama 3 model settings (@@ -0,0 +1,68 @@):
```python
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class LlamaThreeSettings(Model):
    url = "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
    file_name = "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
    clients = [LlmClientType.LAMA_CPP]
    config = {
        "n_ctx": 4096,  # The max sequence length to use - note that longer sequence lengths require much more resources
        "n_threads": 8,  # The number of CPU threads to use, tailor to your system and the resulting performance
        "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
    }
    config_answer = {"temperature": 0.7, "stop": []}
    system_template = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are a helpful, respectful and "
        "honest assistant. <|eot_id|><|start_header_id|>user<|end_header_id|>"
    )
    qa_prompt_template = """{system}\n
Answer the question below:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    ctx_prompt_template = """{system}\n
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    refined_ctx_prompt_template = """{system}\n
{question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    refined_question_conversation_awareness_prompt_template = """{system}\n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

    refined_answer_conversation_awareness_prompt_template = """
You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}\n
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
```
File renamed without changes.
File renamed without changes.
File renamed without changes.
New file: Phi-3 model settings (@@ -0,0 +1,65 @@):
```python
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class PhiThreeSettings(Model):
    url = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
    file_name = "Phi-3-mini-4k-instruct-q4.gguf"
    clients = [LlmClientType.LAMA_CPP]
    config = {
        "n_ctx": 4096,  # The max sequence length to use - note that longer sequence lengths require much more resources
        "n_threads": 8,  # The number of CPU threads to use, tailor to your system and the resulting performance
        "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
    }
    config_answer = {"temperature": 0.7, "stop": []}
    system_template = "You are a helpful, respectful and honest assistant. "
    qa_prompt_template = """{system}\n
<|user|>\n Answer the question below:
{question}<|end|>\n<|assistant|>
"""
    ctx_prompt_template = """{system}\n
<|user|>\n Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|end|>\n<|assistant|>
"""
    refined_ctx_prompt_template = """{system}\n
<|user|>\n {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|end|>\n<|assistant|>
"""
    refined_question_conversation_awareness_prompt_template = """{system}\n
<|user|>\n Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|end|>\n<|assistant|>
"""

    refined_answer_conversation_awareness_prompt_template = """
<|user|>\n You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}\n
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.
<|end|>\n<|assistant|>
"""
```
File renamed without changes.
New file: Starling model settings (@@ -0,0 +1,65 @@):
```python
from bot.client.llm_client import LlmClientType
from bot.model.model import Model


class StarlingSettings(Model):
    url = "https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF/resolve/main/Starling-LM-7B-beta-Q4_K_M.gguf"
    file_name = "Starling-LM-7B-beta-Q4_K_M.gguf"
    clients = [LlmClientType.LAMA_CPP]
    config = {
        "n_ctx": 4096,  # The max sequence length to use - note that longer sequence lengths require much more resources
        "n_threads": 8,  # The number of CPU threads to use, tailor to your system and the resulting performance
        "n_gpu_layers": 50,  # The number of layers to offload to GPU, if you have GPU acceleration available
    }
    config_answer = {"temperature": 0.7, "stop": []}
    system_template = "You are a helpful, respectful and honest assistant. "
    qa_prompt_template = """{system}\n
GPT4 Correct User: Answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
    ctx_prompt_template = """{system}\n
GPT4 Correct User: Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question below:
{question}<|end_of_turn|>GPT4 Correct Assistant:
"""
    refined_ctx_prompt_template = """{system}\n
GPT4 Correct User: The original query is as follows: {question}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer
(only if needed) with some more context below.
---------------------
{context}
---------------------
Given the new context, refine the original answer to better answer the query.
If the context isn't useful, return the original answer.
Refined Answer:<|end_of_turn|>GPT4 Correct Assistant:
"""
    refined_question_conversation_awareness_prompt_template = """{system}\n
GPT4 Correct User: Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}
Given the above conversation and a follow up question, rephrase the follow up question to be a standalone question.
Standalone question:<|end_of_turn|>GPT4 Correct Assistant:
"""

    refined_answer_conversation_awareness_prompt_template = """
GPT4 Correct User: You are engaging in a conversation with a human participant who is unaware that they might be
interacting with a machine. \n
Your goal is to respond in a way that convincingly simulates human-like intelligence and behavior. \n
The conversation should be natural, coherent, and contextually relevant. \n
Chat History:
---------------------
{chat_history}
---------------------
Follow Up Question: {question}\n
Given the context provided in the Chat History and the follow up question, please answer the follow up question above.
If the follow up question isn't correlated to the context provided in the Chat History, please just answer the follow up
question, ignoring the context provided in the Chat History.
Please also don't reformulate the follow up question, and write just a concise answer.
<|end_of_turn|>GPT4 Correct Assistant:
"""
```
File renamed without changes.
Updated TODO list:

```diff
@@ -1,6 +1,6 @@
 # Todo
 - [ ] `llama-cpp-python` version `0.2.29` has a serious issue https://github.com/abetlen/llama-cpp-python/issues/1089 - Introduce unit tests to update to newer `llama-cpp-python` versions confidently.
-- [ ] try https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF (also the beta version).
-- [ ] try https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
-- [ ] try Chat Templates https://medium.com/@ahmet_celebi/demystifying-chat-templates-of-llm-using-llama-cpp-and-ctransformers-f17871569cd6
-- [ ] make docker container
+- [ ] Test `openchat-3.6-8b-20240522`:
+  - https://huggingface.co/openchat/openchat-3.6-8b-20240522
+  - https://huggingface.co/bartowski/openchat-3.6-8b-20240522-GGUF
+- [ ] Try Chat Templates https://medium.com/@ahmet_celebi/demystifying-chat-templates-of-llm-using-llama-cpp-and-ctransformers-f17871569cd6
+- [ ] Make docker container
```
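The first remaining item, unit tests so that `llama-cpp-python` can be bumped confidently, could start as a smoke test like the sketch below; the environment variable and prompt are assumptions, not tests that exist in this repository:

```python
# Hypothetical smoke test for llama-cpp-python upgrades.
import os

import pytest
from llama_cpp import Llama

MODEL_PATH = os.environ.get("TEST_GGUF_PATH")  # assumed convention


@pytest.mark.skipif(MODEL_PATH is None, reason="set TEST_GGUF_PATH to a local GGUF file")
def test_completion_returns_text():
    llm = Llama(model_path=MODEL_PATH, n_ctx=512, n_threads=2)
    out = llm("Q: What is 2 + 2?\nA:", max_tokens=8, temperature=0.0)
    assert out["choices"][0]["text"].strip()  # any non-empty completion
```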
Updated version pin:

```diff
@@ -1 +1 @@
-0.2.28
+0.2.76
```
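If the pin lives in a plain text file, a quick sanity check can compare it against the installed package; the file path below is an assumption for illustration:

```python
# Hypothetical pin check; "version/llama_cpp" is an assumed path.
import llama_cpp

with open("version/llama_cpp") as f:
    pinned = f.read().strip()

assert llama_cpp.__version__ == pinned, (
    f"installed {llama_cpp.__version__}, pinned {pinned}"
)
```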