Skip to content

Commit

Permalink
Add new ranked GGMLv3 models (#417)
Browse files Browse the repository at this point in the history
* Feature: add some best ranked models

* Fix: models size

* Update README.md

---------

Co-authored-by: pabl-o-ce <[email protected]>
Co-authored-by: Juan Calderon-Perez <[email protected]>
  • Loading branch information
3 people authored Jun 12, 2023
1 parent 4970865 commit 6bf84dd
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 86 deletions.
23 changes: 19 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 🦙 Serge - LLaMA made easy
# Serge - LLaMA made easy 🦙

![License](https://img.shields.io/github/license/serge-chat/serge)
[![Discord](https://img.shields.io/discord/1088427963801948201?label=Discord)](https://discord.gg/62Hc6FEYQH)
Expand Down Expand Up @@ -49,17 +49,24 @@ Then, just visit http://localhost:8008/, You can find the API documentation at h
Ensure you have Docker Desktop installed, WSL2 configured, and enough free RAM to run models.
## ☁️Kubernetes & Docker Compose Setup
## ☁️ Kubernetes & Docker Compose Setup
Instructions for setting up Serge on Kubernetes can be found in the [wiki](https://github.com/serge-chat/serge/wiki/Integrating-Serge-in-your-orchestration#kubernetes-example).
## 🧠 Supported Models
We currently support the following models:
- Airoboros 🎈
- Airoboros-7B
- Airoboros-13B
- Airoboros-33B
- Alpaca 🦙
- Alpaca-LoRA-65B
- GPT4-Alpaca-LoRA-30B
- Chronos 🌑
- Chronos-13B
- Chronos-33B
- GPT4All 🌍
- GPT4All-13B
- Guanaco 🦙
Expand All @@ -70,6 +77,9 @@ We currently support the following models:
- Koala 🐨
- Koala-7B
- Koala-13B
- Llama 🦙
- FinLlama-33B
- Llama-Supercot-30B
- Lazarus 💀
- Lazarus-30B
- Nous 🧠
Expand All @@ -80,13 +90,18 @@ We currently support the following models:
- Samantha-7B
- Samantha-13B
- Samantha-33B
- Stable 🐎
- Stable-Vicuna-13B
- Tulu 🎚
- Tulu-7B
- Tulu-13B
- Tulu-30B
- Vicuna 🦙
- Stable-Vicuna-13B
- Vicuna-CoT-7B
- Vicuna-CoT-13B
- Vicuna-v1.1-7B
- Vicuna-v1.1-13B
- VicUnlocked-30B
- VicUnlocked-65B
- Wizard 🧙
- Wizard-Mega-13B
- Wizard-Vicuna-Uncensored-7B
Expand Down
234 changes: 152 additions & 82 deletions api/src/serge/routers/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@
)

models_info = {
"Airoboros-7B": [
"TheBloke/airoboros-7b-gpt4-GGML",
"airoboros-7b-gpt4.ggmlv3.q5_1.bin",
5.06e9,
],
"Airoboros-7B-q6_K": [
"TheBloke/airoboros-7b-gpt4-GGML",
"airoboros-7b-gpt4.ggmlv3.q6_K.bin",
5.53e9,
],
"Airoboros-13B": [
"TheBloke/airoboros-13b-gpt4-GGML",
"airoboros-13b-gpt4.ggmlv3.q6_K.bin",
10.7e9,
],
"Airoboros-33B": [
"TheBloke/airoboros-33b-gpt4-GGML",
"airoboros-33b-gpt4.ggmlv3.q6_K.bin",
26.7e9,
],
"GPT4AlpacaLoRA-30B": [
"TheBloke/gpt4-alpaca-lora-30B-4bit-GGML",
"gpt4-alpaca-lora-30b.ggmlv3.q5_1.bin",
Expand All @@ -23,34 +43,129 @@
"alpaca-lora-65B.ggmlv3.q5_K_M.bin",
46.2e9,
],
"OpenAssistant-30B": [
"TheBloke/OpenAssistant-SFT-7-Llama-30B-GGML",
"OpenAssistant-SFT-7-Llama-30B.ggmlv3.q6_K.bin",
26.7e9,
],
"GPT4All-13B": [
"TheBloke/GPT4All-13B-snoozy-GGML",
"GPT4All-13B-snoozy.ggmlv3.q6_K.bin",
10.7e9,
],
"StableVicuna-13B": [
"TheBloke/stable-vicuna-13B-GGML",
"stable-vicuna-13B.ggmlv3.q6_K.bin",
"Chronos-13B": [
"TheBloke/chronos-13B-GGML",
"chronos-13b.ggmlv3.q6_K.bin",
10.7e9,
],
"Vicuna-v1.1-7B": [
"TheBloke/vicuna-7B-1.1-GGML",
"vicuna-7b-1.1.ggmlv3.q5_1.bin",
"Chronos-33B": [
"TheBloke/chronos-33b-GGML",
"chronos-33b.ggmlv3.q6_K.bin",
26.7e9,
],
"Guanaco-7B": [
"TheBloke/guanaco-7B-GGML",
"guanaco-7B.ggmlv3.q5_1.bin",
5.06e9,
],
"Vicuna-v1.1-7B-q6_K": [
"TheBloke/vicuna-7B-1.1-GGML",
"vicuna-7b-1.1.ggmlv3.q6_K.bin",
"Guanaco-7B-q6_K": [
"TheBloke/guanaco-7B-GGML",
"guanaco-7B.ggmlv3.q6_K.bin",
5.53e9,
],
"Vicuna-v1.1-13B": [
"TheBloke/vicuna-13b-1.1-GGML",
"vicuna-13b-1.1.ggmlv3.q6_K.bin",
"Guanaco-13B": [
"TheBloke/guanaco-13B-GGML",
"guanaco-13B.ggmlv3.q6_K.bin",
10.7e9,
],
"Guanaco-33B": [
"TheBloke/guanaco-33B-GGML",
"guanaco-33B.ggmlv3.q6_K.bin",
26.7e9,
],
"Guanaco-65B": [
"TheBloke/guanaco-65B-GGML",
"guanaco-65B.ggmlv3.q5_K_M.bin",
46.2e9,
],
"Koala-7B": [
"TheBloke/koala-7B-GGML",
"koala-7B.ggmlv3.q5_1.bin",
5.06e9,
],
"Koala-7B-q6_K": [
"TheBloke/koala-7B-GGML",
"koala-7B.ggmlv3.q6_K.bin",
5.53e9,
],
"Koala-13B": [
"TheBloke/koala-13B-GGML",
"koala-13B.ggmlv3.q6_K.bin",
10.7e9,
],
"FinLlama-33B": [
"TheBloke/fin-llama-33B-GGML",
"fin-llama-33b.ggmlv3.q6_K.bin",
26.7e9,
],
"Llama-Supercot-30B": [
"TheBloke/llama-30b-supercot-GGML",
"llama-30b-supercot.ggmlv3.q6_K.bin",
26.7e9,
],
"Lazarus-30B": [
"TheBloke/30B-Lazarus-GGML",
"30b-Lazarus.ggmlv3.q6_K.bin",
26.7e9,
],
"Nous-Hermes-13B": [
"TheBloke/Nous-Hermes-13B-GGML",
"nous-hermes-13b.ggmlv3.q6_K.bin",
10.7e9,
],
"OpenAssistant-30B": [
"TheBloke/OpenAssistant-SFT-7-Llama-30B-GGML",
"OpenAssistant-SFT-7-Llama-30B.ggmlv3.q6_K.bin",
26.7e9,
],
"Samantha-7B": [
"TheBloke/Samantha-7B-GGML",
"Samantha-7B.ggmlv3.q5_1.bin",
5.06e9,
],
"Samantha-7B-q6_K": [
"TheBloke/Samantha-7B-GGML",
"Samantha-7B.ggmlv3.q6_K.bin",
5.53e9,
],
"Samantha-13B": [
"TheBloke/Samantha-13B-GGML",
"samantha-13b.ggmlv3.q6_K.bin",
10.7e9,
],
"Samantha-33B": [
"TheBloke/Samantha-33B-GGML",
"samantha-33B.ggmlv3.q6_K.bin",
26.7e9,
],
"Tulu-7B": [
"TheBloke/tulu-7B-GGML",
"tulu-7b.ggmlv3.q5_1.bin",
5.06e9,
],
"Tulu-7B-q6_K": [
"TheBloke/tulu-7B-GGML",
"tulu-7b.ggmlv3.q6_K.bin",
5.06e9,
],
"Tulu-13B": [
"TheBloke/tulu-13B-GGML",
"tulu-13b.ggmlv3.q6_K.bin",
10.7e9,
],
"Tulu-30B": [
"TheBloke/tulu-30B-GGML",
"tulu-30b.ggmlv3.q6_K.bin",
26.7e9,
],
"StableVicuna-13B": [
"TheBloke/stable-vicuna-13B-GGML",
"stable-vicuna-13B.ggmlv3.q6_K.bin",
10.7e9,
],
"Vicuna-CoT-7B": [
Expand All @@ -68,31 +183,36 @@
"vicuna-13b-cot.ggmlv3.q6_K.bin",
10.7e9,
],
"Guanaco-7B": [
"TheBloke/guanaco-7B-GGML",
"guanaco-7B.ggmlv3.q5_1.bin",
"Vicuna-v1.1-7B": [
"TheBloke/vicuna-7B-1.1-GGML",
"vicuna-7b-1.1.ggmlv3.q5_1.bin",
5.06e9,
],
"Guanaco-7B-q6_K": [
"TheBloke/guanaco-7B-GGML",
"guanaco-7B.ggmlv3.q6_K.bin",
"Vicuna-v1.1-7B-q6_K": [
"TheBloke/vicuna-7B-1.1-GGML",
"vicuna-7b-1.1.ggmlv3.q6_K.bin",
5.53e9,
],
"Guanaco-13B": [
"TheBloke/guanaco-13B-GGML",
"guanaco-13B.ggmlv3.q6_K.bin",
"Vicuna-v1.1-13B": [
"TheBloke/vicuna-13b-1.1-GGML",
"vicuna-13b-1.1.ggmlv3.q6_K.bin",
10.7e9,
],
"Guanaco-33B": [
"TheBloke/guanaco-33B-GGML",
"guanaco-33B.ggmlv3.q6_K.bin",
"VicUnlocked-30B": [
"TheBloke/VicUnlocked-30B-LoRA-GGML",
"VicUnlocked-30B-LoRA.ggmlv3.q6_K.bin",
26.7e9,
],
"Guanaco-65B": [
"TheBloke/guanaco-65B-GGML",
"guanaco-65B.ggmlv3.q5_K_M.bin",
"VicUnlocked-65B": [
"TheBloke/VicUnlocked-alpaca-65B-QLoRA-GGML",
"vicunlocked-65b.ggmlv3.q5_K_M.bin",
46.2e9,
],
"Wizard-Mega-13B": [
"TheBloke/wizard-mega-13B-GGML",
"wizard-mega-13B.ggmlv3.q5_1.bin",
9.76e9,
],
"Wizard-Vicuna-Uncensored-7B": [
"TheBloke/Wizard-Vicuna-7B-Uncensored-GGML",
"Wizard-Vicuna-7B-Uncensored.ggmlv3.q5_1.bin",
Expand Down Expand Up @@ -138,56 +258,6 @@
"WizardLM-30B-Uncensored.ggmlv3.q6_K.bin",
26.7e9,
],
"Wizard-Mega-13B": [
"TheBloke/wizard-mega-13B-GGML",
"wizard-mega-13B.ggmlv3.q5_1.bin",
9.76e9,
],
"Lazarus-30B": [
"TheBloke/30B-Lazarus-GGML",
"30b-Lazarus.ggmlv3.q6_K.bin",
26.7e9,
],
"Nous-Hermes-13B": [
"TheBloke/Nous-Hermes-13B-GGML",
"nous-hermes-13b.ggmlv3.q6_K.bin",
10.7e9,
],
"Samantha-7B": [
"TheBloke/Samantha-7B-GGML",
"Samantha-7B.ggmlv3.q5_1.bin",
5.06e9,
],
"Samantha-7B-q6_K": [
"TheBloke/Samantha-7B-GGML",
"Samantha-7B.ggmlv3.q6_K.bin",
5.53e9,
],
"Samantha-13B": [
"TheBloke/Samantha-13B-GGML",
"samantha-13b.ggmlv3.q6_K.bin",
10.7e9,
],
"Samantha-33B": [
"TheBloke/Samantha-33B-GGML",
"samantha-33B.ggmlv3.q6_K.bin",
26.7e9,
],
"Koala-7B": [
"TheBloke/koala-7B-GGML",
"koala-7B.ggmlv3.q5_1.bin",
5.06e9,
],
"Koala-7B-q6_K": [
"TheBloke/koala-7B-GGML",
"koala-7B.ggmlv3.q6_K.bin",
5.53e9,
],
"Koala-13B": [
"TheBloke/koala-13B-GGML",
"koala-13B.ggmlv3.q6_K.bin",
10.7e9,
],
}

WEIGHTS = "/usr/src/app/weights/"
Expand Down

0 comments on commit 6bf84dd

Please sign in to comment.