diff --git a/README.md b/README.md index 62697e30db5..f536da37ba1 100644 --- a/README.md +++ b/README.md @@ -52,9 +52,9 @@ The following Environment Variables are available: | Variable Name | Description | Default Value | |-----------------------|---------------------------------------------------------|--------------------------------------| | `SERGE_DATABASE_URL` | Database connection string | `sqlite:////data/db/sql_app.db` | -| `SERGE_JWT_SECRET` | Key for auth token encryption. Use a random string | `uF7FGN5uzfGdFiPzR` | +| `SERGE_JWT_SECRET` | Key for auth token encryption. Use a random string | `uF7FGN5uzfGdFiPzR` | | `SERGE_SESSION_EXPIRY`| Duration in minutes before a user must reauthenticate | `60` | -| `NODE_ENV` | Node.js running environment | `production` | +| `NODE_ENV` | Node.js running environment | `production` | ## 🖥️ Windows @@ -73,30 +73,30 @@ Instructions for setting up Serge on Kubernetes can be found in the [wiki](https | **Code** | 13B, 33B | | **CodeLLaMA** | 7B, 7B-Instruct, 7B-Python, 13B, 13B-Instruct, 13B-Python, 34B, 34B-Instruct, 34B-Python | | **Codestral** | 22B v0.1 | -| **Gemma** | 2B, 1.1-2B-Instruct, 7B, 1.1-7B-Instruct | +| **Gemma** | 2B, 1.1-2B-Instruct, 7B, 1.1-7B-Instruct, 2-9B, 2-9B-Instruct, 2-27B, 2-27B-Instruct | | **Gorilla** | Falcon-7B-HF-v0, 7B-HF-v1, Openfunctions-v1, Openfunctions-v2 | -| **Falcon** | 7B, 7B-Instruct, 40B, 40B-Instruct | +| **Falcon** | 7B, 7B-Instruct, 11B, 40B, 40B-Instruct | | **LLaMA 2** | 7B, 7B-Chat, 7B-Coder, 13B, 13B-Chat, 70B, 70B-Chat, 70B-OASST | | **LLaMA 3** | 11B-Instruct, 13B-Instruct, 16B-Instruct | | **LLaMA Pro** | 8B, 8B-Instruct | -| **Med42** | 70B | +| **Mathstral** | 7B | +| **Med42** | 70B, v2-8B | | **Medalpaca** | 13B | | **Medicine** | Chat, LLM | | **Meditron** | 7B, 7B-Chat, 70B | -| **Meta-LlaMA-3** | 8B, 8B-Instruct, 70B, 70B-Instruct | -| **Mistral** | 7B-V0.1, 7B-Instruct-v0.2, 7B-OpenOrca | +| **Meta-LlaMA-3** | 3-8B, 3.1-8B, 3-8B-Instruct, 3.1-8B-Instruct, 3-70B, 
3.1-70B, 3-70B-Instruct, 3.1-70B-Instruct | +| **Mistral** | 7B-V0.1, 7B-Instruct-v0.2, 7B-OpenOrca, Nemo-Instruct | | **MistralLite** | 7B | | **Mixtral** | 8x7B-v0.1, 8x7B-Dolphin-2.7, 8x7B-Instruct-v0.1 | | **Neural-Chat** | 7B-v3.3 | | **Notus** | 7B-v1 | | **Notux** | 8x7b-v1 | | **Nous-Hermes 2** | Mistral-7B-DPO, Mixtral-8x7B-DPO, Mistral-8x7B-SFT | -| **OpenChat** | 7B-v3.5-1210 | +| **OpenChat** | 7B-v3.5-1210, 8B-v3.6-20240522 | | **OpenCodeInterpreter** | DS-6.7B, DS-33B, CL-7B, CL-13B, CL-70B | | **OpenLLaMA** | 3B-v2, 7B-v2, 13B-v2 | | **Orca 2** | 7B, 13B | -| **Phi 2** | 2.7B | -| **Phi 3** | mini-4k-instruct, medium-4k-instruct, medium-128k-instruct | +| **Phi** | 2-2.7B, 3-mini-4k-instruct, 3.1-mini-4k-instruct, 3.1-mini-128k-instruct, 3-medium-4k-instruct, 3-medium-128k-instruct | | **Python Code** | 13B, 33B | | **PsyMedRP** | 13B-v1, 20B-v1 | | **Starling LM** | 7B-Alpha | diff --git a/api/src/serge/data/models.json b/api/src/serge/data/models.json index aedd62d2335..7079c611b0a 100644 --- a/api/src/serge/data/models.json +++ b/api/src/serge/data/models.json @@ -20,7 +20,7 @@ "models": [ { "name": "BioMistral-7B", - "repo": "BioMistral/BioMistral-7B-GGUF", + "repo": "BioMistral/BioMistral-7B-GGUF", "files": [ { "name": "q4_K_M", @@ -30,7 +30,7 @@ ] } ] - }, + }, { "name": "Code", "models": [ @@ -202,7 +202,18 @@ "disk_space": 4975385792.0 } ] - }, + }, + { + "name": "Falcon-11B", + "repo": "bartowski/falcon-11B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "falcon-11B-Q4_K_M.gguf", + "disk_space": 6849675168.0 + } + ] + }, { "name": "Falcon-40B", "repo": "maddes8cht/tiiuae-falcon-40b-gguf", @@ -273,6 +284,50 @@ "disk_space": 5329759200.0 } ] + }, + { + "name": "Gemma-2-9B", + "repo": "mradermacher/gemma-2-9b-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "gemma-2-9b.Q4_K_M.gguf", + "disk_space": 5761058240.0 + } + ] + }, + { + "name": "Gemma-2-9B-Instruct", + "repo": "bartowski/gemma-2-9b-it-GGUF", + "files": [ + { + 
"name": "q4_K_M", + "filename": "gemma-2-9b-it-Q4_K_M.gguf", + "disk_space": 5761057728.0 + } + ] + }, + { + "name": "Gemma-2-27B", + "repo": "mradermacher/gemma-2-27b-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "gemma-2-27b.Q4_K_M.gguf", + "disk_space": 16645382176.0 + } + ] + }, + { + "name": "Gemma-2-27B-Instruct", + "repo": "bartowski/gemma-2-27b-it-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "gemma-2-27b-it-Q4_K_M.gguf", + "disk_space": 16645381632.0 + } + ] } ] }, @@ -482,7 +537,23 @@ ] } ] - }, + }, + { + "name": "Mathstral", + "models": [ + { + "name": "Mathstral-7B", + "repo": "MaziyarPanahi/mathstral-7B-v0.1-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "mathstral-7B-v0.1.Q4_K_M.gguf", + "disk_space": 4372811584.0 + } + ] + } + ] + }, { "name": "Med42", "models": [ @@ -496,7 +567,18 @@ "disk_space": 41422910368.0 } ] - } + }, + { + "name": "Med42-v2-8B", + "repo": "mradermacher/Llama3-Med42-8B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Llama3-Med42-8B.Q4_K_M.gguf", + "disk_space": 4920734464.0 + } + ] + } ] }, { @@ -542,12 +624,12 @@ }, { "name": "Medicine-LLM-13B", - "repo": "TheBloke/medicine-LLM-13B-GGUF", + "repo": "mradermacher/medicine-LLM-13B-GGUF", "files": [ { "name": "q4_K_M", - "filename": "medicine-llm-13b.Q4_K_M.gguf", - "disk_space": 7865963456.0 + "filename": "medicine-LLM-13B.Q4_K_M.gguf", + "disk_space": 7865963936.0 } ] } @@ -605,6 +687,17 @@ } ] }, + { + "name": "Meta-Llama-3_1-8B", + "repo": "QuantFactory/Meta-Llama-3.1-8B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Meta-Llama-3.1-8B.Q4_K_M.gguf", + "disk_space": 4920733856.0 + } + ] + }, { "name": "Meta-Llama-3-8B-Instruct", "repo": "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF", @@ -616,6 +709,17 @@ } ] }, + { + "name": "Meta-Llama-3_1-8B-Instruct", + "repo": "QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "disk_space": 
4920734240.0 + } + ] + }, { "name": "Meta-Llama-3-70B", "repo": "NousResearch/Meta-Llama-3-70B-GGUF", @@ -627,6 +731,17 @@ } ] }, + { + "name": "Meta-Llama-3_1-70B", + "repo": "mradermacher/Meta-Llama-3.1-70B-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Meta-Llama-3.1-70B.Q4_K_M.gguf", + "disk_space": 42520393600.0 + } + ] + }, { "name": "Meta-Llama-3-70B-Instruct", "repo": "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF", @@ -637,6 +752,17 @@ "disk_space": 42520906208.0 } ] + }, + { + "name": "Meta-Llama-3_1-70B-Instruct", + "repo": "mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf", + "disk_space": 42520394080.0 + } + ] } ] }, @@ -655,13 +781,13 @@ ] }, { - "name": "Mistral-7B-Instruct-v0_2", - "repo": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", + "name": "Mistral-7B-Instruct-v0_3", + "repo": "rubra-ai/Mistral-7B-Instruct-v0.3-GGUF", "files": [ { "name": "q4_K_M", - "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf", - "disk_space": 4368439584.0 + "filename": "rubra-mistral-7b-instruct-v0.3.Q4_K_M.gguf", + "disk_space": 4896118816.0 } ] }, @@ -675,6 +801,17 @@ "disk_space": 4368450304.0 } ] + }, + { + "name": "Mistral-Nemo-Instruct", + "repo": "bartowski/Mistral-Nemo-Instruct-2407-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", + "disk_space": 7477204960.0 + } + ] } ] }, @@ -831,7 +968,18 @@ "disk_space": 4368450688.0 } ] - } + }, + { + "name": "OpenChat-3_6-8B-20240522", + "repo": "bartowski/openchat-3.6-8b-20240522-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "openchat-3.6-8b-20240522-Q4_K_M.gguf", + "disk_space": 4920734496.0 + } + ] + } ] }, { @@ -984,6 +1132,28 @@ } ] }, + { + "name": "Phi-3_1-mini-4k-instruct", + "repo": "bartowski/Phi-3.1-mini-4k-instruct-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Phi-3.1-mini-4k-instruct-Q4_K_M.gguf", + "disk_space": 2393232096.0 + } + ] + }, 
+ { + "name": "Phi-3_1-mini-128k-instruct", + "repo": "bartowski/Phi-3.1-mini-128k-instruct-GGUF", + "files": [ + { + "name": "q4_K_M", + "filename": "Phi-3.1-mini-128k-instruct-Q4_K_M.gguf", + "disk_space": 2393232640.0 + } + ] + }, { "name": "Phi-3-medium-4k-instruct", "repo": "bartowski/Phi-3-medium-4k-instruct-GGUF", diff --git a/scripts/serge.env b/scripts/serge.env index 5ee93cf5b66..d5b30c57b7d 100644 --- a/scripts/serge.env +++ b/scripts/serge.env @@ -1,3 +1,3 @@ -LLAMA_PYTHON_VERSION=0.2.82 +LLAMA_PYTHON_VERSION=0.2.86 SERGE_ENABLE_IPV4=true SERGE_ENABLE_IPV6=false