From c1fae8aca517d1289e3f934750cb6ea52804ad6d Mon Sep 17 00:00:00 2001
From: Cameron Pfiffer
Date: Thu, 10 Oct 2024 14:34:17 -0700
Subject: [PATCH] Create lmstudio.md

---
 docs/reference/serve/lmstudio.md | 89 ++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 docs/reference/serve/lmstudio.md

diff --git a/docs/reference/serve/lmstudio.md b/docs/reference/serve/lmstudio.md
new file mode 100644
index 000000000..e8c336137
--- /dev/null
+++ b/docs/reference/serve/lmstudio.md
@@ -0,0 +1,89 @@
# Serve with LM Studio