add more docs
lucasavila00 committed Apr 7, 2024
1 parent 99f989a commit c25d381
Showing 16 changed files with 421 additions and 67 deletions.
18 changes: 17 additions & 1 deletion apps/lmscript-docs/docs/client/chat-templates.md
@@ -4,4 +4,20 @@ sidebar_position: 6

# Chat Templates

TODO
Use predefined chat templates to generate responses.

Import the `ALL_CHAT_TEMPLATES` array to get a list of available templates.

Please create an issue if you want to add a new template.

```ts
import { ALL_CHAT_TEMPLATES } from "@lmscript/client/chat-template";
```

```ts
console.log(ALL_CHAT_TEMPLATES);
```

```js
["mistral"];
```
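A template name is passed to the backend when constructing a client. A minimal sketch, reusing the `SGLangBackend` constructor shown in the roles example (the URL is illustrative):

```ts
import { LmScript } from "@lmscript/client";
import { SGLangBackend } from "@lmscript/client/backends/sglang";

// "mistral" is the only template listed above.
const model = new LmScript(
  new SGLangBackend({
    url: "http://localhost:30000", // illustrative address of a local SGLang server
    template: "mistral",
  }),
  { temperature: 0 },
);
```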
4 changes: 3 additions & 1 deletion apps/lmscript-docs/docs/client/generation.md
@@ -4,7 +4,9 @@ sidebar_position: 1

# Generation

Generate text with LmScript. Optionally constrain the output with a regex expression.
Let the language model generate text.

Optionally, constrain the output with a regex expression.

## Generating and Capturing

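A minimal sketch of generating and capturing, assuming a configured `model` instance; the `.push`, `.gen`, and `.run` calls follow the examples elsewhere in these docs:

```ts
const {
  captured: { answer },
} = await model
  .push("Q: What is the capital of France?\nA: ")
  .gen("answer", {
    maxTokens: 16,
    regex: "[A-Z][a-z]+", // optional: constrain the output with a regex
  })
  .run();

console.log(answer);
```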
51 changes: 49 additions & 2 deletions apps/lmscript-docs/docs/client/roles.md
@@ -2,6 +2,53 @@
sidebar_position: 3
---

# Conversation Roles
# Chat Roles

TODO
Use `.system`, `.user`, and `.assistant` to define conversation roles.

## Usage

```ts
model
.system("Some system text...")
.user("Some user text...")
.assistant("Some assistant text...")
.run();
```

## Callbacks

Use the callback function to perform additional operations.

The callback function receives the model instance as an argument.

The model instance supports all methods of the LmScript class.

```ts
const model = new LmScript(
new SGLangBackend({
url: `http://localhost:30000`,
template: "mistral",
}),
{ temperature: 0 },
);

const { rawText } = await model
.user((m) => m.push("Tell me a joke."))
.assistant((m) =>
m.push("Sure.").gen("response", { maxTokens: 128 }),
)
.run();

console.log(rawText);
```

```
`<s>[INST] Tell me a joke. [/INST]Sure. Here's a classic one for you:
Why did the tomato turn red?
Because it saw the salad dressing!
I hope that brought a smile to your face! Do you have any other requests or questions? I'm here to help.`
```
35 changes: 34 additions & 1 deletion apps/lmscript-docs/docs/client/sampling-params.md
@@ -4,4 +4,37 @@ sidebar_position: 5

# Sampling Parameters

TODO
## Instance Parameters

Set default sampling parameters for the model when creating an instance.

```ts
const model = new LmScript(backend, {
temperature: 0, // required
top_p: 0.3, // optional
top_k: 20, // optional
frequency_penalty: 0, // optional
presence_penalty: 0, // optional
});
```

## Execution Parameters

Override the default sampling parameters for a specific execution.

```ts
await model
.user("Tell me a joke.")
.assistant((m) =>
m.gen("joke", {
maxTokens: 128,
}),
)
.run({
temperature: 0, // optional
top_p: 0.3, // optional
top_k: 20, // optional
frequency_penalty: 0, // optional
presence_penalty: 0, // optional
});
```
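Parameters passed to `.run` apply to the whole execution, while per-call options such as `maxTokens` are set on `.gen` itself, as the example above shows.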
82 changes: 81 additions & 1 deletion apps/lmscript-docs/docs/client/selection.md
@@ -4,4 +4,84 @@ sidebar_position: 2

# Selection

TODO
Let the language model choose an option from a list of choices.

:::note

In the SGLang backend, selection is implemented by computing the normalized
log probabilities of all choices and picking the one with the highest probability.

In the vLLM backend, selection is implemented through autoregressive decoding with
logit bias masking, following the constraints set by a regex.

This means that the SGLang approach is slower but produces better results.
:::

## Usage

```ts
const {
captured: { bestLanguage },
} = await model
.push("The best programming language is ")
.select("bestLanguage", {
choices: ["Python", "JavaScript", "Java", "C++", "C#"],
})
.run();
```

The captured text is available in the `captured` object.

```ts
console.log(bestLanguage);
```

```
`C++`
```

## Selecting without a name

```ts
const { captured, rawText } = await model
.push("The best programming language is ")
.select({
choices: ["Python", "JavaScript", "Java", "C++", "C#"],
})
.run();

console.log(captured);
```

```json
{}
```

```ts
console.log(rawText);
```

```
`The best programming language is C++`
```

## Using regex for selection

Use a regex to make the SGLang backend work like the vLLM backend.

```ts
const {
captured: { jsOrTs },
} = await model
.push("The best programming language is ")
.gen("jsOrTs", {
regex: "(JavaScript|TypeScript)",
})
.run();

console.log(jsOrTs);
```

```
`JavaScript`
```
7 changes: 0 additions & 7 deletions apps/lmscript-docs/docs/client/structured.md

This file was deleted.

28 changes: 27 additions & 1 deletion apps/lmscript-docs/docs/docker/runpod-serverless-sglang.md
@@ -4,4 +4,30 @@ sidebar_position: 1

# Runpod Serverless SGLang

TODO
Pre-built Docker image that runs on
[Runpod Serverless](https://www.runpod.io/serverless-gpu).

## Usage

The image is published to
https://hub.docker.com/r/degroote22/lmscript-runpod-serverless

The DockerHub image can be deployed to a machine whose GPU has 24GB of memory
without any configuration changes.

### Environment Variables for Configuration

| Name | Detail |
| ---------------------- | --------------------------------------------------------------------------------------------------- |
| REPO_ID | HuggingFace repository with the language model. Defaults to "TheBloke/Mistral-7B-Instruct-v0.2-AWQ" |
| DISABLE_FLASH_INFER | Set to "yes" to disable FlashInfer. Older GPUs are not supported by FlashInfer. Defaults to "no". |
| CONCURRENCY_PER_WORKER | Number of concurrent requests per Runpod Serverless Worker. Defaults to 50. |
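A deployed worker can then be consumed from the client. The sketch below is a hypothetical wiring: it assumes the worker exposes an SGLang-compatible endpoint, and the URL is illustrative rather than the actual Runpod endpoint format.

```ts
import { LmScript } from "@lmscript/client";
import { SGLangBackend } from "@lmscript/client/backends/sglang";

// Hypothetical URL: substitute your deployed endpoint's address.
const model = new LmScript(
  new SGLangBackend({
    url: "https://your-runpod-endpoint.example.com",
    template: "mistral", // matches the default TheBloke/Mistral-7B-Instruct-v0.2-AWQ model
  }),
  { temperature: 0 },
);
```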

## Docker-Compose

There is an example of a Docker-compose file in the repository.

Clone the [LmScript repository](https://github.com/lucasavila00/LmScript/) and:

- `cd docker/runpod-serverless-sglang`
- `docker-compose up`
11 changes: 1 addition & 10 deletions docker/runpod-serverless-sglang/README.md
@@ -10,16 +10,7 @@ https://hub.docker.com/r/degroote22/lmscript-runpod-serverless

## Usage

The DockerHub image can be deployed to a machine with a 24gb RAM GPU without any
configuration changes.

### Environment Variables for Configuration

| Name | Detail |
| ---------------------- | --------------------------------------------------------------------------------------------------- |
| REPO_ID | HuggingFace repository with the language model. Defaults to "TheBloke/Mistral-7B-Instruct-v0.2-AWQ" |
| DISABLE_FLASH_INFER | Set to "yes" to disable FlashInfer. Older GPUs are not supported by FlashInfer. Defaults to "no". |
| CONCURRENCY_PER_WORKER | Number of concurrent requests per Runpod Serverless Worker. Defaults to 50. |
Documentation is available in the [LmScript Docs](/docs/category/lmscriptclient).

## License

65 changes: 65 additions & 0 deletions examples/docs/tests/client/chat-roles.test.ts
@@ -0,0 +1,65 @@
import { expect, test } from "vitest";
import { md } from "mdts";
import { SGLangBackend } from "@lmscript/client/backends/sglang";
import { LmScript } from "@lmscript/client";
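// The `md` tagged templates below double as the source of the docs pages
// (mdts literate-docs pattern; inferred from usage here, not verified).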
test(
"client/roles",
async () => {
md`
---
sidebar_position: 3
---
# Chat Roles
Use \`.system\`, \`.user\`, and \`.assistant\` to define conversation roles.
`;

md`
## Usage
~~~ts
model
.system("Some system text...")
.user("Some user text...")
.assistant("Some assistant text...")
.run();
~~~
`;

md`
## Callbacks
Use the callback function to perform additional operations.
The callback function receives the model instance as an argument.
The model instance supports all methods of the LmScript class.
`;

const model = new LmScript(
new SGLangBackend({
url: `http://localhost:30000`,
template: "mistral",
}),
{ temperature: 0.0 }
);
const { rawText } = await model
.user((m) => m.push("Tell me a joke."))
.assistant((m) => m.push("Sure.").gen("response", { maxTokens: 128 }))
.run();

expect(rawText).toMatchInlineSnapshot(`
"<s>[INST] Tell me a joke. [/INST]Sure. Here's a classic one for you:
Why did the tomato turn red?
Because it saw the salad dressing!
I hope that brought a smile to your face! Do you have any other requests or questions? I'm here to help."
`);
},
{
timeout: 60_000,
}
);
19 changes: 17 additions & 2 deletions examples/docs/tests/client/chat-templates.test.ts
@@ -1,5 +1,6 @@
import { test } from "vitest";
import { expect, test } from "vitest";
import { md } from "mdts";
import { ALL_CHAT_TEMPLATES } from "@lmscript/client/chat-template";

test("client/chat-templates", async () => {
md`
@@ -9,6 +10,20 @@ test("client/chat-templates", async () => {
# Chat Templates
TODO
Use predefined chat templates to generate responses.
Import the \`ALL_CHAT_TEMPLATES\` array to get a list of available templates.
Please create an issue if you want to add a new template.
~~~ts
import { ALL_CHAT_TEMPLATES } from "@lmscript/client/chat-template";
~~~
`;

expect(ALL_CHAT_TEMPLATES).toMatchInlineSnapshot(`
[
"mistral",
]
`);
});
14 changes: 0 additions & 14 deletions examples/docs/tests/client/conversation-roles.test.ts

This file was deleted.

4 changes: 3 additions & 1 deletion examples/docs/tests/client/generation.test.ts
@@ -21,7 +21,9 @@ test(
# Generation
Generate text with LmScript. Optionally constrain the output with a regex expression.
Let the language model generate text.
Optionally, constrain the output with a regex expression.
`;
md`
## Generating and Capturing