Upgrading all versions. (#1759)

huggingface · Apr 18, 2024 · f9ee2c4
1 parent 06c3d4b
commit f9ee2c4
Show file tree

Hide file tree

Showing 11 changed files with 781 additions and 754 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -14,6 +14,10 @@ edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
 
+[workspace.dependencies]
+tokenizers = { version = "0.19.1", features = ["http"] }
+hf-hub = { version = "0.3.1", features = ["tokio"] }
+
 [profile.release]
 debug = 1
 incremental = true

diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml
@@ -23,9 +23,9 @@ serde_json = "1.0"
 tabled = "0.14.0"
 text-generation-client = { path = "../router/client" }
 thiserror = "1.0.48"
-tokenizers = { version = "0.14.0", features = ["http"] }
+tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "macros"] }
 tui = {package = "ratatui", version = "0.23", default-features = false, features = ["crossterm"]}
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
-hf-hub = "0.3.1"
+hf-hub = { workspace = true }
diff --git a/docs/openapi.json b/docs/openapi.json
@@ -408,9 +408,14 @@
         },
         "responses": {
           "200": {
-            "description": "Generated Text",
+            "description": "Generated Chat Completion",
             "content": {
               "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ChatCompletion"
+                }
+              },
+              "text/event-stream": {
                 "schema": {
                   "$ref": "#/components/schemas/ChatCompletionChunk"
                 }
@@ -492,11 +497,16 @@
         },
         "responses": {
           "200": {
-            "description": "Generated Text",
+            "description": "Generated Completion",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ChatCompletionChunk"
+                  "$ref": "#/components/schemas/Completion"
+                }
+              },
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/CompletionCompleteChunk"
                 }
               }
             }
@@ -930,7 +940,7 @@
           "tool_prompt": {
             "type": "string",
             "description": "A prompt to be appended before the tools",
-            "example": "\"Based on the conversation, please choose the most appropriate tool to use: \"",
+            "example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
             "nullable": true
           },
           "tools": {
@@ -1071,7 +1081,10 @@
             "example": "mistralai/Mistral-7B-Instruct-v0.2"
           },
           "prompt": {
-            "type": "string",
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
             "description": "The prompt to generate completions for.",
             "example": "What is Deep Learning?"
           },
@@ -1234,17 +1247,17 @@
         "type": "object",
         "required": [
           "name",
-          "parameters"
+          "arguments"
         ],
         "properties": {
+          "arguments": {},
           "description": {
             "type": "string",
             "nullable": true
           },
           "name": {
             "type": "string"
-          },
-          "parameters": {}
+          }
         }
       },
       "GenerateParameters": {
@@ -1260,7 +1273,7 @@
           },
           "decoder_input_details": {
             "type": "boolean",
-            "default": "true"
+            "default": "false"
           },
           "details": {
             "type": "boolean",
@@ -1285,6 +1298,7 @@
                 "$ref": "#/components/schemas/GrammarType"
               }
             ],
+            "default": "null",
             "nullable": true
           },
           "max_new_tokens": {
@@ -1478,6 +1492,7 @@
           "max_batch_total_tokens",
           "max_waiting_tokens",
           "validation_workers",
+          "max_client_batch_size",
           "version"
         ],
         "properties": {
@@ -1503,6 +1518,11 @@
             "example": "2",
             "minimum": 0
           },
+          "max_client_batch_size": {
+            "type": "integer",
+            "example": "32",
+            "minimum": 0
+          },
           "max_concurrent_requests": {
             "type": "integer",
             "description": "Router Parameters",

diff --git a/router/Cargo.toml b/router/Cargo.toml
@@ -21,7 +21,7 @@ axum-tracing-opentelemetry = "0.14.1"
 text-generation-client = { path = "client" }
 clap = { version = "4.4.5", features = ["derive", "env"] }
 futures = "0.3.28"
-hf-hub = { version = "0.3.0", features = ["tokio"] }
+hf-hub = { workspace = true }
 jsonschema = { version = "0.17.1", features = ["draft202012"] }
 metrics = "0.21.1"
 metrics-exporter-prometheus = { version = "0.12.1", features = [] }
@@ -33,7 +33,7 @@ reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
 thiserror = "1.0.48"
-tokenizers = { version = "0.15.1", features = ["http"] }
+tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
 tokio-stream = "0.1.14"
 tower-http = { version = "0.4.4", features = ["cors"] }

diff --git a/router/src/lib.rs b/router/src/lib.rs
@@ -238,7 +238,7 @@ pub(crate) struct GenerateParameters {
     #[schema(default = "true")]
     pub details: bool,
     #[serde(default)]
-    #[schema(default = "true")]
+    #[schema(default = "false")]
     pub decoder_input_details: bool,
     #[serde(default)]
     #[schema(
@@ -252,6 +252,7 @@ pub(crate) struct GenerateParameters {
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
     pub top_n_tokens: Option<u32>,
     #[serde(default)]
+    #[schema(nullable = true, default = "null", example = "null")]
     pub grammar: Option<GrammarType>,
 }