From e7248fe90e27c7c8e39dd4cac5874eb9f96ab182 Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:49:13 +0200 Subject: [PATCH] v0.8.2 --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- docs/openapi.json | 2 +- server/pyproject.toml | 2 +- server/text_generation_server/models/__init__.py | 4 +++- .../text_generation_server/models/flash_santacoder.py | 10 +++++++--- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67ad8674248..bd5994a8a4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2557,7 +2557,7 @@ dependencies = [ [[package]] name = "text-generation-benchmark" -version = "0.8.1" +version = "0.8.2" dependencies = [ "average", "clap", @@ -2577,7 +2577,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "0.8.1" +version = "0.8.2" dependencies = [ "futures", "grpc-metadata", @@ -2593,7 +2593,7 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "0.8.1" +version = "0.8.2" dependencies = [ "clap", "ctrlc", @@ -2609,7 +2609,7 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "0.8.1" +version = "0.8.2" dependencies = [ "async-stream", "axum", diff --git a/Cargo.toml b/Cargo.toml index 3190b64c495..b28286fa4fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ ] [workspace.package] -version = "0.8.1" +version = "0.8.2" edition = "2021" authors = ["Olivier Dehaene"] homepage = "https://github.com/huggingface/text-generation-inference" diff --git a/docs/openapi.json b/docs/openapi.json index 040c8e8bddc..e5ef0e3c195 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -10,7 +10,7 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0" }, - "version": "0.8.1" + "version": "0.8.2" }, "paths": { "/": { diff --git a/server/pyproject.toml b/server/pyproject.toml index 7400a055f77..d381eac4d51 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation-server" -version = "0.8.1" +version = "0.8.2" description = "Text Generation Inference Python gRPC Server" authors = ["Olivier Dehaene "] diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py index 78b68721f55..fc92d03d4fe 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -138,7 +138,9 @@ def get_model( trust_remote_code=trust_remote_code, ) - config_dict, _ = PretrainedConfig.get_config_dict(model_id, revision=revision, trust_remote_code=trust_remote_code) + config_dict, _ = PretrainedConfig.get_config_dict( + model_id, revision=revision, trust_remote_code=trust_remote_code + ) model_type = config_dict["model_type"] if model_type == "gpt_bigcode": diff --git a/server/text_generation_server/models/flash_santacoder.py b/server/text_generation_server/models/flash_santacoder.py index 7907e2cc3e0..e1c893d01aa 100644 --- a/server/text_generation_server/models/flash_santacoder.py +++ b/server/text_generation_server/models/flash_santacoder.py @@ -87,7 +87,9 @@ def load_weights( ): for filename in filenames: with safe_open( - filename, framework="pt", device=str(device) if quantize is None else "cpu" + filename, + framework="pt", + device=str(device) if quantize is None else "cpu", ) as f: for key in f.keys(): value = f.get_tensor(key) @@ -148,11 +150,13 @@ def load_weights( module._parameters[param_name][: value.shape[0]] = value elif "kv_attn.weight" in key: module._parameters[param_name][ - model.transformer.head_size * model.transformer.num_heads : + model.transformer.head_size + * model.transformer.num_heads : ] = value elif "kv_attn.bias" in key: module._parameters[param_name][ - model.transformer.head_size * model.transformer.num_heads : + model.transformer.head_size + * model.transformer.num_heads : ] = value else: if current_parameter_tensor.shape != value.shape: