
Commit 630800e

v1.3.4
1 parent 529d7c2

File tree

8 files changed: +86 -98 lines

Cargo.lock

Lines changed: 75 additions & 86 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.3.3"
+version = "1.3.4"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"

docs/openapi.json

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
       "name": "Apache 2.0",
       "url": "https://www.apache.org/licenses/LICENSE-2.0"
     },
-    "version": "1.3.3"
+    "version": "1.3.4"
   },
   "paths": {
     "/": {

integration-tests/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-integration-tests"
-version = "1.3.3"
+version = "1.3.4"
 description = "Text Generation Inference integration tests"
 authors = ["Nicolas Patry <[email protected]>"]
 

server/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "1.3.3"
+version = "1.3.4"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <[email protected]>"]
 

server/text_generation_server/utils/layers.py

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@
     V2 = False
     log_once(
         logger.warning,
-        "Disabling exllama v2 and using v1 instead because there are issues when sharding"
+        "Disabling exllama v2 and using v1 instead because there are issues when sharding",
     )
 
 if os.getenv("DISABLE_EXLLAMA") == "True":
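
The trailing comma is cosmetic, but the message documents real behavior: in a sharded deployment, exllama v2 is turned off in favor of v1. A hypothetical sketch of that gate follows; the EXLLAMA_VERSION and WORLD_SIZE environment variable names are assumptions inferred from the message, and the fallback itself is what the log text describes:

import os

# Hypothetical reconstruction of the gate around this warning; the env var
# names are assumptions, not shown in this hunk.
V2 = os.getenv("EXLLAMA_VERSION", "2") == "2"
if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
    # More than one shard: fall back to the exllama v1 kernels.
    V2 = False
    print("Disabling exllama v2 and using v1 instead because there are issues when sharding")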

server/text_generation_server/utils/log.py

Lines changed: 1 addition & 1 deletion
@@ -2,5 +2,5 @@
 
 
 @lru_cache(10)
-def log_once(log, msg:str):
+def log_once(log, msg: str):
     log(msg)
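
The annotation being retouched here also explains the single-shot warnings elsewhere in this commit: functools.lru_cache memoizes log_once on its (log, msg) argument pair, so repeating the same message is a cache hit that never reaches the logger. A minimal sketch, using print as the sink:

from functools import lru_cache


@lru_cache(10)
def log_once(log, msg: str):
    # First call with a given (log, msg) pair runs the body; identical
    # repeat calls are answered from the cache and stay silent.
    log(msg)


log_once(print, "Using exllama kernels")  # printed
log_once(print, "Using exllama kernels")  # cache hit: nothing printed
log_once(print, "Disabling exllama v2")   # new message: printed

Note the cache holds only 10 entries, so an eleventh distinct message evicts the least recently used one, which could then be logged again later.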

server/text_generation_server/utils/weights.py

Lines changed: 5 additions & 6 deletions
@@ -215,7 +215,9 @@ def get_multi_weights_col(self, prefixes: List[str], quantize: str, dim: int):
             bits, groupsize, desc_act = self._get_gptq_params()
             from text_generation_server.utils.layers import HAS_EXLLAMA
 
-            use_exllama = bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
+            use_exllama = (
+                bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
+            )
             weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
         else:
             w = [self.get_sharded(f"{p}.weight", dim=0) for p in prefixes]

@@ -281,14 +283,11 @@ def get_multi_weights_row(self, prefix: str, quantize: str):
                 if CAN_EXLLAMA:
                     log_once(
                         logger.warning,
-                        "Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
+                        "Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True",
                     )
                 use_exllama = False
             else:
-                log_once(
-                    logger.info,
-                    f"Using exllama kernels v{HAS_EXLLAMA}"
-                )
+                log_once(logger.info, f"Using exllama kernels v{HAS_EXLLAMA}")
 
             g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0)
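
Beyond the comma fixes, the first hunk wraps the kernel-selection predicate in parentheses for readability. Pulled out as a standalone function, the rule reads as below; can_use_exllama is a hypothetical name for illustration, not part of the repository:

def can_use_exllama(bits: int, quantize: str, desc_act: bool, has_exllama: bool) -> bool:
    # Mirrors the reformatted predicate: exllama kernels are only taken for
    # 4-bit GPTQ weights without act-order (desc_act), and only when the
    # kernels are actually installed.
    return bits == 4 and has_exllama and quantize == "gptq" and not desc_act


assert can_use_exllama(4, "gptq", False, True)
assert not can_use_exllama(8, "gptq", False, True)  # 8-bit: no exllama
assert not can_use_exllama(4, "awq", False, True)   # non-GPTQ: no exllama
assert not can_use_exllama(4, "gptq", True, True)   # act-order: no exllama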

0 commit comments